kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

commit 2960a34feeacd9d134c62bc9374a8168f5c02d15
parent 7f7f08699771a79719794add97492291ec71a4e6
Author: lash <dev@holbrook.no>
Date:   Sat,  6 Aug 2022 09:25:01 +0000

Add set xattr script, simple metadata instance validation

Diffstat:
A set.sh | 46 ++++++++++++++++++++++++++++++++++++++++++++++
M src/main.rs | 26 ++++++++++++++++++++------
M src/meta.rs | 35 +++++++++++++++++++++++++----------
3 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/set.sh b/set.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +f=$1 + +if [ ! -f "$f" ]; then + >&2 echo $f is not a file + exit 1 +fi + +echo -n "Title: " +read title +if [ ! -z "$title" ]; then + setfattr -n user.dcterms:title -v "$title" "$f" +fi + +echo -n "Author: " +read author +if [ ! -z "$author" ]; then + setfattr -n user.dcterms:creator -v "$author" "$f" +fi + +echo -n "Subject (comma,separated list): " +read subject +if [ ! -z "$subject" ]; then + setfattr -n user.dcterms:subject -v "$subject" "$f" +fi + +echo -n "Language: " +read language +if [ ! -z "$language" ]; then + setfattr -n user.dcterms:language -v "$language" "$f" +fi + +echo -n "Type: " +read typ +if [ ! -z "$typ" ]; then + setfattr -n user.dcterms:type -v "$typ" "$f" +fi + +mime=`file -b --mime-type "$f"` +echo -n "Mime ($mime): " +read mime_in +if [ ! -z "$mime_in" ]; then + mime=$mime_in +fi +setfattr -n user.dcterms:MediaType -v "$mime" "$f" diff --git a/src/main.rs b/src/main.rs @@ -2,6 +2,7 @@ use std::default; use std::fs::{ File, create_dir_all, + metadata, }; use std::io::Write; use std::path::{ @@ -23,6 +24,7 @@ use hex; use log::{ debug, info, + warn, }; use biblatex::EntryType; @@ -146,12 +148,13 @@ fn store(index_path: &Path, m: &MetaData) { } fn exec_import_xattr(f: &Path, index_path: &Path) -> bool { - let m = MetaData::from_xattr(f); - match m.typ() { - EntryType::Unknown(v) => { + let m = match MetaData::from_xattr(f) { + Ok(r) => { + r + } + Err(e) => { return false; - }, - _ => {}, + } }; debug!("successfully processed xattr import source"); @@ -234,13 +237,23 @@ fn exec_import(p: &Path, index_path: &Path) { .filter(|e| !e.file_type().is_dir()) { let fp = entry.path(); - debug!("processing {:?}", fp); + debug!("attempt xattr import {:?}", fp); if exec_import_xattr(fp, index_path) { continue; } + + let st = entry.metadata().unwrap(); + if st.len() > 1048576 { + warn!("skipping metadata content probe for file >1MB"); + continue; + } + + debug!("attempt rdf import {:?}", fp); 
if exec_import_rdf(fp, index_path) { continue; } + + debug!("attempt biblatex import {:?}", fp); if exec_import_biblatex(fp, index_path) { continue; } @@ -271,6 +284,7 @@ fn main() { _ => {}, } + let mut r = true; match args.subcommand_matches("apply") { Some(v) => { diff --git a/src/meta.rs b/src/meta.rs @@ -36,7 +36,7 @@ use crate::dc::{ DC_XATTR_TYPE, DC_XATTR_MEDIATYPE, }; - +use crate::error::ParseError; use crate::digest; use log::{ @@ -64,9 +64,6 @@ pub struct MetaData { publish_date: PublishDate, } -//pub fn check_xattr() { - -//} /// Generates the native `sha512` digest of a file. /// @@ -230,14 +227,16 @@ impl MetaData { } /// Instantiate metadata from the extended attributes of the file in `filepath`. - pub fn from_xattr(filepath: &path::Path) -> MetaData { + pub fn from_xattr(filepath: &path::Path) -> Result<MetaData, ParseError> { let mut title: String = String::new(); let mut author: String = String::new(); let mut typ: EntryType = EntryType::Unknown(String::new()); let filename: FileName; + debug!("Calculate digest for file {:?}", &filepath); let digest = digest_from_path(filepath); + debug!("Calculated digest {} for file {:?}", hex::encode(&digest), &filepath); filename = filepath.file_name() .unwrap() @@ -274,6 +273,9 @@ impl MetaData { } let mut metadata = MetaData::new(title.as_str(), author.as_str(), typ, digest, Some(filename)); + if !metadata.validate() { + return Err(ParseError{}); + } match xattr::get(filepath, "user.dcterms:subject") { Ok(v) => { @@ -317,7 +319,7 @@ impl MetaData { #[cfg(feature = "magic")] metadata.set_mime_magic(filepath); - metadata + Ok(metadata) } @@ -446,6 +448,19 @@ impl MetaData { } Ok(m) } + + + /// Check whether a Metadata instance represents a valid entry. 
+ pub fn validate(&self) -> bool { + let empty = String::new(); + if self.title() == empty { + return false; + } + if self.author() == empty { + return false; + } + true + } } impl fmt::Debug for MetaData { @@ -469,7 +484,7 @@ mod tests { #[test] fn test_metadata_create() { let s = path::Path::new("testdata/bitcoin.pdf"); - let meta = MetaData::from_xattr(s); + let meta = MetaData::from_xattr(s).unwrap(); assert_eq!(meta.dc.title, "Bitcoin: A Peer-to-Peer Electronic Cash System"); assert_eq!(meta.dc.author, "Satoshi Nakamoto"); assert_eq!(meta.fingerprint(), String::from("2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e")); @@ -490,7 +505,7 @@ mod tests { m.set_language("nb-NO"); m.to_xattr(fp); - let m_check = MetaData::from_xattr(fp); + let m_check = MetaData::from_xattr(fp).unwrap(); assert_eq!(m_check.title(), "foo"); assert_eq!(m_check.author(), "bar"); assert_eq!(m_check.fingerprint(), digest_hex); @@ -515,7 +530,7 @@ mod tests { #[test] fn test_metadata_xattr_magic() { let s = path::Path::new("testdata/bitcoin.pdf"); - let meta = MetaData::from_xattr(s); + let meta = MetaData::from_xattr(s).unwrap(); #[cfg(feature = "magic")] { @@ -523,7 +538,7 @@ mod tests { let f = NamedTempFile::new_in(".").unwrap(); let fp = f.path(); write(&f, &[0, 1, 2, 3]); - let meta_empty = MetaData::from_xattr(fp); + let meta_empty = MetaData::from_xattr(fp).unwrap(); assert_eq!(meta_empty.mime().unwrap(), "application/octet-stream"); } }