kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

commit 5da4623992d55e2e5356da183985b39754a2986c
parent 343e902b10679c37b92e59c20fa32a87f78df08d
Author: lash <dev@holbrook.no>
Date:   Sun, 26 Jun 2022 06:48:58 +0000

Add key value importer from file

Diffstat:
MCargo.toml | 4++++
Msrc/dc/mod.rs | 13+++++++++++++
Msrc/meta.rs | 194++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/rdf.rs | 58++++++++++++++++++++++++++++++++++++----------------------
4 files changed, 241 insertions(+), 28 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -20,6 +20,10 @@ sha2 = "^0.10.2" log = "^0.4" env_logger = "^0.9" urn = "^0.4.0" +#tempfile = "^3.3.0" + +[dev-dependencies] +tempfile = "^3.3.0" #[dependencies.rdf] #rio_turtle = "~0.7.1" diff --git a/src/dc/mod.rs b/src/dc/mod.rs @@ -13,6 +13,19 @@ pub struct DCMetaData { pub language: Option<LanguageIdentifier>, } +pub const DC_IRI_TITLE: &str = "https://purl.org/dc/terms/title"; +pub const DC_IRI_CREATOR: &str = "https://purl.org/dc/terms/creator"; +pub const DC_IRI_SUBJECT: &str = "https://purl.org/dc/terms/subject"; +pub const DC_IRI_LANGUAGE: &str = "https://purl.org/dc/terms/language"; +pub const DC_IRI_TYPE: &str = "https://purl.org/dc/terms/type"; +pub const DC_IRI_MEDIATYPE: &str = "https://purl.org/dc/terms/MediaType"; +pub const DC_XATTR_TITLE: &str = "user.dcterms:title"; +pub const DC_XATTR_CREATOR: &str = "user.dcterms:creator"; +pub const DC_XATTR_SUBJECT: &str = "user.dcterms:subject"; +pub const DC_XATTR_LANGUAGE: &str = "user.dcterms:language"; +pub const DC_XATTR_TYPE: &str = "user.dcterms:type"; +pub const DC_XATTR_MEDIATYPE: &str = "user.dcterms:MediaType"; + impl DCMetaData { pub fn new(title: &str, author: &str, typ: EntryType) -> DCMetaData { DCMetaData{ diff --git a/src/meta.rs b/src/meta.rs @@ -13,15 +13,31 @@ use std::fs::{ File, metadata, }; -use std::io::Read; +use std::path::Path; +use std::io::{ + Read, + BufRead, + BufReader, +}; use unic_langid_impl::LanguageIdentifier; use biblatex::EntryType; use std::str::FromStr; use std::os::linux::fs::MetadataExt; -use crate::dc::DCMetaData; +use crate::dc::{ + DCMetaData, + DC_XATTR_TITLE, + DC_XATTR_CREATOR, + DC_XATTR_SUBJECT, + DC_XATTR_LANGUAGE, + DC_XATTR_TYPE, + DC_XATTR_MEDIATYPE, +}; -//pub type Digest = Vec<u8>; +use log::{ + debug, + info, +}; pub type PublishDate = (u8, u8, u32); @@ -38,6 +54,10 @@ pub struct MetaData { retrieval_timestamp: u32, } +pub fn check_xattr() { + +} + impl MetaData { pub fn new(title: &str, author: &str, typ: EntryType, digest: Vec<u8>, filename: Option<FileName>) -> MetaData { let dc = DCMetaData::new(title, author, typ); @@ -46,7 +66,6 @@ impl MetaData { dc: dc, digest: vec!(), comment: String::new(), - //local_name: filepath.to_str().unwrap().to_string(), local_name: filename, publish_date: (0, 0, 0), retrieval_timestamp: 0, @@ -74,7 +93,7 @@ impl MetaData { } pub fn set_author(&mut self, author: &str) { - self.dc.title = String::from(author); + self.dc.author = String::from(author); } pub fn set_fingerprint(&mut self, fingerprint: Vec<u8>) { @@ -228,18 +247,142 @@ impl MetaData { _ => {}, } + match xattr::get(filepath, "user.dcterms:language") { + Ok(v) => { + match v { + Some(v) => { + let s = std::str::from_utf8(&v).unwrap(); + metadata.set_language(s); + }, + None => {}, + } + }, + _ => {}, + } + metadata } - pub fn to_xattr(&self, filepath: &Path) -> Result<(), std::io::Error> { + + pub fn to_xattr(&self, filepath: &path::Path) -> Result<(), std::io::Error> { + let filename = filepath.file_name() + .unwrap() + .to_os_string() + .into_string() + .unwrap(); + + xattr::set(filepath, DC_XATTR_TITLE, self.dc.title.as_bytes()); + xattr::set(filepath, DC_XATTR_CREATOR, self.dc.author.as_bytes()); + xattr::set(filepath, DC_XATTR_TYPE, self.dc.typ.to_string().as_bytes()); + + match &self.dc.language { + Some(v) => { + xattr::set(filepath, DC_XATTR_LANGUAGE, v.to_string().as_bytes()); + }, + _ => {}, + }; + + match &self.dc.mime { + Some(v) => { + xattr::set(filepath, DC_XATTR_MEDIATYPE, v.to_string().as_bytes()); + }, + _ => {}, + }; + + match &self.dc.subject { + Some(v) => { + xattr::set(filepath, DC_XATTR_SUBJECT, v.as_bytes()); + }, + _ => {}, + }; + Ok(()) } + + fn process_predicate(&mut self, predicate: &str, object: &str) -> bool { + match predicate.to_lowercase().as_str() { + "title" => { + self.set_title(object); + info!("found title: {}", object); + }, + "author" => { + self.set_author(object); + info!("found author: {}", object); + }, + "subject" => { + self.set_subject(object); + info!("found subject: {}", object); + }, + "typ" => { + self.set_typ(object); + info!("found typ: {}", object); + }, + "language" => { + self.set_language(object); + info!("found language: {}", object); + }, + "mime" => { + self.set_mime_str(object); + info!("found mime: {}", object); + }, + _ => { + return false; + }, + } + true + } + + fn process_line(&mut self, s: &str) { + match s.split_once(":") { + Some((predicate, object_raw)) => { + let object = object_raw.trim(); + self.process_predicate(predicate, object); + }, + None => { + }, + } + } + + pub fn from_file(f: File) -> Result<MetaData, std::io::Error> { + let mut m = MetaData::empty(); + //let f = File::open(path).unwrap(); + let mut fb = BufReader::new(f); + loop { + let mut s = String::new(); + match fb.read_line(&mut s) { + Ok(v) => { + if v == 0 { + break; + } + m.process_line(s.as_str()); + }, + Err(e) => { + return Err(e); + }, + } + } + Ok(m) + } } +//impl FromStr for MetaData { +// type Err = std::io::Error; +// +// fn from_str(s: &str) -> Result<MetaData, <MetaData as FromStr>::Err> { +// BufRead +// MetaData::empty(); +// Ok(()) +// } +//} + #[cfg(test)] mod tests { use super::MetaData; use std::path; + use tempfile::NamedTempFile; + use biblatex::EntryType; + use std::fs::File; + use env_logger; #[test] fn test_metadata_create() { @@ -249,4 +392,43 @@ mod tests { assert_eq!(meta.dc.author, "Satoshi Nakamoto"); assert_eq!(meta.fingerprint(), String::from("2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e")); } + + #[test] + fn test_metadata_set() { + let digest_hex = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"; + let digest = hex::decode(&digest_hex).unwrap(); + + let f = NamedTempFile::new_in(".").unwrap(); + let fp = f.path(); + let fps = String::from(fp.to_str().unwrap()); + + let mut m = MetaData::new("foo", "bar", EntryType::Article, digest, Some(fps)); + m.set_subject("baz"); + m.set_mime_str("foo/bar"); + m.set_language("nb-NO"); + m.to_xattr(fp); + + let m_check = MetaData::from_xattr(fp); + assert_eq!(m_check.title(), "foo"); + assert_eq!(m_check.author(), "bar"); + assert_eq!(m_check.fingerprint(), digest_hex); + assert_eq!(m_check.typ(), EntryType::Article); + assert_eq!(m_check.subject().unwrap(), "baz"); + assert_eq!(m_check.mime().unwrap(), "foo/bar"); + assert_eq!(m_check.language().unwrap(), "nb-NO"); + } + + #[test] + fn test_metadata_file() { + env_logger::init(); + + let f = File::open("testdata/meta.txt").unwrap(); + let m_check = MetaData::from_file(f).unwrap(); + assert_eq!(m_check.title(), "foo"); + assert_eq!(m_check.author(), "bar"); + assert_eq!(m_check.typ(), EntryType::Report); + assert_eq!(m_check.subject().unwrap(), "baz"); + assert_eq!(m_check.mime().unwrap(), "text/plain"); + assert_eq!(m_check.language().unwrap(), "nb-NO"); + } } diff --git a/src/rdf.rs b/src/rdf.rs @@ -33,7 +33,19 @@ use log::{ use crate::meta::MetaData; +use crate::dc::{ + DC_IRI_TITLE, + DC_IRI_CREATOR, + DC_IRI_SUBJECT, + DC_IRI_LANGUAGE, + DC_IRI_TYPE, + DC_IRI_MEDIATYPE, +}; +pub enum RdfError { + UrnError(UrnError), + HashMismatchError, +} pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { let mut tfmt = TurtleFormatter::new(w); @@ -47,37 +59,37 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { tfmt.format(&Triple{ subject: urn, - predicate: NamedNode { iri: "https://purl.org/dc/terms/title" }.into(), + predicate: NamedNode { iri: DC_IRI_TITLE }.into(), object: Literal::Simple { value: entry.title().as_str() }.into(), }); tfmt.format(&Triple{ subject: urn, - predicate: NamedNode { iri: "https://purl.org/dc/terms/creator" }.into(), + predicate: NamedNode { iri: DC_IRI_CREATOR }.into(), object: Literal::Simple { value: entry.author().as_str() }.into(), }); let typ = entry.typ().to_string(); tfmt.format(&Triple{ subject: urn, - predicate: NamedNode { iri: "https://purl.org/dc/terms/type" }.into(), + predicate: NamedNode { iri: DC_IRI_TYPE }.into(), object: Literal::Simple { value: typ.as_str() }.into(), }); match entry.subject() { Some(v) => { tfmt.format(&Triple{ subject: urn, - predicate: NamedNode { iri: "https://purl.org/dc/terms/subject" }.into(), + predicate: NamedNode { iri: DC_IRI_SUBJECT }.into(), object: Literal::Simple { value: v.as_str() }.into(), }); }, _ => (), }; - match entry.mime() { + match entry.mime() { Some(v) => { let m: String = v.to_string(); tfmt.format(&Triple{ subject: urn, - predicate: NamedNode { iri: "https://purl.org/dc/terms/MediaType" }.into(), + predicate: NamedNode { iri: DC_IRI_MEDIATYPE }.into(), object: Literal::Simple { value: m.as_str() }.into(), }); }, @@ -89,7 +101,7 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { let m: String = v.to_string(); tfmt.format(&Triple{ subject: urn, - predicate: NamedNode { iri: "https://purl.org/dc/terms/language" }.into(), + predicate: NamedNode { iri: DC_IRI_LANGUAGE }.into(), object: Literal::Simple { value: m.as_str() }.into(), }); }, @@ -101,53 +113,54 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { } -pub fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), UrnError> { +fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), RdfError> { let subject_iri = triple.subject.to_string(); let l = subject_iri.len()-1; let subject = &subject_iri[1..l]; let subject_urn = Urn::from_str(subject).unwrap(); if subject_urn.nid() != "sha512" { - return Err(UrnError::InvalidNid); + return Err(RdfError::UrnError(UrnError::InvalidNid)); } + let v = subject_urn.nss(); + let b = hex::decode(&v).unwrap(); if metadata.fingerprint().len() == 0 { - let v = subject_urn.nss(); - let b = hex::decode(&v).unwrap(); info!("setting fingerprint {}", v); metadata.set_fingerprint(b); + } else if metadata.fingerprint() != v { + return Err(RdfError::HashMismatchError); } let field = triple.predicate.iri; match field { - "https://purl.org/dc/terms/title" => { + DC_IRI_TITLE => { let title = triple.object.to_string().replace("\"", ""); metadata.set_title(title.as_str()); info!("found title: {}", title); }, - "https://purl.org/dc/terms/creator" => { + DC_IRI_CREATOR => { let author = triple.object.to_string().replace("\"", ""); metadata.set_author(author.as_str()); info!("found author: {}", author); }, - "https://purl.org/dc/terms/subject" => { + DC_IRI_SUBJECT => { let mut subject = triple.object.to_string().replace("\"", ""); metadata.set_subject(subject.as_str()); info!("found subject: {}", subject); }, - "https://purl.org/dc/terms/language" => { + DC_IRI_LANGUAGE => { let mut lang = triple.object.to_string().replace("\"", ""); metadata.set_language(lang.as_str()); info!("found language: {}", lang); }, - "https://purl.org/dc/terms/type" => { + DC_IRI_TYPE => { let mut typ = triple.object.to_string().replace("\"", ""); metadata.set_typ(typ.as_str()); info!("found entry type: {}", typ); }, - "https://purl.org/dc/terms/MediaType" => { - let mut mime_type = triple.object.to_string(); - let l = mime_type.len()-1; - metadata.set_mime_str(&mime_type[1..l]); + DC_IRI_MEDIATYPE => { + let mut mime_type = triple.object.to_string().replace("\"", ""); + metadata.set_mime_str(mime_type.as_str()); info!("found mime type: {}", mime_type); }, _ => { @@ -192,8 +205,9 @@ mod tests { let mut m = MetaData::new("foo", "bar", EntryType::Article, Vec::from(digest), None); m.set_subject("baz"); m.set_mime_str("foo/bar"); - m.set_language("en-US"); - let v = stdout(); + m.set_language("nb-NO"); + //let v = stdout(); + let mut v: Vec<u8> = vec!(); let r = write(&m, v); }