commit 5da4623992d55e2e5356da183985b39754a2986c
parent 343e902b10679c37b92e59c20fa32a87f78df08d
Author: lash <dev@holbrook.no>
Date: Sun, 26 Jun 2022 06:48:58 +0000
Add key value importer from file
Diffstat:
M | Cargo.toml | | | 4 | ++++ |
M | src/dc/mod.rs | | | 13 | +++++++++++++ |
M | src/meta.rs | | | 194 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
M | src/rdf.rs | | | 58 | ++++++++++++++++++++++++++++++++++++---------------------- |
4 files changed, 241 insertions(+), 28 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -20,6 +20,10 @@ sha2 = "^0.10.2"
log = "^0.4"
env_logger = "^0.9"
urn = "^0.4.0"
+#tempfile = "^3.3.0"
+
+[dev-dependencies]
+tempfile = "^3.3.0"
#[dependencies.rdf]
#rio_turtle = "~0.7.1"
diff --git a/src/dc/mod.rs b/src/dc/mod.rs
@@ -13,6 +13,19 @@ pub struct DCMetaData {
pub language: Option<LanguageIdentifier>,
}
+// IRIs used as RDF predicates for the Dublin Core fields of an entry.
+// NOTE(review): DCMI publishes the terms namespace as `http://purl.org/dc/terms/`;
+// confirm that the `https` scheme used here is intentional before relying on interoperability.
+pub const DC_IRI_TITLE: &str = "https://purl.org/dc/terms/title";
+pub const DC_IRI_CREATOR: &str = "https://purl.org/dc/terms/creator";
+pub const DC_IRI_SUBJECT: &str = "https://purl.org/dc/terms/subject";
+pub const DC_IRI_LANGUAGE: &str = "https://purl.org/dc/terms/language";
+pub const DC_IRI_TYPE: &str = "https://purl.org/dc/terms/type";
+pub const DC_IRI_MEDIATYPE: &str = "https://purl.org/dc/terms/MediaType";
+// Extended attribute names under which the same Dublin Core fields are stored on a file.
+pub const DC_XATTR_TITLE: &str = "user.dcterms:title";
+pub const DC_XATTR_CREATOR: &str = "user.dcterms:creator";
+pub const DC_XATTR_SUBJECT: &str = "user.dcterms:subject";
+pub const DC_XATTR_LANGUAGE: &str = "user.dcterms:language";
+pub const DC_XATTR_TYPE: &str = "user.dcterms:type";
+pub const DC_XATTR_MEDIATYPE: &str = "user.dcterms:MediaType";
+
impl DCMetaData {
pub fn new(title: &str, author: &str, typ: EntryType) -> DCMetaData {
DCMetaData{
diff --git a/src/meta.rs b/src/meta.rs
@@ -13,15 +13,31 @@ use std::fs::{
File,
metadata,
};
-use std::io::Read;
+use std::path::Path;
+use std::io::{
+ Read,
+ BufRead,
+ BufReader,
+};
use unic_langid_impl::LanguageIdentifier;
use biblatex::EntryType;
use std::str::FromStr;
use std::os::linux::fs::MetadataExt;
-use crate::dc::DCMetaData;
+use crate::dc::{
+ DCMetaData,
+ DC_XATTR_TITLE,
+ DC_XATTR_CREATOR,
+ DC_XATTR_SUBJECT,
+ DC_XATTR_LANGUAGE,
+ DC_XATTR_TYPE,
+ DC_XATTR_MEDIATYPE,
+};
-//pub type Digest = Vec<u8>;
+use log::{
+ debug,
+ info,
+};
pub type PublishDate = (u8, u8, u32);
@@ -38,6 +54,10 @@ pub struct MetaData {
retrieval_timestamp: u32,
}
+/// Placeholder with an empty body; it currently performs no check and has no effect.
+pub fn check_xattr() {}
+
impl MetaData {
pub fn new(title: &str, author: &str, typ: EntryType, digest: Vec<u8>, filename: Option<FileName>) -> MetaData {
let dc = DCMetaData::new(title, author, typ);
@@ -46,7 +66,6 @@ impl MetaData {
dc: dc,
digest: vec!(),
comment: String::new(),
- //local_name: filepath.to_str().unwrap().to_string(),
local_name: filename,
publish_date: (0, 0, 0),
retrieval_timestamp: 0,
@@ -74,7 +93,7 @@ impl MetaData {
}
pub fn set_author(&mut self, author: &str) {
- self.dc.title = String::from(author);
+ self.dc.author = String::from(author);
}
pub fn set_fingerprint(&mut self, fingerprint: Vec<u8>) {
@@ -228,18 +247,142 @@ impl MetaData {
_ => {},
}
+ match xattr::get(filepath, "user.dcterms:language") {
+ Ok(v) => {
+ match v {
+ Some(v) => {
+ let s = std::str::from_utf8(&v).unwrap();
+ metadata.set_language(s);
+ },
+ None => {},
+ }
+ },
+ _ => {},
+ }
+
metadata
}
- pub fn to_xattr(&self, filepath: &Path) -> Result<(), std::io::Error> {
+
+ /// Writes the Dublin Core fields of this entry to `filepath` as extended attributes.
+ ///
+ /// Title, creator and type are always written. Language, media type and subject are
+ /// written only when they are set on the entry.
+ ///
+ /// # Errors
+ ///
+ /// Returns the first error reported by `xattr::set`; attributes written before the
+ /// failing call are left in place.
+ pub fn to_xattr(&self, filepath: &path::Path) -> Result<(), std::io::Error> {
+     xattr::set(filepath, DC_XATTR_TITLE, self.dc.title.as_bytes())?;
+     xattr::set(filepath, DC_XATTR_CREATOR, self.dc.author.as_bytes())?;
+     xattr::set(filepath, DC_XATTR_TYPE, self.dc.typ.to_string().as_bytes())?;
+
+     if let Some(v) = &self.dc.language {
+         xattr::set(filepath, DC_XATTR_LANGUAGE, v.to_string().as_bytes())?;
+     }
+
+     if let Some(v) = &self.dc.mime {
+         xattr::set(filepath, DC_XATTR_MEDIATYPE, v.to_string().as_bytes())?;
+     }
+
+     if let Some(v) = &self.dc.subject {
+         xattr::set(filepath, DC_XATTR_SUBJECT, v.as_bytes())?;
+     }
+
Ok(())
}
+
+ /// Applies a single key/value pair to this entry.
+ ///
+ /// `predicate` is lowercased and compared against `title`, `author`, `subject`, `typ`,
+ /// `language` and `mime`. On a match the corresponding setter is called with `object`
+ /// and the value is logged at info level.
+ ///
+ /// Returns `true` when the key was recognised, `false` otherwise.
+ fn process_predicate(&mut self, predicate: &str, object: &str) -> bool {
+     let key = predicate.to_lowercase();
+     if key == "title" {
+         self.set_title(object);
+         info!("found title: {}", object);
+     } else if key == "author" {
+         self.set_author(object);
+         info!("found author: {}", object);
+     } else if key == "subject" {
+         self.set_subject(object);
+         info!("found subject: {}", object);
+     } else if key == "typ" {
+         self.set_typ(object);
+         info!("found typ: {}", object);
+     } else if key == "language" {
+         self.set_language(object);
+         info!("found language: {}", object);
+     } else if key == "mime" {
+         self.set_mime_str(object);
+         info!("found mime: {}", object);
+     } else {
+         // Unknown key: nothing was applied.
+         return false;
+     }
+     true
+ }
+
+ /// Parses one `key: value` line and applies it to this entry.
+ ///
+ /// The line is split at the first `:`. Both the key and the value are trimmed, so
+ /// `title : foo` and an indented `title: foo` are treated like `title:foo`.
+ /// Lines without a `:` and lines with an unrecognised key are skipped and reported
+ /// at debug level instead of being dropped silently.
+ fn process_line(&mut self, s: &str) {
+     match s.split_once(':') {
+         Some((predicate, object)) => {
+             let predicate = predicate.trim();
+             if !self.process_predicate(predicate, object.trim()) {
+                 debug!("ignoring unknown key: {}", predicate);
+             }
+         },
+         None => {
+             debug!("ignoring line without key/value separator: {}", s.trim_end());
+         },
+     }
+ }
+
+ /// Builds a `MetaData` from a text file holding one `key: value` pair per line.
+ ///
+ /// Starts from `MetaData::empty()` and feeds every line of `f` to `process_line`.
+ ///
+ /// # Errors
+ ///
+ /// Returns the first I/O error encountered while reading a line.
+ pub fn from_file(f: File) -> Result<MetaData, std::io::Error> {
+     let mut entry = MetaData::empty();
+     for line in BufReader::new(f).lines() {
+         entry.process_line(line?.as_str());
+     }
+     Ok(entry)
+ }
}
+//impl FromStr for MetaData {
+// type Err = std::io::Error;
+//
+// fn from_str(s: &str) -> Result<MetaData, <MetaData as FromStr>::Err> {
+// BufRead
+// MetaData::empty();
+// Ok(())
+// }
+//}
+
#[cfg(test)]
mod tests {
use super::MetaData;
use std::path;
+ use tempfile::NamedTempFile;
+ use biblatex::EntryType;
+ use std::fs::File;
+ use env_logger;
#[test]
fn test_metadata_create() {
@@ -249,4 +392,43 @@ mod tests {
assert_eq!(meta.dc.author, "Satoshi Nakamoto");
assert_eq!(meta.fingerprint(), String::from("2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e"));
}
+
+ // Round-trips an entry through extended attributes on a temporary file.
+ #[test]
+ fn test_metadata_set() {
+     // NOTE(review): this looks like the SHA-512 of empty input, which would match the
+     // freshly created, empty temp file if `from_xattr` hashes file contents; confirm.
+     let digest_hex = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e";
+     let digest = hex::decode(digest_hex).unwrap();
+
+     let f = NamedTempFile::new_in(".").unwrap();
+     let fp = f.path();
+     let fps = String::from(fp.to_str().unwrap());
+
+     let mut m = MetaData::new("foo", "bar", EntryType::Article, digest, Some(fps));
+     m.set_subject("baz");
+     m.set_mime_str("foo/bar");
+     m.set_language("nb-NO");
+     // Check the write result so a failed write is reported here,
+     // not as a misleading assertion failure further down.
+     m.to_xattr(fp).expect("writing extended attributes to the temp file failed");
+
+     let m_check = MetaData::from_xattr(fp);
+     assert_eq!(m_check.title(), "foo");
+     assert_eq!(m_check.author(), "bar");
+     assert_eq!(m_check.fingerprint(), digest_hex);
+     assert_eq!(m_check.typ(), EntryType::Article);
+     assert_eq!(m_check.subject().unwrap(), "baz");
+     assert_eq!(m_check.mime().unwrap(), "foo/bar");
+     assert_eq!(m_check.language().unwrap(), "nb-NO");
+ }
+
+ // Imports an entry from the key/value fixture file and checks every field.
+ #[test]
+ fn test_metadata_file() {
+     // `init()` panics when a logger is already installed; tests share one process,
+     // so use the fallible variant and ignore the "already set" error.
+     let _ = env_logger::try_init();
+
+     let f = File::open("testdata/meta.txt").unwrap();
+     let m_check = MetaData::from_file(f).unwrap();
+     assert_eq!(m_check.title(), "foo");
+     assert_eq!(m_check.author(), "bar");
+     assert_eq!(m_check.typ(), EntryType::Report);
+     assert_eq!(m_check.subject().unwrap(), "baz");
+     assert_eq!(m_check.mime().unwrap(), "text/plain");
+     assert_eq!(m_check.language().unwrap(), "nb-NO");
+ }
}
diff --git a/src/rdf.rs b/src/rdf.rs
@@ -33,7 +33,19 @@ use log::{
use crate::meta::MetaData;
+use crate::dc::{
+ DC_IRI_TITLE,
+ DC_IRI_CREATOR,
+ DC_IRI_SUBJECT,
+ DC_IRI_LANGUAGE,
+ DC_IRI_TYPE,
+ DC_IRI_MEDIATYPE,
+};
+/// Errors raised while mapping parsed RDF triples onto a `MetaData` entry.
+#[derive(Debug)]
+pub enum RdfError {
+    /// The subject URN was rejected, e.g. its namespace identifier is not `sha512`.
+    UrnError(UrnError),
+    /// The digest in the subject URN differs from the fingerprint already set on the entry.
+    HashMismatchError,
+}
pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
let mut tfmt = TurtleFormatter::new(w);
@@ -47,37 +59,37 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
tfmt.format(&Triple{
subject: urn,
- predicate: NamedNode { iri: "https://purl.org/dc/terms/title" }.into(),
+ predicate: NamedNode { iri: DC_IRI_TITLE }.into(),
object: Literal::Simple { value: entry.title().as_str() }.into(),
});
tfmt.format(&Triple{
subject: urn,
- predicate: NamedNode { iri: "https://purl.org/dc/terms/creator" }.into(),
+ predicate: NamedNode { iri: DC_IRI_CREATOR }.into(),
object: Literal::Simple { value: entry.author().as_str() }.into(),
});
let typ = entry.typ().to_string();
tfmt.format(&Triple{
subject: urn,
- predicate: NamedNode { iri: "https://purl.org/dc/terms/type" }.into(),
+ predicate: NamedNode { iri: DC_IRI_TYPE }.into(),
object: Literal::Simple { value: typ.as_str() }.into(),
});
match entry.subject() {
Some(v) => {
tfmt.format(&Triple{
subject: urn,
- predicate: NamedNode { iri: "https://purl.org/dc/terms/subject" }.into(),
+ predicate: NamedNode { iri: DC_IRI_SUBJECT }.into(),
object: Literal::Simple { value: v.as_str() }.into(),
});
},
_ => (),
};
- match entry.mime() {
+ match entry.mime() {
Some(v) => {
let m: String = v.to_string();
tfmt.format(&Triple{
subject: urn,
- predicate: NamedNode { iri: "https://purl.org/dc/terms/MediaType" }.into(),
+ predicate: NamedNode { iri: DC_IRI_MEDIATYPE }.into(),
object: Literal::Simple { value: m.as_str() }.into(),
});
},
@@ -89,7 +101,7 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
let m: String = v.to_string();
tfmt.format(&Triple{
subject: urn,
- predicate: NamedNode { iri: "https://purl.org/dc/terms/language" }.into(),
+ predicate: NamedNode { iri: DC_IRI_LANGUAGE }.into(),
object: Literal::Simple { value: m.as_str() }.into(),
});
},
@@ -101,53 +113,54 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
}
-pub fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), UrnError> {
+fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), RdfError> {
let subject_iri = triple.subject.to_string();
let l = subject_iri.len()-1;
let subject = &subject_iri[1..l];
let subject_urn = Urn::from_str(subject).unwrap();
if subject_urn.nid() != "sha512" {
- return Err(UrnError::InvalidNid);
+ return Err(RdfError::UrnError(UrnError::InvalidNid));
}
+ let v = subject_urn.nss();
+ let b = hex::decode(&v).unwrap();
if metadata.fingerprint().len() == 0 {
- let v = subject_urn.nss();
- let b = hex::decode(&v).unwrap();
info!("setting fingerprint {}", v);
metadata.set_fingerprint(b);
+ } else if metadata.fingerprint() != v {
+ return Err(RdfError::HashMismatchError);
}
let field = triple.predicate.iri;
match field {
- "https://purl.org/dc/terms/title" => {
+ DC_IRI_TITLE => {
let title = triple.object.to_string().replace("\"", "");
metadata.set_title(title.as_str());
info!("found title: {}", title);
},
- "https://purl.org/dc/terms/creator" => {
+ DC_IRI_CREATOR => {
let author = triple.object.to_string().replace("\"", "");
metadata.set_author(author.as_str());
info!("found author: {}", author);
},
- "https://purl.org/dc/terms/subject" => {
+ DC_IRI_SUBJECT => {
let mut subject = triple.object.to_string().replace("\"", "");
metadata.set_subject(subject.as_str());
info!("found subject: {}", subject);
},
- "https://purl.org/dc/terms/language" => {
+ DC_IRI_LANGUAGE => {
let mut lang = triple.object.to_string().replace("\"", "");
metadata.set_language(lang.as_str());
info!("found language: {}", lang);
},
- "https://purl.org/dc/terms/type" => {
+ DC_IRI_TYPE => {
let mut typ = triple.object.to_string().replace("\"", "");
metadata.set_typ(typ.as_str());
info!("found entry type: {}", typ);
},
- "https://purl.org/dc/terms/MediaType" => {
- let mut mime_type = triple.object.to_string();
- let l = mime_type.len()-1;
- metadata.set_mime_str(&mime_type[1..l]);
+ DC_IRI_MEDIATYPE => {
+ let mut mime_type = triple.object.to_string().replace("\"", "");
+ metadata.set_mime_str(mime_type.as_str());
info!("found mime type: {}", mime_type);
},
_ => {
@@ -192,8 +205,9 @@ mod tests {
let mut m = MetaData::new("foo", "bar", EntryType::Article, Vec::from(digest), None);
m.set_subject("baz");
m.set_mime_str("foo/bar");
- m.set_language("en-US");
- let v = stdout();
+ m.set_language("nb-NO");
+ //let v = stdout();
+ let mut v: Vec<u8> = vec!();
let r = write(&m, v);
}