commit c1218fd2cd3d1ad96cc55144ed100e615f77e19e
parent 800291aa0fbd8c496da676b174626918a1e9678d
Author: lash <dev@holbrook.no>
Date:   Sat, 25 Jun 2022 18:38:53 +0000
Add read from turtle to metadata
Diffstat:
4 files changed, 156 insertions(+), 13 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -17,6 +17,9 @@ mime = "^0.3.13"
 unic-langid-impl = "^0.9.0"
 biblatex = "^0.6.2"
 sha2 = "^0.10.2"
+log = "^0.4"
+env_logger = "^0.9"
+urn = "^0.4.0"
 
 #[dependencies.rdf]
 #rio_turtle = "~0.7.1"
diff --git a/src/meta.rs b/src/meta.rs
@@ -42,22 +42,49 @@ impl MetaData {
     pub fn new(title: &str, author: &str, typ: EntryType, digest: Vec<u8>, filename: Option<FileName>) -> MetaData {
         let dc = DCMetaData::new(title, author, typ);
 
-        let sz = Sha512::output_size();
-        if digest.len() != sz {
-            panic!("wrong digest size, must be {}", sz);
-        }
+        let mut m = MetaData{
+                dc: dc,
+                digest: vec!(), // placeholder; the caller's digest is stored by set_fingerprint below
+                comment: String::new(),
+                //local_name: filepath.to_str().unwrap().to_string(),
+                local_name: filename,
+                publish_date: (0, 0, 0),
+                retrieval_timestamp: 0,
+        };
+        // The digest goes through set_fingerprint instead of being checked inline here.
+        m.set_fingerprint(digest);
+        m
+    }
 
+    pub fn empty() -> MetaData { // blank record: empty title and author, unknown entry type
+        let dc = DCMetaData::new("", "", EntryType::Unknown(String::new()));
         MetaData{
                 dc: dc,
-                digest: digest,
+                digest: vec!(), // empty digest; no size validation happens in this constructor
                 comment: String::new(),
                 //local_name: filepath.to_str().unwrap().to_string(),
-                local_name: filename,
+                local_name: None,
                 publish_date: (0, 0, 0),
                 retrieval_timestamp: 0,
         }
     }
 
+    pub fn set_title(&mut self, title: &str) {
+        self.dc.title = title.to_owned(); // replace the stored title with an owned copy
+    }
+
+    pub fn set_author(&mut self, author: &str) {
+        self.dc.author = String::from(author); // must target `author`; writing `title` here clobbers the title
+    }
+
+    pub fn set_fingerprint(&mut self, fingerprint: Vec<u8>) {
+        // Only a digest exactly one SHA-512 output long is accepted;
+        // any other length aborts with a panic.
+        let expected_len = Sha512::output_size();
+        assert!(fingerprint.len() == expected_len, "wrong digest size, must be {}", expected_len);
+        self.digest = fingerprint;
+    }
+
     pub fn title(&self) -> String {
         self.dc.title.clone()
     }
@@ -66,6 +93,10 @@ impl MetaData {
         self.dc.author.clone()
     }
 
+    pub fn set_typ(&mut self, typ: &str) { // parses `typ` into an EntryType and stores it
+        self.dc.typ = EntryType::from_str(typ).unwrap(); // panics when from_str returns Err
+    }
+
     pub fn typ(&self) -> EntryType {
         self.dc.typ.clone()
     }
diff --git a/src/rdf.rs b/src/rdf.rs
@@ -3,19 +3,38 @@ use std::io::{
     Read,
     Write
 };
+use std::str::FromStr;
+use std::io::{
+    BufReader,
+};
 
+use rio_turtle::{
+    TurtleParser,
+    TurtleError,
+    TurtleFormatter,
+};
+use rio_api::parser::TriplesParser;
+use rio_api::formatter::TriplesFormatter;
 use rio_api::model::{
     NamedNode,
     Literal,
     Triple,
     Subject,
 };
-use rio_turtle::TurtleFormatter;
-use rio_api::formatter::TriplesFormatter;
+use urn::{
+    Urn,
+    Error as UrnError,
+};
+
+use log::{
+    debug,
+    info,
+};
 
 use crate::meta::MetaData;
 
 
+
 pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
     let mut tfmt = TurtleFormatter::new(w);
     
@@ -81,16 +100,98 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
     Ok(0)
 }
 
+
+/// Applies one parsed turtle triple to `metadata`.
+///
+/// The subject must be a `urn:sha512:<hex>` URN; while `metadata` has no
+/// fingerprint, its hex part is decoded and stored. The predicate selects the
+/// field that receives the object text; unknown predicates are only logged.
+///
+/// Errors: the `urn` parse error for a malformed subject, `InvalidNid` for a
+/// namespace other than `sha512`, `InvalidNss` when the digest is not hex.
+pub fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), UrnError> {
+    // Drop the first and last character of the rendered subject; `get` cannot panic.
+    let subject_iri = triple.subject.to_string();
+    let subject = subject_iri.get(1..subject_iri.len().saturating_sub(1)).unwrap_or("");
+    // A malformed URN is returned to the caller instead of panicking.
+    let subject_urn = Urn::from_str(subject)?;
+    if subject_urn.nid() != "sha512" {
+        return Err(UrnError::InvalidNid);
+    }
+
+    if metadata.fingerprint().len() == 0 {
+        let v = subject_urn.nss();
+        let b = hex::decode(&v).map_err(|_| UrnError::InvalidNss)?;
+        info!("setting fingerprint {}", v);
+        metadata.set_fingerprint(b);
+    }
+
+    // Every handled predicate wants the object text without its outer delimiters.
+    let object = triple.object.to_string();
+    let value = object.get(1..object.len().saturating_sub(1)).unwrap_or("");
+    let field = triple.predicate.iri;
+    match field {
+        "https://purl.org/dc/terms/title" => {
+            metadata.set_title(value);
+            info!("found title: {}", object);
+        },
+        "https://purl.org/dc/terms/creator" => {
+            metadata.set_author(value);
+            info!("found author: {}", object);
+        },
+        "https://purl.org/dc/terms/subject" => {
+            metadata.set_subject(value);
+            info!("found subject: {}", object);
+        },
+        "https://purl.org/dc/terms/language" => {
+            metadata.set_language(value);
+            info!("found language: {}", object);
+        },
+        "https://purl.org/dc/terms/type" => {
+            metadata.set_typ(value);
+            info!("found entry type: {}", object);
+        },
+        "https://purl.org/dc/terms/MediaType" => {
+            metadata.set_mime_str(value);
+            info!("found mime type: {}", object);
+        },
+        _ => {
+            debug!("skipping unknown predicate: {}", field);
+        },
+    };
+    Ok(())
+}
+
+/// Parses turtle from `r` and applies every triple to a fresh `MetaData`.
+/// NOTE(review): the metadata and the parser result are dropped on return.
+pub fn read(r: impl Read) {
+    let mut metadata = MetaData::empty();
+    let bf = BufReader::new(r);
+    let mut tp = TurtleParser::new(bf, None);
+    let _parsed: Result<_, TurtleError> = tp.parse_all(&mut |triple| {
+        // A rejected triple is reported and skipped rather than silently ignored.
+        if let Err(e) = handle_parse_match(&mut metadata, triple) {
+            debug!("skipping triple: {:?}", e);
+        }
+        Ok(())
+    });
+}
+
 #[cfg(test)]
 mod tests {
-    use super::write;
+    use super::{
+        write,
+        read,
+    };
     use super::MetaData;
     use std::io::stdout;
+    use std::fs::File;
     use std::default::Default;
     use biblatex::EntryType;
+    use env_logger;
 
     #[test]
-    fn test_write() {
+    fn test_turtle_write() {
         let mut digest = Vec::with_capacity(64);
         digest.resize(64, 0x2a);
         let mut m = MetaData::new("foo", "bar", EntryType::Article, Vec::from(digest), None);
@@ -100,4 +201,12 @@ mod tests {
         let v = stdout();
         let r = write(&m, v);
     }
+
+    #[test]
+    fn test_turtle_read() {
+        // try_init: a logger already registered in this test binary must not cause a panic.
+        let _ = env_logger::try_init();
+        let f = File::open("testdata/meta.ttl").unwrap();
+        read(&f);
+    }
 }
diff --git a/testdata/meta.ttl b/testdata/meta.ttl
@@ -1,10 +1,10 @@
 @prefix dcterms: <https://purl.org/dc/terms/> .
 @prefix dcmi: <https://purl.org/dc/dcmi/> .
 
-<URN:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e>
+<urn:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e>
 	dcterms:title "Bitcoin: A Peer-to-Peer Electronic Cash System" ;
 	dcterms:subject "bitcoin,cryptocurrency,cryptography" ;
 	dcterms:creator "Satoshi Nakamoto" ;
-	dcterms:type "Whitepaper" ;
+	dcterms:type "article" ;
 	dcterms:MediaType "application/pdf" ;
-	dcterms:language "en" .
+	dcterms:language "en-US" .