commit 68a17a5fd159bf8e1cd9be2a492d3a3b157b6986
parent 9214f86501ef9d9f5ab834bbdb6d18fe45b2fb8e
Author: lash <dev@holbrook.no>
Date:   Sun, 24 Jul 2022 08:57:09 +0000
Add language, keywords, digest from biblatex, rehabilitate scan
Diffstat:
7 files changed, 168 insertions(+), 33 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -35,17 +35,17 @@ tempfile = "^3.3.0"
 
 [dependencies.rio_turtle]
 version = "~0.7.1"
-optional = true
+#optional = true
 
 [dependencies.rio_api]
 version = "^0.7.1"
-optional = true
+#optional = true
 
 [dependencies.tree_magic]
 version = "^0.2.1"
 optional = true
 
 [features]
-rdf = ["rio_turtle", "rio_api"]
+#rdf = ["rio_turtle", "rio_api"]
 #dump_bibtex = ["biblatex"]
 magic = ["tree_magic"]
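With rio_turtle and rio_api no longer optional and the rdf feature commented out, the RDF reader and writer used below are compiled into every build; assuming a checkout of this tree, a plain build with no feature flags should now pull them in:

    cargo build
    cargo test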
diff --git a/src/biblatex.rs b/src/biblatex.rs
@@ -0,0 +1,87 @@
+use std::io::{
+    Read,
+};
+use std::str;
+
+use log::debug;
+use biblatex::{
+    Bibliography,
+    Type,
+    Entry as Entry,
+};
+
+use crate::meta::MetaData;
+
+fn parse_digest(entry: &Entry) -> Vec<u8> {
+    let note = entry.get("note").unwrap();
+    let note_s = String::from_chunks(note).unwrap();
+    let mut digest_val = note_s.split(":");
+
+    let mut digest = Vec::new();
+
+    match digest_val.next() {
+        Some(v) => {
+            if v == "sha512" {
+                let digest_hex = digest_val.next().unwrap();
+                let mut digest_imported = hex::decode(digest_hex).unwrap();
+                digest.append(&mut digest_imported);
+                debug!("parsed digest {}", hex::encode(&digest));
+            }
+        },
+        None => {},
+    };
+    
+    if digest.len() == 0 {
+        digest.resize(64, 0);
+    }
+
+    digest
+}
+
+pub fn read_all(mut r: impl Read) -> Vec<MetaData> {
+    let mut s = String::new();
+    let c = r.read_to_string(&mut s);
+    let bib = Bibliography::parse(&s).unwrap();
+
+    let mut rr: Vec<MetaData> = vec!();
+
+    for e in bib.iter() {
+        let authors = e.author()
+            .unwrap()
+            .into_iter()
+            .map(|v| {
+            format!("{} {}", v.given_name, v.name)
+        });
+        let authors_s = authors.fold(String::new(), |x, y| {
+            if x.len() == 0 {
+                return y
+            }
+            format!("{}, {}", x, y)
+        });
+        let digest = parse_digest(&e);
+
+        let title = e.title().unwrap();
+        let title_s = String::from_chunks(title).unwrap();
+
+        let mut m = MetaData::new(title_s.as_str(), authors_s.as_str(), e.entry_type.clone(), digest, None);
+
+        match e.keywords() {
+            Ok(v) => {
+                let s = String::from_chunks(v).unwrap();
+                m.set_subject(s.as_str());
+            },
+            _ => {},
+        };
+
+        match e.language() {
+            Ok(v) => {
+                m.set_language(v.as_str());
+            },
+            _ => {},
+        }
+
+        debug!("read metadata {:?}", &m);
+        rr.push(m);
+    }
+    rr
+}
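A minimal sketch of exercising the new reader, assuming it is run from the repository root against the testdata file added further down in this commit:

    use std::fs::File;
    use kitab::biblatex::read_all;

    fn main() {
        // each biblatex entry becomes a MetaData record with title, authors,
        // entry type, subject (keywords), language and the sha512 digest from "note"
        let f = File::open("testdata/meta.biblatex").unwrap();
        for m in read_all(f) {
            println!("{:?}", m); // uses the Debug impl added to MetaData below
        }
    }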
diff --git a/src/lib.rs b/src/lib.rs
@@ -6,7 +6,6 @@ pub mod dc;
 
 pub mod store;
 
-#[cfg(feature = "rdf")]
 pub mod rdf;
 
 pub mod biblatex;
diff --git a/src/main.rs b/src/main.rs
@@ -25,7 +25,6 @@ use log::{
     info,
 };
 
-#[cfg(feature = "rdf")]
 use kitab::rdf::{
     read as rdf_read,
     write as rdf_write,
@@ -81,20 +80,20 @@ fn args_setup() -> ArgMatches<'static> {
 
 // commands
 // kitab import <file> - attempt in order import rdf, import spec
-// kitab export <file> - export rdf/turtle
-// kitab scan <path> - recursively 
+    // kitab export <file> - export rdf/turtle
+    // kitab scan <path> - recursively 
 
-fn resolve_directory(args: &ArgMatches) -> PathBuf {
-    match BaseDirs::new() {
-        Some(v) => {
+    fn resolve_directory(args: &ArgMatches) -> PathBuf {
+        match BaseDirs::new() {
+            Some(v) => {
             let d = v.data_dir();
             d.join("kitab")
-                .join("idx")
-        },
-        _ => {
-            PathBuf::from(".")
-                .join(".kitab")
-                .join("/idx")
+                    .join("idx")
+            },
+            _ => {
+                PathBuf::from(".")
+                    .join(".kitab")
+                    .join("/idx")
         },
     }
 }
@@ -120,24 +119,29 @@ fn str_to_path(args: &ArgMatches) -> PathBuf {
     p_canon
 }
 
+fn store(index_path: &Path, m: &MetaData) {
+    let fp = index_path.join(m.fingerprint());
+    create_dir_all(&index_path);
+    debug!("writing record for title {} to {:?}", m.title(), &fp);
+
+    let ff = File::create(&fp).unwrap();
+    rdf_write(&m, &ff).unwrap();
+    debug!("stored as rdf {:?}", fp);
+}
+
 fn exec_import_rdf(f: &Path, index_path: &Path) {
-    #[cfg(feature = "rdf")]
-    {
-        let f = File::open(f).unwrap();
-        let m = rdf_read(&f);
-        
-        let fp = index_path.join(m.fingerprint());
-        create_dir_all(&index_path);
-        debug!("writing record for title {} to {:?}", m.title(), &fp);
-    
-        let ff = File::create(fp).unwrap();
-        rdf_write(&m, &ff).unwrap();
-    }
+    let f = File::open(f).unwrap();
+    let m = rdf_read(&f);
+    store(index_path, &m);    
 }
 
 fn exec_import_biblatex(f: &Path, index_path: &Path) {
     let f = File::open(f).unwrap();
-    biblatex_read_all(&f);
+    let entries = biblatex_read_all(&f);
+
+    for m in entries {
+        store(index_path, &m);    
+    }
 }
 
 fn exec_scan(p: &Path, index_path: &Path) {
@@ -152,9 +156,10 @@ fn exec_scan(p: &Path, index_path: &Path) {
             let fp = index_path.join(&z_hex);
             match fp.canonicalize() {
                 Ok(v) => {
-                    info!("apply {:?} for {:?}", entry, z_hex);
-                    let m = MetaData::from_path(ep).unwrap();
-                    m.to_xattr(&p);
+                    let f = File::open(&v).unwrap();
+                    let m = rdf_read(f);
+                    info!("apply {:?} -> {:?} for {:?}", entry, &m, z_hex);
+                    m.to_xattr(&ep);
                 },
                 Err(e) => {
                     debug!("metadata not found for {:?} -> {:?}", entry, z_hex);
@@ -175,7 +180,7 @@ fn main() {
         Some(v) => {
             let p = str_to_path(v);
             info!("have path {:?}", &p);
-            //return exec_import(p.as_path(), index_dir.as_path());
+            //return exec_import_rdf(p.as_path(), index_dir.as_path());
             return exec_import_biblatex(p.as_path(), index_dir.as_path());
         },
         _ => {},
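Taken together, import now writes one RDF record per biblatex entry into the index via store(), and scan reads those records back and applies them to matching files as extended attributes. Rough usage, assuming the kitab binary from this tree and the subcommands named in the comments above (index path shown for Linux; ~/Documents stands in for any directory to scan):

    kitab import testdata/meta.biblatex
    ls ~/.local/share/kitab/idx
    kitab scan ~/Documents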
diff --git a/src/meta.rs b/src/meta.rs
@@ -359,6 +359,7 @@ impl MetaData {
 
     pub fn from_path(p: &path::Path) -> Result<MetaData, std::io::Error> {
         let f = File::open(&p).unwrap();
+        debug!("openning {}", p.display());
         let mut m = MetaData::from_file(f).unwrap();
         Ok(m)
     }
@@ -385,6 +386,12 @@ impl MetaData {
     }
 }
 
+impl fmt::Debug for MetaData {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", format_args!("title \"{}\" author \"{}\" digest {}", self.title(), self.author(), self.fingerprint()))
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::MetaData;
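For reference, with the bitcoin_whitepaper entry from the testdata below, the new Debug output would read roughly as follows (assuming fingerprint() renders the stored digest as hex):

    title "Bitcoin: A Peer-to-Peer Electronic Cash System" author "Satoshi Nakamoto" digest 2ac531ee...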
diff --git a/testdata/meta.biblatex b/testdata/meta.biblatex
@@ -0,0 +1,17 @@
+@article{
+	bitcoin_whitepaper,
+	title = "Bitcoin: A Peer-to-Peer Electronic Cash System",
+	author = "Nakamoto, Satoshi",
+	keywords = "bitcoin,cryptocurrency",
+	language = "en",
+	note = "sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e",
+}
+@inproceedings{
+	decentralizedmarketplace_smartcities,
+	author = "Gowri Sankar Ramachandran, Rahul Radhakrishnan and Bhaskar Krishnamachari",
+	title = "Towards a Decentralized Data Marketplace for Smart Cities",
+	booktitle = "IEEE International Smart Cities Conference",
+	month = sep,
+	year = 2018,
+	note = "sha512:f450b0b35ed8bd1c00b45b4f6ebd645079ae8bf3b8abd28aea62fc2ab3bab2878e021e0b6c182f776e24e5ed956c204d647b4c5b0f64a73e3753f736ffe2818c",
+}
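The note fields follow the sha512:<hex> convention that parse_digest expects. A sketch of producing such a value for a document, assuming the sha2 crate (not a dependency shown in this commit) alongside the hex crate already used above:

    use sha2::{Digest, Sha512};

    // returns e.g. "sha512:2ac531ee..." for the given file contents
    fn biblatex_note(data: &[u8]) -> String {
        let mut h = Sha512::new();
        h.update(data);
        format!("sha512:{}", hex::encode(h.finalize()))
    }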
diff --git a/tests/rdf_test.rs b/tests/rdf_test.rs
@@ -0,0 +1,20 @@
+use std::io::{
+    Write,
+    BufWriter,
+};
+
+use biblatex::EntryType;
+
+use kitab::rdf::write as rdf_write;
+use kitab::meta::MetaData;
+
+
+#[test]
+fn test_rdf_dump() {
+    let v = Vec::new();
+    let w = BufWriter::new(v);
+    let mut digest: Vec<u8> = Vec::new();
+    digest.resize(64, 0);
+    let metadata = MetaData::new("foo", "Bar Baz", EntryType::Article, digest, None);
+    let r = rdf_write(&metadata, w);
+}