kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

commit 68a17a5fd159bf8e1cd9be2a492d3a3b157b6986
parent 9214f86501ef9d9f5ab834bbdb6d18fe45b2fb8e
Author: lash <dev@holbrook.no>
Date:   Sun, 24 Jul 2022 08:57:09 +0000

Add language, keywords, digest from biblatex, rehabilitate scan

Diffstat:
M Cargo.toml | 6 +++---
A src/biblatex.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/lib.rs | 1 -
M src/main.rs | 63 ++++++++++++++++++++++++++++++++++-----------------------------
M src/meta.rs | 7 +++++++
A testdata/meta.biblatex | 17 +++++++++++++++++
A tests/rdf_test.rs | 20 ++++++++++++++++++++
7 files changed, 168 insertions(+), 33 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -35,17 +35,17 @@ tempfile = "^3.3.0" [dependencies.rio_turtle] version = "~0.7.1" -optional = true +#optional = true [dependencies.rio_api] version = "^0.7.1" -optional = true +#optional = true [dependencies.tree_magic] version = "^0.2.1" optional = true [features] -rdf = ["rio_turtle", "rio_api"] +#rdf = ["rio_turtle", "rio_api"] #dump_bibtex = ["biblatex"] magic = ["tree_magic"] diff --git a/src/biblatex.rs b/src/biblatex.rs @@ -0,0 +1,87 @@ +use std::io::{ + Read, +}; +use std::str; + +use log::debug; +use biblatex::{ + Bibliography, + Type, + Entry as Entry, +}; + +use crate::meta::MetaData; + +fn parse_digest(entry: &Entry) -> Vec<u8> { + let note = entry.get("note").unwrap(); + let note_s = String::from_chunks(note).unwrap(); + let mut digest_val = note_s.split(":"); + + let mut digest = Vec::new(); + + match digest_val.next() { + Some(v) => { + if v == "sha512" { + let digest_hex = digest_val.next().unwrap(); + let mut digest_imported = hex::decode(digest_hex).unwrap(); + digest.append(&mut digest_imported); + debug!("parsed digest {}", hex::encode(&digest)); + } + }, + None => {}, + }; + + if digest.len() == 0 { + digest.resize(64, 0); + } + + digest +} + +pub fn read_all(mut r: impl Read) -> Vec<MetaData> { + let mut s = String::new(); + let c = r.read_to_string(&mut s); + let bib = Bibliography::parse(&s).unwrap(); + + let mut rr: Vec<MetaData> = vec!(); + + for e in bib.iter() { + let authors = e.author() + .unwrap() + .into_iter() + .map(|v| { + format!("{} {}", v.given_name, v.name) + }); + let authors_s = authors.fold(String::new(), |x, y| { + if x.len() == 0 { + return y + } + format!("{}, {}", x, y) + }); + let digest = parse_digest(&e); + + let title = e.title().unwrap(); + let title_s = String::from_chunks(title).unwrap(); + + let mut m = MetaData::new(title_s.as_str(), authors_s.as_str(), e.entry_type.clone(), digest, None); + + match e.keywords() { + Ok(v) => { + let s = String::from_chunks(v).unwrap(); + 
m.set_subject(s.as_str()); + }, + _ => {}, + }; + + match e.language() { + Ok(v) => { + m.set_language(v.as_str()); + }, + _ => {}, + } + + debug!("read metadata {:?}", &m); + rr.push(m); + } + rr +} diff --git a/src/lib.rs b/src/lib.rs @@ -6,7 +6,6 @@ pub mod dc; pub mod store; -#[cfg(feature = "rdf")] pub mod rdf; pub mod biblatex; diff --git a/src/main.rs b/src/main.rs @@ -25,7 +25,6 @@ use log::{ info, }; -#[cfg(feature = "rdf")] use kitab::rdf::{ read as rdf_read, write as rdf_write, @@ -81,20 +80,20 @@ fn args_setup() -> ArgMatches<'static> { // commands // kitab import <file> - attempt in order import rdf, import spec -// kitab export <file> - export rdf/turtle -// kitab scan <path> - recursively + // kitab export <file> - export rdf/turtle + // kitab scan <path> - recursively -fn resolve_directory(args: &ArgMatches) -> PathBuf { - match BaseDirs::new() { - Some(v) => { + fn resolve_directory(args: &ArgMatches) -> PathBuf { + match BaseDirs::new() { + Some(v) => { let d = v.data_dir(); d.join("kitab") - .join("idx") - }, - _ => { - PathBuf::from(".") - .join(".kitab") - .join("/idx") + .join("idx") + }, + _ => { + PathBuf::from(".") + .join(".kitab") + .join("/idx") }, } } @@ -120,24 +119,29 @@ fn str_to_path(args: &ArgMatches) -> PathBuf { p_canon } +fn store(index_path: &Path, m: &MetaData) { + let fp = index_path.join(m.fingerprint()); + create_dir_all(&index_path); + debug!("writing record for title {} to {:?}", m.title(), &fp); + + let ff = File::create(&fp).unwrap(); + rdf_write(&m, &ff).unwrap(); + debug!("stored as rdf {:?}", fp); +} + fn exec_import_rdf(f: &Path, index_path: &Path) { - #[cfg(feature = "rdf")] - { - let f = File::open(f).unwrap(); - let m = rdf_read(&f); - - let fp = index_path.join(m.fingerprint()); - create_dir_all(&index_path); - debug!("writing record for title {} to {:?}", m.title(), &fp); - - let ff = File::create(fp).unwrap(); - rdf_write(&m, &ff).unwrap(); - } + let f = File::open(f).unwrap(); + let m = rdf_read(&f); + 
store(index_path, &m); } fn exec_import_biblatex(f: &Path, index_path: &Path) { let f = File::open(f).unwrap(); - biblatex_read_all(&f); + let entries = biblatex_read_all(&f); + + for m in entries { + store(index_path, &m); + } } fn exec_scan(p: &Path, index_path: &Path) { @@ -152,9 +156,10 @@ fn exec_scan(p: &Path, index_path: &Path) { let fp = index_path.join(&z_hex); match fp.canonicalize() { Ok(v) => { - info!("apply {:?} for {:?}", entry, z_hex); - let m = MetaData::from_path(ep).unwrap(); - m.to_xattr(&p); + let f = File::open(&v).unwrap(); + let m = rdf_read(f); + info!("apply {:?} -> {:?} for {:?}", entry, &m, z_hex); + m.to_xattr(&ep); }, Err(e) => { debug!("metadata not found for {:?} -> {:?}", entry, z_hex); @@ -175,7 +180,7 @@ fn main() { Some(v) => { let p = str_to_path(v); info!("have path {:?}", &p); - //return exec_import(p.as_path(), index_dir.as_path()); + //return exec_import_rdf(p.as_path(), index_dir.as_path()); return exec_import_biblatex(p.as_path(), index_dir.as_path()); }, _ => {}, diff --git a/src/meta.rs b/src/meta.rs @@ -359,6 +359,7 @@ impl MetaData { pub fn from_path(p: &path::Path) -> Result<MetaData, std::io::Error> { let f = File::open(&p).unwrap(); + debug!("openning {}", p.display()); let mut m = MetaData::from_file(f).unwrap(); Ok(m) } @@ -385,6 +386,12 @@ impl MetaData { } } +impl fmt::Debug for MetaData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", format_args!("title \"{}\" author \"{}\" digest {}", self.title(), self.author(), self.fingerprint())) + } +} + #[cfg(test)] mod tests { use super::MetaData; diff --git a/testdata/meta.biblatex b/testdata/meta.biblatex @@ -0,0 +1,17 @@ +@article{ + bitcoin_whitepaper, + title = "Bitcoin: A Peer-to-Peer Electronic Cash System", + author = "Nakamoto, Satoshi", + keywords = "bitcoin,cryptocurrency", + language = "en", + note = 
"sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e", +} +@inproceedings{ + decentralizedmarketplace_smartcities, + author = "Gowri Sankar Ramachandran, Rahul Radhakrishnan and Bhaskar Krishnamachari", + title = "Towards a Decentralized Data Marketplace for Smart Cities", + booktitle = "IEEE International Smart Cities Conference", + month = sep, + year = 2018, + note = "sha512:f450b0b35ed8bd1c00b45b4f6ebd645079ae8bf3b8abd28aea62fc2ab3bab2878e021e0b6c182f776e24e5ed956c204d647b4c5b0f64a73e3753f736ffe2818c", +} diff --git a/tests/rdf_test.rs b/tests/rdf_test.rs @@ -0,0 +1,20 @@ +use std::io::{ + Write, + BufWriter, +}; + +use biblatex::EntryType; + +use kitab::rdf::write as rdf_write; +use kitab::meta::MetaData; + + +#[test] +fn test_rdf_dump() { + let v = Vec::new(); + let w = BufWriter::new(v); + let mut digest: Vec<u8> = Vec::new(); + digest.resize(64, 0); + let metadata = MetaData::new("foo", "Bar Baz", EntryType::Article, digest, None); + let r = rdf_write(&metadata, w); +}