commit 68a17a5fd159bf8e1cd9be2a492d3a3b157b6986
parent 9214f86501ef9d9f5ab834bbdb6d18fe45b2fb8e
Author: lash <dev@holbrook.no>
Date: Sun, 24 Jul 2022 08:57:09 +0000
Add language, keywords, digest from biblatex, rehabilitate scan
Diffstat:
7 files changed, 168 insertions(+), 33 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -35,17 +35,17 @@ tempfile = "^3.3.0"
[dependencies.rio_turtle]
version = "~0.7.1"
-optional = true
+#optional = true
[dependencies.rio_api]
version = "^0.7.1"
-optional = true
+#optional = true
[dependencies.tree_magic]
version = "^0.2.1"
optional = true
[features]
-rdf = ["rio_turtle", "rio_api"]
+#rdf = ["rio_turtle", "rio_api"]
#dump_bibtex = ["biblatex"]
magic = ["tree_magic"]
diff --git a/src/biblatex.rs b/src/biblatex.rs
@@ -0,0 +1,87 @@
+use std::io::{
+ Read,
+};
+use std::str;
+
+use log::debug;
+use biblatex::{
+ Bibliography,
+ Type,
+ Entry,
+};
+
+use crate::meta::MetaData;
+
+fn parse_digest(entry: &Entry) -> Vec<u8> {
+ let note = entry.get("note").unwrap();
+ let note_s = String::from_chunks(note).unwrap();
+ let mut digest_val = note_s.split(":");
+
+ let mut digest = Vec::new();
+
+ match digest_val.next() {
+ Some(v) => {
+ if v == "sha512" {
+ let digest_hex = digest_val.next().unwrap();
+ let mut digest_imported = hex::decode(digest_hex).unwrap();
+ digest.append(&mut digest_imported);
+ debug!("parsed digest {}", hex::encode(&digest));
+ }
+ },
+ None => {},
+ };
+
+ if digest.len() == 0 {
+ digest.resize(64, 0);
+ }
+
+ digest
+}
+
+pub fn read_all(mut r: impl Read) -> Vec<MetaData> {
+ let mut s = String::new();
+ let c = r.read_to_string(&mut s);
+ let bib = Bibliography::parse(&s).unwrap();
+
+ let mut rr: Vec<MetaData> = vec!();
+
+ for e in bib.iter() {
+ let authors = e.author()
+ .unwrap()
+ .into_iter()
+ .map(|v| {
+ format!("{} {}", v.given_name, v.name)
+ });
+ let authors_s = authors.fold(String::new(), |x, y| {
+ if x.len() == 0 {
+ return y
+ }
+ format!("{}, {}", x, y)
+ });
+ let digest = parse_digest(&e);
+
+ let title = e.title().unwrap();
+ let title_s = String::from_chunks(title).unwrap();
+
+ let mut m = MetaData::new(title_s.as_str(), authors_s.as_str(), e.entry_type.clone(), digest, None);
+
+ match e.keywords() {
+ Ok(v) => {
+ let s = String::from_chunks(v).unwrap();
+ m.set_subject(s.as_str());
+ },
+ _ => {},
+ };
+
+ match e.language() {
+ Ok(v) => {
+ m.set_language(v.as_str());
+ },
+ _ => {},
+ }
+
+ debug!("read metadata {:?}", &m);
+ rr.push(m);
+ }
+ rr
+}
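For reference, a minimal sketch of driving the new reader as a library consumer (src/lib.rs exposes pub mod biblatex); it reads the sample file added further down in this commit and relies only on read_all and the new Debug impl for MetaData:

    use std::fs::File;

    use kitab::biblatex::read_all;

    fn main() {
        // Parse the sample records from testdata/meta.biblatex; each entry's
        // note field carries a content digest as "sha512:<hex>".
        let f = File::open("testdata/meta.biblatex").unwrap();
        for m in read_all(&f) {
            // Debug output comes from the new impl in src/meta.rs:
            // title "..." author "..." digest <fingerprint>
            println!("{:?}", m);
        }
    }
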
diff --git a/src/lib.rs b/src/lib.rs
@@ -6,7 +6,6 @@ pub mod dc;
pub mod store;
-#[cfg(feature = "rdf")]
pub mod rdf;
pub mod biblatex;
diff --git a/src/main.rs b/src/main.rs
@@ -25,7 +25,6 @@ use log::{
info,
};
-#[cfg(feature = "rdf")]
use kitab::rdf::{
read as rdf_read,
write as rdf_write,
@@ -81,20 +80,20 @@ fn args_setup() -> ArgMatches<'static> {
// commands
// kitab import <file> - attempt in order import rdf, import spec
-// kitab export <file> - export rdf/turtle
-// kitab scan <path> - recursively
+ // kitab export <file> - export rdf/turtle
+ // kitab scan <path> - recursively
-fn resolve_directory(args: &ArgMatches) -> PathBuf {
- match BaseDirs::new() {
- Some(v) => {
+ fn resolve_directory(args: &ArgMatches) -> PathBuf {
+ match BaseDirs::new() {
+ Some(v) => {
let d = v.data_dir();
d.join("kitab")
- .join("idx")
- },
- _ => {
- PathBuf::from(".")
- .join(".kitab")
- .join("/idx")
+ .join("idx")
+ },
+ _ => {
+ PathBuf::from(".")
+ .join(".kitab")
+ .join("/idx")
},
}
}
@@ -120,24 +119,29 @@ fn str_to_path(args: &ArgMatches) -> PathBuf {
p_canon
}
+fn store(index_path: &Path, m: &MetaData) {
+ let fp = index_path.join(m.fingerprint());
+ create_dir_all(&index_path);
+ debug!("writing record for title {} to {:?}", m.title(), &fp);
+
+ let ff = File::create(&fp).unwrap();
+ rdf_write(&m, &ff).unwrap();
+ debug!("stored as rdf {:?}", fp);
+}
+
fn exec_import_rdf(f: &Path, index_path: &Path) {
- #[cfg(feature = "rdf")]
- {
- let f = File::open(f).unwrap();
- let m = rdf_read(&f);
-
- let fp = index_path.join(m.fingerprint());
- create_dir_all(&index_path);
- debug!("writing record for title {} to {:?}", m.title(), &fp);
-
- let ff = File::create(fp).unwrap();
- rdf_write(&m, &ff).unwrap();
- }
+ let f = File::open(f).unwrap();
+ let m = rdf_read(&f);
+ store(index_path, &m);
}
fn exec_import_biblatex(f: &Path, index_path: &Path) {
let f = File::open(f).unwrap();
- biblatex_read_all(&f);
+ let entries = biblatex_read_all(&f);
+
+ for m in entries {
+ store(index_path, &m);
+ }
}
fn exec_scan(p: &Path, index_path: &Path) {
@@ -152,9 +156,10 @@ fn exec_scan(p: &Path, index_path: &Path) {
let fp = index_path.join(&z_hex);
match fp.canonicalize() {
Ok(v) => {
- info!("apply {:?} for {:?}", entry, z_hex);
- let m = MetaData::from_path(ep).unwrap();
- m.to_xattr(&p);
+ let f = File::open(&v).unwrap();
+ let m = rdf_read(f);
+ info!("apply {:?} -> {:?} for {:?}", entry, &m, z_hex);
+ m.to_xattr(&ep);
},
Err(e) => {
debug!("metadata not found for {:?} -> {:?}", entry, z_hex);
@@ -175,7 +180,7 @@ fn main() {
Some(v) => {
let p = str_to_path(v);
info!("have path {:?}", &p);
- //return exec_import(p.as_path(), index_dir.as_path());
+ //return exec_import_rdf(p.as_path(), index_dir.as_path());
return exec_import_biblatex(p.as_path(), index_dir.as_path());
},
_ => {},
diff --git a/src/meta.rs b/src/meta.rs
@@ -359,6 +359,7 @@ impl MetaData {
pub fn from_path(p: &path::Path) -> Result<MetaData, std::io::Error> {
let f = File::open(&p).unwrap();
+ debug!("openning {}", p.display());
let mut m = MetaData::from_file(f).unwrap();
Ok(m)
}
@@ -385,6 +386,12 @@ impl MetaData {
}
}
+impl fmt::Debug for MetaData {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", format_args!("title \"{}\" author \"{}\" digest {}", self.title(), self.author(), self.fingerprint()))
+ }
+}
+
#[cfg(test)]
mod tests {
use super::MetaData;
diff --git a/testdata/meta.biblatex b/testdata/meta.biblatex
@@ -0,0 +1,17 @@
+@article{
+ bitcoin_whitepaper,
+ title = "Bitcoin: A Peer-to-Peer Electronic Cash System",
+ author = "Nakamoto, Satoshi",
+ keywords = "bitcoin,cryptocurrency",
+ language = "en",
+ note = "sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e",
+}
+@inproceedings{
+ decentralizedmarketplace_smartcities,
+ author = "Gowri Sankar Ramachandran, Rahul Radhakrishnan and Bhaskar Krishnamachari",
+ title = "Towards a Decentralized Data Marketplace for Smart Cities",
+ booktitle = "IEEE International Smart Cities Conference",
+ month = sep,
+ year = 2018,
+ note = "sha512:f450b0b35ed8bd1c00b45b4f6ebd645079ae8bf3b8abd28aea62fc2ab3bab2878e021e0b6c182f776e24e5ed956c204d647b4c5b0f64a73e3753f736ffe2818c",
+}
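The note fields above carry a content digest in the form sha512:<hex>, which parse_digest() in src/biblatex.rs decodes, presumably the sha512 of the referenced document file. A minimal sketch of producing such a value, assuming the sha2 crate alongside hex (sha2 is an assumption here, not a stated kitab dependency; the file path is illustrative):

    use std::fs;

    use sha2::{Digest, Sha512};

    // Build a note value in the form expected by parse_digest() in
    // src/biblatex.rs: "sha512:" followed by 128 hex characters.
    fn note_digest(path: &str) -> std::io::Result<String> {
        let data = fs::read(path)?;
        let digest = Sha512::digest(&data);
        Ok(format!("sha512:{}", hex::encode(digest)))
    }

    fn main() {
        println!("{}", note_digest("bitcoin.pdf").unwrap());
    }
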
diff --git a/tests/rdf_test.rs b/tests/rdf_test.rs
@@ -0,0 +1,20 @@
+use std::io::{
+ Write,
+ BufWriter,
+};
+
+use biblatex::EntryType;
+
+use kitab::rdf::write as rdf_write;
+use kitab::meta::MetaData;
+
+
+#[test]
+fn test_rdf_dump() {
+ let v = Vec::new();
+ let w = BufWriter::new(v);
+ let mut digest: Vec<u8> = Vec::new();
+ digest.resize(64, 0);
+ let metadata = MetaData::new("foo", "Bar Baz", EntryType::Article, digest, None);
+ let r = rdf_write(&metadata, w);
+}