kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

commit c1218fd2cd3d1ad96cc55144ed100e615f77e19e
parent 800291aa0fbd8c496da676b174626918a1e9678d
Author: lash <dev@holbrook.no>
Date:   Sat, 25 Jun 2022 18:38:53 +0000

Add read from turtle to metadata

Diffstat:
M Cargo.toml | 3 +++
M src/meta.rs | 43 +++++++++++++++++++++++++++++++++++++------
M src/rdf.rs | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M testdata/meta.ttl | 6 +++---
4 files changed, 156 insertions(+), 13 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -17,6 +17,9 @@ mime = "^0.3.13" unic-langid-impl = "^0.9.0" biblatex = "^0.6.2" sha2 = "^0.10.2" +log = "^0.4" +env_logger = "^0.9" +urn = "^0.4.0" #[dependencies.rdf] #rio_turtle = "~0.7.1" diff --git a/src/meta.rs b/src/meta.rs @@ -42,22 +42,49 @@ impl MetaData { pub fn new(title: &str, author: &str, typ: EntryType, digest: Vec<u8>, filename: Option<FileName>) -> MetaData { let dc = DCMetaData::new(title, author, typ); - let sz = Sha512::output_size(); - if digest.len() != sz { - panic!("wrong digest size, must be {}", sz); - } + let mut m = MetaData{ + dc: dc, + digest: vec!(), + comment: String::new(), + //local_name: filepath.to_str().unwrap().to_string(), + local_name: filename, + publish_date: (0, 0, 0), + retrieval_timestamp: 0, + }; + + m.set_fingerprint(digest); + m + } + pub fn empty() -> MetaData { + let dc = DCMetaData::new("", "", EntryType::Unknown(String::new())); MetaData{ dc: dc, - digest: digest, + digest: vec!(), comment: String::new(), //local_name: filepath.to_str().unwrap().to_string(), - local_name: filename, + local_name: None, publish_date: (0, 0, 0), retrieval_timestamp: 0, } } + pub fn set_title(&mut self, title: &str) { + self.dc.title = String::from(title); + } + + pub fn set_author(&mut self, author: &str) { + self.dc.title = String::from(author); + } + + pub fn set_fingerprint(&mut self, fingerprint: Vec<u8>) { + let sz = Sha512::output_size(); + if fingerprint.len() != sz { + panic!("wrong digest size, must be {}", sz); + } + self.digest = fingerprint; + } + pub fn title(&self) -> String { self.dc.title.clone() } @@ -66,6 +93,10 @@ impl MetaData { self.dc.author.clone() } + pub fn set_typ(&mut self, typ: &str) { + self.dc.typ = EntryType::from_str(typ).unwrap(); + } + pub fn typ(&self) -> EntryType { self.dc.typ.clone() } diff --git a/src/rdf.rs b/src/rdf.rs @@ -3,19 +3,38 @@ use std::io::{ Read, Write }; +use std::str::FromStr; +use std::io::{ + BufReader, +}; +use rio_turtle::{ + 
TurtleParser, + TurtleError, + TurtleFormatter, +}; +use rio_api::parser::TriplesParser; +use rio_api::formatter::TriplesFormatter; use rio_api::model::{ NamedNode, Literal, Triple, Subject, }; -use rio_turtle::TurtleFormatter; -use rio_api::formatter::TriplesFormatter; +use urn::{ + Urn, + Error as UrnError, +}; + +use log::{ + debug, + info, +}; use crate::meta::MetaData; + pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { let mut tfmt = TurtleFormatter::new(w); @@ -81,16 +100,98 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { Ok(0) } + +pub fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), UrnError> { + let subject_iri = triple.subject.to_string(); + let l = subject_iri.len()-1; + let subject = &subject_iri[1..l]; + let subject_urn = Urn::from_str(subject).unwrap(); + if subject_urn.nid() != "sha512" { + return Err(UrnError::InvalidNid); + } + + if metadata.fingerprint().len() == 0 { + let v = subject_urn.nss(); + let b = hex::decode(&v).unwrap(); + info!("setting fingerprint {}", v); + metadata.set_fingerprint(b); + } + + let field = triple.predicate.iri; + match field { + "https://purl.org/dc/terms/title" => { + let title = triple.object.to_string(); + let l = title.len()-1; + metadata.set_title(&title[1..l]); + info!("found title: {}", title); + }, + "https://purl.org/dc/terms/creator" => { + let author = triple.object.to_string(); + let l = author.len()-1; + metadata.set_author(&author[1..l]); + info!("found author: {}", author); + }, + "https://purl.org/dc/terms/subject" => { + let mut subject = triple.object.to_string(); + let l = subject.len()-1; + metadata.set_subject(&subject[1..l]); //.as_str()); + info!("found subject: {}", subject); + }, + "https://purl.org/dc/terms/language" => { + let mut lang = triple.object.to_string(); + let l = lang.len()-1; + metadata.set_language(&lang[1..l]); + info!("found language: {}", lang); + }, + 
"https://purl.org/dc/terms/type" => { + let mut typ = triple.object.to_string(); + let l = typ.len()-1; + metadata.set_typ(&typ[1..l]); + info!("found entry type: {}", typ); + }, + "https://purl.org/dc/terms/MediaType" => { + let mut mime_type = triple.object.to_string(); + let l = mime_type.len()-1; + metadata.set_mime_str(&mime_type[1..l]); + info!("found mime type: {}", mime_type); + }, + _ => { + debug!("skipping unknown predicate: {}", field); + }, + }; + Ok(()) +} + +pub fn read(r: impl Read) { + let mut metadata = MetaData::empty(); + let bf = BufReader::new(r); + let mut tp = TurtleParser::new(bf, None); + let r: Result<_, TurtleError> = tp.parse_all(&mut |r| { + match r { + Triple{subject, predicate, object } => { + handle_parse_match(&mut metadata, r); + }, + _ => {}, + } + Ok(()) + }); +} + #[cfg(test)] mod tests { - use super::write; + use super::{ + write, + read, + }; use super::MetaData; use std::io::stdout; + use std::fs::File; use std::default::Default; use biblatex::EntryType; + use env_logger; #[test] - fn test_write() { + fn test_turtle_write() { let mut digest = Vec::with_capacity(64); digest.resize(64, 0x2a); let mut m = MetaData::new("foo", "bar", EntryType::Article, Vec::from(digest), None); @@ -100,4 +201,12 @@ mod tests { let v = stdout(); let r = write(&m, v); } + + #[test] + fn test_turtle_read() { + env_logger::init(); + + let f = File::open("testdata/meta.ttl").unwrap(); + read(&f); + } } diff --git a/testdata/meta.ttl b/testdata/meta.ttl @@ -1,10 +1,10 @@ @prefix dcterms: <https://purl.org/dc/terms/> . @prefix dcmi: <https://purl.org/dc/dcmi/> . 
-<URN:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e> +<urn:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e> dcterms:title "Bitcoin: A Peer-to-Peer Electronic Cash System" ; dcterms:subject "bitcoin,cryptocurrency,cryptography" ; dcterms:creator "Satoshi Nakamoto" ; - dcterms:type "Whitepaper" ; + dcterms:type "article" ; dcterms:MediaType "application/pdf" ; - dcterms:language "en" . + dcterms:language "en-US" .