commit c1218fd2cd3d1ad96cc55144ed100e615f77e19e
parent 800291aa0fbd8c496da676b174626918a1e9678d
Author: lash <dev@holbrook.no>
Date: Sat, 25 Jun 2022 18:38:53 +0000
Add read from turtle to metadata
Diffstat:
4 files changed, 156 insertions(+), 13 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -17,6 +17,9 @@ mime = "^0.3.13"
unic-langid-impl = "^0.9.0"
biblatex = "^0.6.2"
sha2 = "^0.10.2"
+log = "^0.4"
+env_logger = "^0.9"
+urn = "^0.4.0"
#[dependencies.rdf]
#rio_turtle = "~0.7.1"
diff --git a/src/meta.rs b/src/meta.rs
@@ -42,22 +42,49 @@ impl MetaData {
+ /// Builds a `MetaData` record from a title, author, entry type, content digest
+ /// and optional local file name.
+ ///
+ /// `comment` starts empty, `publish_date` is `(0, 0, 0)` and
+ /// `retrieval_timestamp` is `0`.
+ ///
+ /// # Panics
+ ///
+ /// Panics (inside `set_fingerprint`) if `digest` is not exactly
+ /// `Sha512::output_size()` bytes long.
pub fn new(title: &str, author: &str, typ: EntryType, digest: Vec<u8>, filename: Option<FileName>) -> MetaData {
let dc = DCMetaData::new(title, author, typ);
- let sz = Sha512::output_size();
- if digest.len() != sz {
- panic!("wrong digest size, must be {}", sz);
- }
+ let mut m = MetaData{
+ dc: dc,
+ digest: vec!(),
+ comment: String::new(),
+ //local_name: filepath.to_str().unwrap().to_string(),
+ local_name: filename,
+ publish_date: (0, 0, 0),
+ retrieval_timestamp: 0,
+ };
+
+ // The digest is installed through the setter so the length check lives in one place.
+ m.set_fingerprint(digest);
+ m
+ }
+ /// Builds a blank `MetaData` record: empty title and author, entry type
+ /// `EntryType::Unknown` wrapping an empty string, no digest, no comment,
+ /// no local file name, and zeroed date fields.
+ ///
+ /// The digest is left empty here and is not length-checked until
+ /// `set_fingerprint` is called.
+ pub fn empty() -> MetaData {
+ let dc = DCMetaData::new("", "", EntryType::Unknown(String::new()));
MetaData{
dc: dc,
- digest: digest,
+ digest: vec!(),
comment: String::new(),
//local_name: filepath.to_str().unwrap().to_string(),
- local_name: filename,
+ local_name: None,
publish_date: (0, 0, 0),
retrieval_timestamp: 0,
}
}
+ /// Replaces the stored title with an owned copy of `title`.
+ pub fn set_title(&mut self, title: &str) {
+     self.dc.title = title.to_owned();
+ }
+
+ /// Replaces the stored author with an owned copy of `author`.
+ pub fn set_author(&mut self, author: &str) {
+     // Must write the author field; writing `title` here would clobber the
+     // title and leave the author untouched.
+     self.dc.author = String::from(author);
+ }
+
+ /// Stores `fingerprint` as the digest of this record.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `fingerprint` is not exactly `Sha512::output_size()` bytes long.
+ pub fn set_fingerprint(&mut self, fingerprint: Vec<u8>) {
+     let expected_len = Sha512::output_size();
+     assert!(
+         fingerprint.len() == expected_len,
+         "wrong digest size, must be {}",
+         expected_len
+     );
+     self.digest = fingerprint;
+ }
+
+ /// Returns a clone of the title held in `self.dc`.
pub fn title(&self) -> String {
self.dc.title.clone()
}
@@ -66,6 +93,10 @@ impl MetaData {
self.dc.author.clone()
}
+ /// Parses `typ` with `EntryType::from_str` and stores the result as the
+ /// entry type of this record.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `EntryType::from_str` returns an error for `typ`.
+ pub fn set_typ(&mut self, typ: &str) {
+     let entry_type = EntryType::from_str(typ).unwrap();
+     self.dc.typ = entry_type;
+ }
+
+ /// Returns a clone of the entry type held in `self.dc`.
pub fn typ(&self) -> EntryType {
self.dc.typ.clone()
}
diff --git a/src/rdf.rs b/src/rdf.rs
@@ -3,19 +3,38 @@ use std::io::{
Read,
Write
};
+use std::str::FromStr;
+use std::io::{
+ BufReader,
+};
+use rio_turtle::{
+ TurtleParser,
+ TurtleError,
+ TurtleFormatter,
+};
+use rio_api::parser::TriplesParser;
+use rio_api::formatter::TriplesFormatter;
use rio_api::model::{
NamedNode,
Literal,
Triple,
Subject,
};
-use rio_turtle::TurtleFormatter;
-use rio_api::formatter::TriplesFormatter;
+use urn::{
+ Urn,
+ Error as UrnError,
+};
+
+use log::{
+ debug,
+ info,
+};
use crate::meta::MetaData;
+
pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
let mut tfmt = TurtleFormatter::new(w);
@@ -81,16 +100,98 @@ pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
Ok(0)
}
+
+/// Returns the text carried by a triple's object: the lexical value of a
+/// literal (simple, language-tagged or typed) or the IRI of a named node.
+/// Other object kinds yield `None`.
+fn object_text<'a>(object: rio_api::model::Term<'a>) -> Option<&'a str> {
+    match object {
+        rio_api::model::Term::Literal(
+            Literal::Simple { value }
+            | Literal::LanguageTaggedString { value, .. }
+            | Literal::Typed { value, .. },
+        ) => Some(value),
+        rio_api::model::Term::NamedNode(node) => Some(node.iri),
+        _ => None,
+    }
+}
+
+/// Merges a single parsed Turtle triple into `metadata`.
+///
+/// The subject must be an IRI of the form `urn:sha512:<hex digest>`. While
+/// `metadata` has no fingerprint yet, the digest is decoded from the subject
+/// and stored. The predicate then selects which field receives the object's
+/// text; unknown predicates are logged at debug level and skipped.
+///
+/// # Errors
+///
+/// * Any error from parsing the subject as a URN is passed through.
+/// * `UrnError::InvalidNid` if the subject is not an IRI or its namespace
+///   identifier is not `sha512`.
+/// * `UrnError::InvalidNss` if the namespace-specific string is not valid
+///   hex or does not decode to 64 bytes.
+pub fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), UrnError> {
+    // A SHA-512 digest is 64 bytes. Checking here turns a malformed subject
+    // into an error instead of a panic inside `set_fingerprint`.
+    const SHA512_DIGEST_LEN: usize = 64;
+
+    // Only IRI subjects can name a digest; other subject kinds carry no
+    // namespace identifier at all.
+    let subject_iri = match triple.subject {
+        Subject::NamedNode(node) => node.iri,
+        _ => return Err(UrnError::InvalidNid),
+    };
+    let subject_urn = Urn::from_str(subject_iri)?;
+    // URN namespace identifiers are case-insensitive (RFC 8141).
+    if !subject_urn.nid().eq_ignore_ascii_case("sha512") {
+        return Err(UrnError::InvalidNid);
+    }
+
+    // NOTE(review): once a fingerprint is set, later triples whose subject
+    // names a different digest are still merged into the same record.
+    // Confirm whether one document may describe several entries.
+    if metadata.fingerprint().is_empty() {
+        let v = subject_urn.nss();
+        let b = match hex::decode(v) {
+            Ok(bytes) if bytes.len() == SHA512_DIGEST_LEN => bytes,
+            _ => return Err(UrnError::InvalidNss),
+        };
+        info!("setting fingerprint {}", v);
+        metadata.set_fingerprint(b);
+    }
+
+    let field = triple.predicate.iri;
+    // Read the value from the parsed term rather than trimming the first and
+    // last character of its display form: that form carries quotes, escape
+    // sequences and any `@lang` / `^^<type>` suffix.
+    let value = match object_text(triple.object) {
+        Some(value) => value,
+        None => {
+            debug!("skipping unsupported object for predicate: {}", field);
+            return Ok(());
+        }
+    };
+
+    match field {
+        "https://purl.org/dc/terms/title" => {
+            metadata.set_title(value);
+            info!("found title: {}", value);
+        }
+        "https://purl.org/dc/terms/creator" => {
+            metadata.set_author(value);
+            info!("found author: {}", value);
+        }
+        "https://purl.org/dc/terms/subject" => {
+            metadata.set_subject(value);
+            info!("found subject: {}", value);
+        }
+        "https://purl.org/dc/terms/language" => {
+            metadata.set_language(value);
+            info!("found language: {}", value);
+        }
+        "https://purl.org/dc/terms/type" => {
+            metadata.set_typ(value);
+            info!("found entry type: {}", value);
+        }
+        "https://purl.org/dc/terms/MediaType" => {
+            metadata.set_mime_str(value);
+            info!("found mime type: {}", value);
+        }
+        _ => {
+            debug!("skipping unknown predicate: {}", field);
+        }
+    }
+    Ok(())
+}
+
+/// Parses a Turtle document from `r` and collects the recognized terms into
+/// a `MetaData` record, which is returned.
+///
+/// Parsing is best-effort: a triple that `handle_parse_match` rejects is
+/// logged and skipped, and a Turtle syntax error ends parsing with whatever
+/// was collected up to that point.
+pub fn read(r: impl Read) -> MetaData {
+    let mut metadata = MetaData::empty();
+    let mut parser = TurtleParser::new(BufReader::new(r), None);
+    // The annotation fixes the closure's error type to the parser's own.
+    let outcome: Result<(), TurtleError> = parser.parse_all(&mut |triple| {
+        if let Err(e) = handle_parse_match(&mut metadata, triple) {
+            log::warn!("skipping triple: {:?}", e);
+        }
+        Ok(())
+    });
+    if let Err(e) = outcome {
+        log::warn!("turtle parse error, metadata may be incomplete: {}", e);
+    }
+    metadata
+}
+
#[cfg(test)]
mod tests {
- use super::write;
+ use super::{
+ write,
+ read,
+ };
use super::MetaData;
use std::io::stdout;
+ use std::fs::File;
use std::default::Default;
use biblatex::EntryType;
+ use env_logger;
#[test]
- fn test_write() {
+ fn test_turtle_write() {
let mut digest = Vec::with_capacity(64);
digest.resize(64, 0x2a);
let mut m = MetaData::new("foo", "bar", EntryType::Article, Vec::from(digest), None);
@@ -100,4 +201,12 @@ mod tests {
let v = stdout();
let r = write(&m, v);
}
+
+ #[test]
+ fn test_turtle_read() {
+     // `try_init` instead of `init`: tests share one process, and `init`
+     // panics if any other test has already installed a logger.
+     let _ = env_logger::builder().is_test(true).try_init();
+
+     let f = File::open("testdata/meta.ttl").unwrap();
+     read(&f);
+ }
}
diff --git a/testdata/meta.ttl b/testdata/meta.ttl
@@ -1,10 +1,10 @@
@prefix dcterms: <https://purl.org/dc/terms/> .
@prefix dcmi: <https://purl.org/dc/dcmi/> .
-<URN:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e>
+<urn:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e>
dcterms:title "Bitcoin: A Peer-to-Peer Electronic Cash System" ;
dcterms:subject "bitcoin,cryptocurrency,cryptography" ;
dcterms:creator "Satoshi Nakamoto" ;
- dcterms:type "Whitepaper" ;
+ dcterms:type "article" ;
dcterms:MediaType "application/pdf" ;
- dcterms:language "en" .
+ dcterms:language "en-US" .