kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

commit 9efbbf90a3328f613540f91e92ca0a1d8b6b0654
Author: lash <dev@holbrook.no>
Date:   Sat, 25 Jun 2022 07:06:53 +0000

Initial commit

Diffstat:
A .gitignore | 2 ++
A Cargo.toml | 28 ++++++++++++++++++++++++++++
A src/dc/mod.rs | 5 +++++
A src/lib.rs | 12 ++++++++++++
A src/meta.rs | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/rdf.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A testdata/bitcoin.pdf | 0
A testdata/meta.ttl | 10 ++++++++++
8 files changed, 231 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "katab" +version = "0.0.1" +authors = ["nolash <dev@holbrook.no>"] +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +xattr = "0.2.2" +#inotify = "0.8.0" +regex = "1.5.5" +rio_turtle = "~0.7.1" +rio_api = "~0.7.1" +hex = "^0.4" + +[dependencies.biblatex] +biblatex = "0.5.0" +optional = true + +#[dependencies.rdf] +#rio_turtle = "~0.7.1" +#rio_api = "~0.7.1" +#optional = true + +[features] +#dump_rdf = ["rdf"] +#dump_bibtex = ["biblatex"] diff --git a/src/dc/mod.rs b/src/dc/mod.rs @@ -0,0 +1,5 @@ +pub struct DCMetaData { + pub title: String, + pub author: String, + pub subject: Option<String>, +} diff --git a/src/lib.rs b/src/lib.rs @@ -0,0 +1,12 @@ +#![crate_name = "katab"] + +pub mod meta; + +pub mod dc; + +//#[cfg(feature = "dump_rdf")] +pub mod rdf; + +#[cfg(test)] +mod tests { +} diff --git a/src/meta.rs b/src/meta.rs @@ -0,0 +1,114 @@ +use std::path; +use std::fmt; +use xattr; +use hex; + +use crate::dc::DCMetaData; + +pub type Digest = Vec<u8>; + +pub type PublishDate = (u8, u8, u32); + +pub type FileName = String; + +pub type FilePath = String; + +enum ResourceType { + Unknown, + Article, + Whitepaper, + Book, + Report, +} + +pub struct MetaData { + dc: DCMetaData, + typ: ResourceType, + digest: Digest, + local_name: Option<FileName>, + comment: String, + publish_date: PublishDate, + retrieval_timestamp: u32, +} + +impl MetaData { + + pub fn new(title: &str, author: &str, digest: Vec<u8>, filename: Option<FileName>) -> MetaData { + let dc:DCMetaData = DCMetaData{ + title: String::from(title), + author: String::from(author), + subject: None, + }; + + MetaData{ + dc: dc, + typ: ResourceType::Unknown, + digest: vec!(), + comment: String::new(), + //local_name: filepath.to_str().unwrap().to_string(), + local_name: filename, + 
publish_date: (0, 0, 0), + retrieval_timestamp: 0, + } + } + + pub fn title(&self) -> String { + self.dc.title.clone() + } + + pub fn author(&self) -> String { + self.dc.author.clone() + } + + pub fn fingerprint(&self) -> String { + hex::encode(&self.digest) + } + + pub fn from_xattr(filepath: &path::Path) -> MetaData { + + let mut title: String = String::new(); + let mut author: String = String::new(); + let mut subject: String = String::new(); + let filename: FileName; + + let title_src = xattr::get(filepath, "user.dcterms:title").unwrap(); + match title_src { + Some(v) => { + let s = std::str::from_utf8(&v).unwrap(); + title.push_str(s); + }, + None => {}, + } + + let author_src = xattr::get(filepath, "user.dcterms:creator").unwrap(); + match author_src { + Some(v) => { + let s = std::str::from_utf8(&v).unwrap(); + author.push_str(s); + }, + None => {}, + } + + filename = filepath.file_name() + .unwrap() + .to_os_string() + .into_string() + .unwrap(); + + MetaData::new(title.as_str(), author.as_str(), vec!(), Some(filename)) + } +} + +#[cfg(test)] +mod tests { + use super::MetaData; + use std::path; + + #[test] + fn test_metadata_create() { + let s = path::Path::new("testdata/bitcoin.pdf"); + let meta = MetaData::from_xattr(s); + assert_eq!(meta.dc.title, "Bitcoin: A Peer-to-Peer Electronic Cash System"); + assert_eq!(meta.dc.author, "Satoshi Nakamoto"); + } +} diff --git a/src/rdf.rs b/src/rdf.rs @@ -0,0 +1,60 @@ +use std::fs::File; +use std::io::{ + Read, + Write +}; + +use rio_api::model::{ + NamedNode, + Literal, + Triple, + Subject, +}; +use rio_turtle::TurtleFormatter; +use rio_api::formatter::TriplesFormatter; + +use crate::meta::MetaData; + + +pub fn write_rdf(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { + // TODO: parsers are apparently buggy, cannot decode dublin core rdf + //let mut f = File::open("../dublincore/dublin_core_terms.rdf").unwrap(); + //let mut parser = NTriplesParser::from_reader(f); + //let schema_graph: Graph = 
parser.decode().unwrap(); + + let mut tfmt = TurtleFormatter::new(w); + + let urn_str = format!("URN:sha512:{}", entry.fingerprint()); + let urn = Subject::NamedNode( + NamedNode{ + iri: urn_str.as_str(), + }, + ); + + tfmt.format(&Triple{ + subject: urn, + predicate: NamedNode { iri: "https://purl.org/dc/terms/title" }.into(), + object: Literal::Simple { value: entry.title().as_str() }.into(), + }); + tfmt.format(&Triple{ + subject: urn, + predicate: NamedNode { iri: "https://purl.org/dc/terms/creator" }.into(), + object: Literal::Simple { value: entry.author().as_str() }.into(), + }); + Ok(0) +} + +#[cfg(test)] +mod tests { + use super::write_rdf; + use super::MetaData; + use std::io::stdout; + + #[test] + fn test_write() { + let m = MetaData::new("foo", "bar", vec!(0x2a), None); + //let v = Vec::default(); + let v = stdout(); + let r = write_rdf(&m, v); + } +} diff --git a/testdata/bitcoin.pdf b/testdata/bitcoin.pdf Binary files differ. diff --git a/testdata/meta.ttl b/testdata/meta.ttl @@ -0,0 +1,10 @@ +@prefix dcterms: <https://purl.org/dc/terms/> . +@prefix dcmi: <https://purl.org/dc/dcmi/> . + +<URN:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e> + dcterms:title "Bitcoin: A Peer-to-Peer Electronic Cash System" ; + dcterms:subject "bitcoin,cryptocurrency,cryptography" ; + dcterms:creator "Satoshi Nakamoto" ; + dcterms:type "Whitepaper" ; + dcterms:MediaType "application/pdf" ; + dcterms:language "en" .