commit 9efbbf90a3328f613540f91e92ca0a1d8b6b0654
Author: lash <dev@holbrook.no>
Date: Sat, 25 Jun 2022 07:06:53 +0000
Initial commit
Diffstat:
8 files changed, 231 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "katab"
+version = "0.0.1"
+authors = ["nolash <dev@holbrook.no>"]
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+xattr = "0.2.2"
+#inotify = "0.8.0"
+regex = "1.5.5"
+rio_turtle = "~0.7.1"
+rio_api = "~0.7.1"
+hex = "^0.4"
+
+# Optional BibLaTeX support. Inside a `[dependencies.<name>]` table the version
+# requirement is given with the `version` key, not with the crate name.
+[dependencies.biblatex]
+version = "0.5.0"
+optional = true
+
+#[dependencies.rdf]
+#rio_turtle = "~0.7.1"
+#rio_api = "~0.7.1"
+#optional = true
+
+[features]
+#dump_rdf = ["rdf"]
+#dump_bibtex = ["biblatex"]
diff --git a/src/dc/mod.rs b/src/dc/mod.rs
@@ -0,0 +1,5 @@
+/// Dublin Core descriptive fields of a single resource.
+///
+/// Derives the common comparison, cloning and debug traits so that values can
+/// be inspected in tests and logs and compared directly.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DCMetaData {
+    /// Title of the resource.
+    pub title: String,
+    /// Author (creator) of the resource.
+    pub author: String,
+    /// Subject of the resource, when one is known.
+    pub subject: Option<String>,
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -0,0 +1,12 @@
+//! Crate root of `katab`.
+//!
+//! Declares the public `meta`, `dc` and `rdf` modules.
+#![crate_name = "katab"]
+
+pub mod meta;
+
+pub mod dc;
+
+// The feature gate below is commented out, so `rdf` is always compiled.
+//#[cfg(feature = "dump_rdf")]
+pub mod rdf;
+
+// Crate-level unit tests; currently an empty placeholder.
+#[cfg(test)]
+mod tests {
+}
diff --git a/src/meta.rs b/src/meta.rs
@@ -0,0 +1,114 @@
+use std::path;
+use std::fmt;
+use xattr;
+use hex;
+
+use crate::dc::DCMetaData;
+
+/// Raw digest bytes of a resource; `MetaData::fingerprint` renders them as hex.
+pub type Digest = Vec<u8>;
+
+// NOTE(review): the order of the three components (presumably day, month, year)
+// is not established anywhere in this file - confirm before relying on it.
+/// Publication date as a three-part numeric tuple.
+pub type PublishDate = (u8, u8, u32);
+
+/// Name of a file, held as an owned string; used for `MetaData`'s `local_name`.
+pub type FileName = String;
+
+/// Path of a file, held as an owned string.
+pub type FilePath = String;
+
+/// Kind of resource that a `MetaData` record describes.
+///
+/// `MetaData::new` always assigns `Unknown`.
+enum ResourceType {
+ Unknown,
+ Article,
+ Whitepaper,
+ Book,
+ Report,
+}
+
+/// Metadata record for a single resource.
+///
+/// All fields are private; `title`, `author` and `fingerprint` expose read access.
+pub struct MetaData {
+ // Dublin Core fields (title, author, optional subject).
+ dc: DCMetaData,
+ // Kind of resource; `new` initialises it to `ResourceType::Unknown`.
+ typ: ResourceType,
+ // Digest bytes; hex-encoded by `fingerprint`.
+ digest: Digest,
+ // File name of the local copy, if any.
+ local_name: Option<FileName>,
+ // Free-form comment; `new` initialises it to the empty string.
+ comment: String,
+ // Publication date; `new` initialises it to (0, 0, 0).
+ publish_date: PublishDate,
+ // Initialised to 0 by `new`.
+ // NOTE(review): unit and epoch are not established by this file - confirm.
+ retrieval_timestamp: u32,
+}
+
+impl MetaData {
+
+    /// Creates a metadata record from a title, an author, a digest and an
+    /// optional local file name.
+    ///
+    /// The subject is left unset, the resource type is `Unknown`, the comment
+    /// is empty and the publish date and retrieval timestamp are zeroed.
+    pub fn new(title: &str, author: &str, digest: Vec<u8>, filename: Option<FileName>) -> MetaData {
+        let dc = DCMetaData {
+            title: String::from(title),
+            author: String::from(author),
+            subject: None,
+        };
+
+        MetaData {
+            dc,
+            typ: ResourceType::Unknown,
+            // Store the digest supplied by the caller; it was previously
+            // discarded, which made `fingerprint` always return "".
+            digest,
+            local_name: filename,
+            comment: String::new(),
+            publish_date: (0, 0, 0),
+            retrieval_timestamp: 0,
+        }
+    }
+
+    /// Returns a copy of the title.
+    pub fn title(&self) -> String {
+        self.dc.title.clone()
+    }
+
+    /// Returns a copy of the author.
+    pub fn author(&self) -> String {
+        self.dc.author.clone()
+    }
+
+    /// Returns the digest as a lowercase hex string.
+    pub fn fingerprint(&self) -> String {
+        hex::encode(&self.digest)
+    }
+
+    /// Builds a record from the extended attributes of the file at `filepath`.
+    ///
+    /// The title is read from `user.dcterms:title` and the author from
+    /// `user.dcterms:creator`; an attribute that is not set yields an empty
+    /// string. The final path component becomes the local file name. No
+    /// digest is computed here, so the record carries an empty digest.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an attribute cannot be read (for example when the file does
+    /// not exist), if an attribute value is not valid UTF-8, or if the path
+    /// has no final component or that component is not valid UTF-8.
+    pub fn from_xattr(filepath: &path::Path) -> MetaData {
+        let title = Self::xattr_string(filepath, "user.dcterms:title");
+        let author = Self::xattr_string(filepath, "user.dcterms:creator");
+
+        let filename: FileName = filepath
+            .file_name()
+            .expect("path has no file name component")
+            .to_str()
+            .expect("file name is not valid UTF-8")
+            .to_owned();
+
+        MetaData::new(&title, &author, Vec::new(), Some(filename))
+    }
+
+    /// Reads the extended attribute `key` of `filepath` as a UTF-8 string,
+    /// returning an empty string when the attribute is not set.
+    fn xattr_string(filepath: &path::Path, key: &str) -> String {
+        match xattr::get(filepath, key).expect("cannot read extended attribute") {
+            Some(raw) => String::from_utf8(raw).expect("extended attribute is not valid UTF-8"),
+            None => String::new(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::MetaData;
+    use std::path;
+
+    // Loads the PDF fixture and checks the title and author that `from_xattr`
+    // reads from its extended attributes.
+    // NOTE(review): this needs `user.dcterms:title` and `user.dcterms:creator`
+    // to be set on the fixture in the working tree - confirm how they get there.
+    #[test]
+    fn test_metadata_create() {
+        let expected_title = "Bitcoin: A Peer-to-Peer Electronic Cash System";
+        let expected_author = "Satoshi Nakamoto";
+
+        let fixture = path::Path::new("testdata/bitcoin.pdf");
+        let parsed = MetaData::from_xattr(fixture);
+
+        assert_eq!(parsed.dc.title, expected_title);
+        assert_eq!(parsed.dc.author, expected_author);
+    }
+}
diff --git a/src/rdf.rs b/src/rdf.rs
@@ -0,0 +1,60 @@
+use std::fs::File;
+use std::io::{
+ Read,
+ Write
+};
+
+use rio_api::model::{
+ NamedNode,
+ Literal,
+ Triple,
+ Subject,
+};
+use rio_turtle::TurtleFormatter;
+use rio_api::formatter::TriplesFormatter;
+
+use crate::meta::MetaData;
+
+
+/// Serialises the title and author of `entry` as Turtle triples to `w`.
+///
+/// The subject is the URN `URN:sha512:<fingerprint>`; the title is written
+/// with the `dc/terms/title` predicate and the author with `dc/terms/creator`.
+///
+/// The returned count is currently always `0`; bytes written are not tracked.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while writing a triple or while finishing the
+/// document.
+pub fn write_rdf(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
+    // TODO: parsers are apparently buggy, cannot decode dublin core rdf,
+    // so the predicates are spelled out by hand instead of read from a schema.
+    // NOTE(review): the DCMI terms namespace is published as
+    // `http://purl.org/dc/terms/`; the IRIs below use `https` - confirm.
+
+    let mut tfmt = TurtleFormatter::new(w);
+
+    let urn_str = format!("URN:sha512:{}", entry.fingerprint());
+    let urn = Subject::NamedNode(
+        NamedNode {
+            iri: urn_str.as_str(),
+        },
+    );
+
+    // Bind the owned strings first so the triples can borrow from them.
+    let title = entry.title();
+    let author = entry.author();
+
+    tfmt.format(&Triple {
+        subject: urn,
+        predicate: NamedNode { iri: "https://purl.org/dc/terms/title" }.into(),
+        object: Literal::Simple { value: title.as_str() }.into(),
+    })?;
+    tfmt.format(&Triple {
+        subject: urn,
+        predicate: NamedNode { iri: "https://purl.org/dc/terms/creator" }.into(),
+        object: Literal::Simple { value: author.as_str() }.into(),
+    })?;
+
+    // The formatter only terminates the last statement when it is finished;
+    // without this call the output lacks its closing " ." and is not valid Turtle.
+    tfmt.finish()?;
+
+    Ok(0)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::write_rdf;
+    use super::MetaData;
+
+    // Serialises a minimal record into an in-memory buffer and checks that the
+    // call succeeds and that both literals appear in the output.
+    #[test]
+    fn test_write() {
+        let m = MetaData::new("foo", "bar", vec!(0x2a), None);
+        let mut buf: Vec<u8> = Vec::new();
+
+        let r = write_rdf(&m, &mut buf);
+        assert!(r.is_ok());
+
+        let out = String::from_utf8(buf).expect("turtle output is valid UTF-8");
+        assert!(out.contains("\"foo\""));
+        assert!(out.contains("\"bar\""));
+    }
+}
diff --git a/testdata/bitcoin.pdf b/testdata/bitcoin.pdf
Binary files differ.
diff --git a/testdata/meta.ttl b/testdata/meta.ttl
@@ -0,0 +1,10 @@
+@prefix dcterms: <https://purl.org/dc/terms/> .
+@prefix dcmi: <https://purl.org/dc/dcmi/> .
+
+<URN:sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e>
+ dcterms:title "Bitcoin: A Peer-to-Peer Electronic Cash System" ;
+ dcterms:subject "bitcoin,cryptocurrency,cryptography" ;
+ dcterms:creator "Satoshi Nakamoto" ;
+ dcterms:type "Whitepaper" ;
+ dcterms:MediaType "application/pdf" ;
+ dcterms:language "en" .