kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

rdf.rs (8600B)


      1 use std::fs::File;
      2 use std::io::{
      3     Read,
      4     Write
      5 };
      6 use std::str::FromStr;
      7 use std::io::{
      8     BufReader,
      9 };
     10 
     11 use rio_turtle::{
     12     TurtleParser,
     13     TurtleError,
     14     TurtleFormatter,
     15 };
     16 use rio_api::parser::TriplesParser;
     17 use rio_api::formatter::TriplesFormatter;
     18 use rio_api::model::{
     19     NamedNode,
     20     Literal,
     21     Triple,
     22     Subject,
     23 };
     24 use urn::{
     25     Urn,
     26     Error as UrnError,
     27 };
     28 
     29 use log::{
     30     debug,
     31     info,
     32     error,
     33 };
     34 
     35 use crate::digest;
     36 use crate::meta::MetaData;
     37 use crate::error::ParseError;
     38 use crate::dc::{
     39     DC_IRI_TITLE,
     40     DC_IRI_CREATOR,
     41     DC_IRI_SUBJECT,
     42     DC_IRI_LANGUAGE,
     43     DC_IRI_TYPE,
     44     DC_IRI_MEDIATYPE,
     45 };
     46 
     47 #[derive(Debug)]
     48 /// Error states when processing RDF data.
     49 pub enum RdfError {
     50     /// Invalid URN string or digest scheme.
     51     UrnError(UrnError),
     52     /// Hash does not match hash in current [crate::meta::MetaData](crate::meta::MetaData)
     53     /// instance.
     54     HashMismatchError,
     55 }
     56 
     57 /// Write metadata entry in the native rdf-turtle format.
     58 ///
     59 /// On success, returns the number of bytes written.
     60 ///
     61 /// # Arguments 
     62 ///
     63 /// * `entry` - metadata to write.
     64 /// * `w` - writer implementation providing the destination.
     65 pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> {
     66     let mut tfmt = TurtleFormatter::new(w);
     67     
     68     //let urn_str = format!("URN:sha512:{}", entry.fingerprint());
     69     let urn_str = format!("URN:{}", entry.urn());
     70     let urn = Subject::NamedNode(
     71         NamedNode{
     72             iri: urn_str.as_str(),
     73         },
     74     );
     75 
     76     tfmt.format(&Triple{
     77         subject: urn,
     78         predicate: NamedNode { iri: DC_IRI_TITLE }.into(),
     79         object: Literal::Simple { value: entry.title().as_str() }.into(),
     80     });
     81     tfmt.format(&Triple{
     82         subject: urn,
     83         predicate: NamedNode { iri: DC_IRI_CREATOR }.into(),
     84         object: Literal::Simple { value: entry.author().as_str() }.into(),
     85     });
     86     let typ = entry.typ().to_string();
     87     tfmt.format(&Triple{
     88         subject: urn,
     89         predicate: NamedNode { iri: DC_IRI_TYPE }.into(),
     90         object: Literal::Simple { value: typ.as_str() }.into(),
     91     });
     92     match entry.subject() {
     93         Some(v) => {
     94             tfmt.format(&Triple{
     95                 subject: urn,
     96                 predicate: NamedNode { iri: DC_IRI_SUBJECT }.into(),
     97                 object: Literal::Simple { value: v.as_str() }.into(),
     98             });
     99         },
    100         _ => (),
    101     };
    102 
    103     match entry.mime() {
    104         Some(v) => {
    105             let m: String = v.to_string();
    106             tfmt.format(&Triple{
    107                 subject: urn,
    108                 predicate: NamedNode { iri: DC_IRI_MEDIATYPE }.into(),
    109                 object: Literal::Simple { value: m.as_str() }.into(),
    110             });
    111         },
    112         _ => (),
    113     };
    114 
    115     match entry.language() {
    116         Some(v) => {
    117             let m: String = v.to_string();
    118             tfmt.format(&Triple{
    119                 subject: urn,
    120                 predicate: NamedNode { iri: DC_IRI_LANGUAGE }.into(),
    121                 object: Literal::Simple { value: m.as_str() }.into(),
    122             });
    123         },
    124         _ => (),
    125     };
    126 
    127     tfmt.finish();
    128     Ok(0)
    129 }
    130 
    131 
    132 fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), RdfError> {
    133     let subject_iri = triple.subject.to_string();
    134     let l = subject_iri.len()-1;
    135     //let subject = &subject_iri[1..l];
    136     let subject = &subject_iri[1..l];
    137     match subject[0..4].to_lowercase().as_str() {
    138         "urn:"  => {},
    139         _ => {
    140             return Err(RdfError::UrnError(UrnError::InvalidNid));
    141         },
    142     };
    143     let digest_urn = match digest::from_urn(&subject[4..]) {
    144         Err(e) => {
    145             error!("error {:?}", &subject);
    146             return Err(RdfError::UrnError(UrnError::InvalidNid));
    147         },
    148         Ok(v) => {
    149             &subject[4..]
    150         },
    151     };
    152     let subject_urn = Urn::from_str(subject).unwrap();
    153 
    154     let v = subject_urn.nss();
    155     let b = hex::decode(&v).unwrap();
    156     if metadata.fingerprint().len() == 0 {
    157         debug!("setting fingerprint {}", v);
    158         metadata.set_fingerprint_urn(digest_urn);
    159     } else if metadata.fingerprint() != v {
    160         return Err(RdfError::HashMismatchError);
    161     }
    162 
    163     let field = triple.predicate.iri;
    164     match field {
    165         DC_IRI_TITLE => {
    166             let title = triple.object.to_string().replace("\"", "");
    167             metadata.set_title(title.as_str());
    168             debug!("found title: {}", title);
    169         },
    170         DC_IRI_CREATOR => {
    171             let author = triple.object.to_string().replace("\"", "");
    172             metadata.set_author(author.as_str());
    173             debug!("found author: {}", author);
    174         },
    175         DC_IRI_SUBJECT => {
    176             let mut subject = triple.object.to_string().replace("\"", "");
    177             metadata.set_subject(subject.as_str());
    178             debug!("found subject: {}", subject);
    179         },
    180         DC_IRI_LANGUAGE => {
    181             let mut lang = triple.object.to_string().replace("\"", "");
    182             metadata.set_language(lang.as_str());
    183             debug!("found language: {}", lang);
    184         },
    185         DC_IRI_TYPE => {
    186             let mut typ = triple.object.to_string().replace("\"", "");
    187             metadata.set_typ(typ.as_str());
    188             debug!("found entry type: {}", typ);
    189         },
    190         DC_IRI_MEDIATYPE => {
    191             let mut mime_type = triple.object.to_string().replace("\"", "");
    192             metadata.set_mime_str(mime_type.as_str());
    193             debug!("found mime type: {}", mime_type);
    194         },
    195         _ => {
    196             debug!("skipping unknown predicate: {}", field);
    197         },
    198     };
    199     Ok(())
    200 }
    201 
    202 /// Read one or more metadata entries from the rdf-turtle source.
    203 ///
    204 /// Will return `ParseError` if any of the records are invalid.
    205 ///
    206 /// # Arguments 
    207 ///
    208 /// * `r` - reader implementation providing the source.
    209 pub fn read_all(r: impl Read) -> Result<Vec<MetaData>, ParseError> {
    210     let mut rr: Vec<MetaData> = vec!();
    211     let bf = BufReader::new(r);
    212     let mut tp = TurtleParser::new(bf, None);
    213     rr.push(MetaData::empty());
    214     let mut i: usize = 0;
    215     let r: Result<_, TurtleError> = tp.parse_all(&mut |r| {
    216         match r {
    217             Triple{subject, predicate, object } => {
    218                 match handle_parse_match(&mut rr[i], r) {
    219                     Err(HashMismatchError) => {
    220                         rr.push(MetaData::empty());
    221                         i += 1;
    222                         match handle_parse_match(&mut rr[i], r) {
    223                             Err(e) => {
    224                                 error!("{:?}", e);
    225                             },
    226                             _ => {},
    227                         };
    228                     },
    229                     _ => {},
    230                 };
    231             },
    232         }
    233         Ok(())
    234     });
    235     // TODO: should check validity of all records
    236     if rr[0].fingerprint() == "" {
    237         return Err(ParseError::new("empty fingerprint"));
    238     }
    239     Ok(rr)
    240 }
    241 
    242 /// Read a single metadata entry from the rdf-turtle source.
    243 ///
    244 /// # Arguments 
    245 ///
    246 /// * `r` - reader implementation providing the source.
    247 pub fn read(r: impl Read) -> MetaData {
    248     let mut rr: Vec<MetaData> = vec!();
    249     let mut metadata = MetaData::empty();
    250     let bf = BufReader::new(r);
    251     let mut tp = TurtleParser::new(bf, None);
    252     let r: Result<_, TurtleError> = tp.parse_all(&mut |r| {
    253         match r {
    254             Triple{subject, predicate, object } => {
    255                 match handle_parse_match(&mut metadata, r) {
    256                     Err(e) => {
    257                         error!("error parsing rdf source: {:?}", e);
    258                     },
    259                     _ => {},
    260                 };
    261             },
    262             _ => {},
    263         }
    264         Ok(())
    265     });
    266     metadata
    267 }
    268 
    269 #[cfg(test)]
    270 mod tests {
    271     use super::{
    272         write,
    273         read,
    274     };
    275     use super::MetaData;
    276     use crate::digest;
    277     use std::io::stdout;
    278     use std::fs::File;
    279     use std::default::Default;
    280     use biblatex::EntryType;
    281     use env_logger;
    282 
    283     #[test]
    284     fn test_turtle_write() {
    285         let mut digest = Vec::with_capacity(64);
    286         digest.resize(64, 0x2a);
    287         let digest_sha = digest::from_vec(Vec::from(digest)).unwrap();
    288         let mut m = MetaData::new("foo", "bar", EntryType::Article, digest_sha, None);
    289         m.set_subject("baz");
    290         m.set_mime_str("foo/bar");
    291         m.set_language("nb-NO");
    292         //let v = stdout();
    293         let mut v: Vec<u8> = vec!();
    294         let r = write(&m, v);
    295     }
    296 
    297     #[test]
    298     fn test_turtle_read() {
    299         let f = File::open("testdata/meta.ttl").unwrap();
    300         read(&f);
    301     }
    302 }