rdf.rs (8600B)
1 use std::fs::File; 2 use std::io::{ 3 Read, 4 Write 5 }; 6 use std::str::FromStr; 7 use std::io::{ 8 BufReader, 9 }; 10 11 use rio_turtle::{ 12 TurtleParser, 13 TurtleError, 14 TurtleFormatter, 15 }; 16 use rio_api::parser::TriplesParser; 17 use rio_api::formatter::TriplesFormatter; 18 use rio_api::model::{ 19 NamedNode, 20 Literal, 21 Triple, 22 Subject, 23 }; 24 use urn::{ 25 Urn, 26 Error as UrnError, 27 }; 28 29 use log::{ 30 debug, 31 info, 32 error, 33 }; 34 35 use crate::digest; 36 use crate::meta::MetaData; 37 use crate::error::ParseError; 38 use crate::dc::{ 39 DC_IRI_TITLE, 40 DC_IRI_CREATOR, 41 DC_IRI_SUBJECT, 42 DC_IRI_LANGUAGE, 43 DC_IRI_TYPE, 44 DC_IRI_MEDIATYPE, 45 }; 46 47 #[derive(Debug)] 48 /// Error states when processing RDF data. 49 pub enum RdfError { 50 /// Invalid URN string or digest scheme. 51 UrnError(UrnError), 52 /// Hash does not match hash in current [crate::meta::MetaData](crate::meta::MetaData) 53 /// instance. 54 HashMismatchError, 55 } 56 57 /// Write metadata entry in the native rdf-turtle format. 58 /// 59 /// On success, returns the number of bytes written. 60 /// 61 /// # Arguments 62 /// 63 /// * `entry` - metadata to write. 64 /// * `w` - writer implementation providing the destination. 65 pub fn write(entry: &MetaData, w: impl Write) -> Result<usize, std::io::Error> { 66 let mut tfmt = TurtleFormatter::new(w); 67 68 //let urn_str = format!("URN:sha512:{}", entry.fingerprint()); 69 let urn_str = format!("URN:{}", entry.urn()); 70 let urn = Subject::NamedNode( 71 NamedNode{ 72 iri: urn_str.as_str(), 73 }, 74 ); 75 76 tfmt.format(&Triple{ 77 subject: urn, 78 predicate: NamedNode { iri: DC_IRI_TITLE }.into(), 79 object: Literal::Simple { value: entry.title().as_str() }.into(), 80 }); 81 tfmt.format(&Triple{ 82 subject: urn, 83 predicate: NamedNode { iri: DC_IRI_CREATOR }.into(), 84 object: Literal::Simple { value: entry.author().as_str() }.into(), 85 }); 86 let typ = entry.typ().to_string(); 87 tfmt.format(&Triple{ 88 subject: urn, 89 predicate: NamedNode { iri: DC_IRI_TYPE }.into(), 90 object: Literal::Simple { value: typ.as_str() }.into(), 91 }); 92 match entry.subject() { 93 Some(v) => { 94 tfmt.format(&Triple{ 95 subject: urn, 96 predicate: NamedNode { iri: DC_IRI_SUBJECT }.into(), 97 object: Literal::Simple { value: v.as_str() }.into(), 98 }); 99 }, 100 _ => (), 101 }; 102 103 match entry.mime() { 104 Some(v) => { 105 let m: String = v.to_string(); 106 tfmt.format(&Triple{ 107 subject: urn, 108 predicate: NamedNode { iri: DC_IRI_MEDIATYPE }.into(), 109 object: Literal::Simple { value: m.as_str() }.into(), 110 }); 111 }, 112 _ => (), 113 }; 114 115 match entry.language() { 116 Some(v) => { 117 let m: String = v.to_string(); 118 tfmt.format(&Triple{ 119 subject: urn, 120 predicate: NamedNode { iri: DC_IRI_LANGUAGE }.into(), 121 object: Literal::Simple { value: m.as_str() }.into(), 122 }); 123 }, 124 _ => (), 125 }; 126 127 tfmt.finish(); 128 Ok(0) 129 } 130 131 132 fn handle_parse_match(metadata: &mut MetaData, triple: Triple) -> Result<(), RdfError> { 133 let subject_iri = triple.subject.to_string(); 134 let l = subject_iri.len()-1; 135 //let subject = &subject_iri[1..l]; 136 let subject = &subject_iri[1..l]; 137 match subject[0..4].to_lowercase().as_str() { 138 "urn:" => {}, 139 _ => { 140 return Err(RdfError::UrnError(UrnError::InvalidNid)); 141 }, 142 }; 143 let digest_urn = match digest::from_urn(&subject[4..]) { 144 Err(e) => { 145 error!("error {:?}", &subject); 146 return Err(RdfError::UrnError(UrnError::InvalidNid)); 147 }, 148 Ok(v) => { 149 &subject[4..] 150 }, 151 }; 152 let subject_urn = Urn::from_str(subject).unwrap(); 153 154 let v = subject_urn.nss(); 155 let b = hex::decode(&v).unwrap(); 156 if metadata.fingerprint().len() == 0 { 157 debug!("setting fingerprint {}", v); 158 metadata.set_fingerprint_urn(digest_urn); 159 } else if metadata.fingerprint() != v { 160 return Err(RdfError::HashMismatchError); 161 } 162 163 let field = triple.predicate.iri; 164 match field { 165 DC_IRI_TITLE => { 166 let title = triple.object.to_string().replace("\"", ""); 167 metadata.set_title(title.as_str()); 168 debug!("found title: {}", title); 169 }, 170 DC_IRI_CREATOR => { 171 let author = triple.object.to_string().replace("\"", ""); 172 metadata.set_author(author.as_str()); 173 debug!("found author: {}", author); 174 }, 175 DC_IRI_SUBJECT => { 176 let mut subject = triple.object.to_string().replace("\"", ""); 177 metadata.set_subject(subject.as_str()); 178 debug!("found subject: {}", subject); 179 }, 180 DC_IRI_LANGUAGE => { 181 let mut lang = triple.object.to_string().replace("\"", ""); 182 metadata.set_language(lang.as_str()); 183 debug!("found language: {}", lang); 184 }, 185 DC_IRI_TYPE => { 186 let mut typ = triple.object.to_string().replace("\"", ""); 187 metadata.set_typ(typ.as_str()); 188 debug!("found entry type: {}", typ); 189 }, 190 DC_IRI_MEDIATYPE => { 191 let mut mime_type = triple.object.to_string().replace("\"", ""); 192 metadata.set_mime_str(mime_type.as_str()); 193 debug!("found mime type: {}", mime_type); 194 }, 195 _ => { 196 debug!("skipping unknown predicate: {}", field); 197 }, 198 }; 199 Ok(()) 200 } 201 202 /// Read one or more metadata entries from the rdf-turtle source. 203 /// 204 /// Will return `ParseError` if any of the records are invalid. 205 /// 206 /// # Arguments 207 /// 208 /// * `r` - reader implementation providing the source. 209 pub fn read_all(r: impl Read) -> Result<Vec<MetaData>, ParseError> { 210 let mut rr: Vec<MetaData> = vec!(); 211 let bf = BufReader::new(r); 212 let mut tp = TurtleParser::new(bf, None); 213 rr.push(MetaData::empty()); 214 let mut i: usize = 0; 215 let r: Result<_, TurtleError> = tp.parse_all(&mut |r| { 216 match r { 217 Triple{subject, predicate, object } => { 218 match handle_parse_match(&mut rr[i], r) { 219 Err(HashMismatchError) => { 220 rr.push(MetaData::empty()); 221 i += 1; 222 match handle_parse_match(&mut rr[i], r) { 223 Err(e) => { 224 error!("{:?}", e); 225 }, 226 _ => {}, 227 }; 228 }, 229 _ => {}, 230 }; 231 }, 232 } 233 Ok(()) 234 }); 235 // TODO: should check validity of all records 236 if rr[0].fingerprint() == "" { 237 return Err(ParseError::new("empty fingerprint")); 238 } 239 Ok(rr) 240 } 241 242 /// Read a single metadata entry from the rdf-turtle source. 243 /// 244 /// # Arguments 245 /// 246 /// * `r` - reader implementation providing the source. 247 pub fn read(r: impl Read) -> MetaData { 248 let mut rr: Vec<MetaData> = vec!(); 249 let mut metadata = MetaData::empty(); 250 let bf = BufReader::new(r); 251 let mut tp = TurtleParser::new(bf, None); 252 let r: Result<_, TurtleError> = tp.parse_all(&mut |r| { 253 match r { 254 Triple{subject, predicate, object } => { 255 match handle_parse_match(&mut metadata, r) { 256 Err(e) => { 257 error!("error parsing rdf source: {:?}", e); 258 }, 259 _ => {}, 260 }; 261 }, 262 _ => {}, 263 } 264 Ok(()) 265 }); 266 metadata 267 } 268 269 #[cfg(test)] 270 mod tests { 271 use super::{ 272 write, 273 read, 274 }; 275 use super::MetaData; 276 use crate::digest; 277 use std::io::stdout; 278 use std::fs::File; 279 use std::default::Default; 280 use biblatex::EntryType; 281 use env_logger; 282 283 #[test] 284 fn test_turtle_write() { 285 let mut digest = Vec::with_capacity(64); 286 digest.resize(64, 0x2a); 287 let digest_sha = digest::from_vec(Vec::from(digest)).unwrap(); 288 let mut m = MetaData::new("foo", "bar", EntryType::Article, digest_sha, None); 289 m.set_subject("baz"); 290 m.set_mime_str("foo/bar"); 291 m.set_language("nb-NO"); 292 //let v = stdout(); 293 let mut v: Vec<u8> = vec!(); 294 let r = write(&m, v); 295 } 296 297 #[test] 298 fn test_turtle_read() { 299 let f = File::open("testdata/meta.ttl").unwrap(); 300 read(&f); 301 } 302 }