meta.rs (19743B)
1 use std::path; 2 use std::fmt; 3 use xattr; 4 use hex; 5 use mime::{ 6 Mime 7 }; 8 use sha2::{ 9 Sha512, 10 Sha256, 11 Digest, 12 }; 13 use std::fs::{ 14 File, 15 metadata, 16 }; 17 use std::path::Path; 18 use std::io::{ 19 Read, 20 BufRead, 21 BufReader, 22 }; 23 use unic_langid_impl::LanguageIdentifier; 24 use std::str::FromStr; 25 use std::os::linux::fs::MetadataExt; 26 27 use biblatex::EntryType; 28 29 #[cfg(feature = "md5")] 30 use md5::Context; 31 32 #[cfg(feature = "magic")] 33 use tree_magic; 34 35 use crate::dc::{ 36 DCMetaData, 37 DC_XATTR_TITLE, 38 DC_XATTR_CREATOR, 39 DC_XATTR_SUBJECT, 40 DC_XATTR_LANGUAGE, 41 DC_XATTR_TYPE, 42 DC_XATTR_MEDIATYPE, 43 }; 44 use crate::error::ParseError; 45 use crate::digest; 46 47 use log::{ 48 debug, 49 }; 50 51 /// Date elements as d/m/Y tuple. 52 pub type PublishDate = (u8, u8, u32); 53 54 /// Alias for file name (basename). 55 pub type FileName = String; 56 57 /// Alias for absolute file path. 58 pub type FilePath = String; 59 60 /// Represents the full metadata for a media file. 61 pub struct MetaData { 62 /// The Dublin Core vocabulary parts of the metadata. 63 dc: DCMetaData, 64 /// The digest of the file that the metadata is keyed to. 65 digest: digest::RecordDigest, 66 /// Optional local filename, e.g. to use for HTTP `Content-Disposition` header, rename matching files to client's original name, etc. 67 local_name: Option<FileName>, 68 /// Publication date of the content that the media represents. 69 publish_date: PublishDate, 70 } 71 72 pub fn digests_from_path(filepath: &path::Path, digest_types: &Vec<digest::DigestType>) -> Vec<digest::RecordDigest> { 73 let mut r: Vec<digest::RecordDigest> = vec!(); 74 for v in digest_types { 75 match v { 76 digest::DigestType::Sha512 => { 77 let digest = digest_sha512_from_path(filepath); 78 r.push(digest); 79 }, 80 digest::DigestType::Sha256 => { 81 let digest = digest_sha256_from_path(filepath); 82 r.push(digest); 83 }, 84 #[cfg(feature = "md5")] 85 digest::DigestType::MD5 => { 86 let digest = digest_md5_from_path(filepath); 87 r.push(digest); 88 }, 89 }; 90 } 91 r 92 } 93 94 #[cfg(feature = "md5")] 95 pub fn digest_md5_from_path(filepath: &path::Path) -> digest::RecordDigest { 96 let mut ctx = md5::Context::new(); 97 let mut f = File::open(filepath).unwrap(); 98 let mut buf = [0; 512]; 99 100 let mut run = true; 101 while run { 102 let c = f.read(&mut buf[..]).unwrap(); 103 if c < 512 { 104 run = false; 105 } 106 if c > 0 { 107 ctx.consume(&buf[..c]); 108 } 109 } 110 let d = ctx.compute(); 111 digest::RecordDigest::MD5(d.to_vec()) 112 } 113 114 /// Generates the native `sha512` digest of a file. 115 /// 116 /// # Arguments 117 /// 118 /// * `filepath` - Absolute path to file to calculate digest for. 119 pub fn digest_sha512_from_path(filepath: &path::Path) -> digest::RecordDigest { 120 let mut h = Sha512::new(); 121 let st = metadata(filepath).unwrap(); 122 let bs: u64 = st.st_blksize(); 123 let sz: u64 = st.st_size(); 124 let mut b: Vec<u8> = vec!(0; bs as usize); 125 let mut f = File::open(filepath).unwrap(); 126 let mut i: usize = 0; 127 while i < sz as usize { 128 let c = f.read(&mut b).unwrap(); 129 h.update(&b[..c]); 130 i += c; 131 } 132 let r = h.finalize().to_vec(); 133 digest::RecordDigest::Sha512(r) 134 } 135 136 /// Generates the native `sha256` digest of a file. 137 /// 138 /// # Arguments 139 /// 140 /// * `filepath` - Absolute path to file to calculate digest for. 141 pub fn digest_sha256_from_path(filepath: &path::Path) -> digest::RecordDigest { 142 let mut h = Sha256::new(); 143 let st = metadata(filepath).unwrap(); 144 let bs: u64 = st.st_blksize(); 145 let sz: u64 = st.st_size(); 146 let mut b: Vec<u8> = vec!(0; bs as usize); 147 let mut f = File::open(filepath).unwrap(); 148 let mut i: usize = 0; 149 while i < sz as usize { 150 let c = f.read(&mut b).unwrap(); 151 h.update(&b[..c]); 152 i += c; 153 } 154 let r = h.finalize().to_vec(); 155 digest::RecordDigest::Sha256(r) 156 } 157 158 impl MetaData { 159 /// Create a new MetaData instance with basic data. 160 /// 161 /// # Arguments 162 /// 163 /// * `title` - Maps to the [DCMetaData::title] field. 164 /// * `author` - Maps to the [DCMetaData::author] field. 165 /// * `entry_type` - Maps to the [DCMetaData::typ] field. 166 /// * `digest` - The digest of the media file. 167 /// * `filename` - The client's optional local file name for the media. 168 pub fn new(title: &str, author: &str, entry_type: EntryType, digest: digest::RecordDigest, filename: Option<FileName>) -> MetaData { 169 let dc = DCMetaData::new(title, author, entry_type); 170 171 let mut m = MetaData{ 172 dc: dc, 173 digest: digest::RecordDigest::Empty, 174 local_name: filename, 175 publish_date: (0, 0, 0), 176 }; 177 178 m.set_fingerprint(digest); 179 m 180 } 181 182 /// Create an empty MetaData instance. 183 pub fn empty() -> MetaData { 184 let dc = DCMetaData::new("", "", EntryType::Unknown(String::new())); 185 MetaData{ 186 dc: dc, 187 digest: digest::RecordDigest::Empty, 188 //local_name: filepath.to_str().unwrap().to_string(), 189 local_name: None, 190 publish_date: (0, 0, 0), 191 } 192 } 193 194 /// Set the [DCMetaData::title](DCMetaData::title) value. 195 pub fn set_title(&mut self, title: &str) { 196 self.dc.title = String::from(title); 197 } 198 199 /// Set the [DCMetaData::author](DCMetaData::author) value. 200 pub fn set_author(&mut self, author: &str) { 201 self.dc.author = String::from(author); 202 } 203 204 /// Set the digest as [digest::RecordDigest::Sha512](digest::RecordDigest::Sha512) instance of the provided 205 /// fingerprint. 206 pub fn set_fingerprint(&mut self, fingerprint: digest::RecordDigest) { 207 self.digest = fingerprint; //digest::from_vec(fingerprint).unwrap(); 208 } 209 210 /// Set the digest from the given URN string. 211 /// 212 /// The URN must specify a valid supported [digest](digest::from_urn) scheme. 213 pub fn set_fingerprint_urn(&mut self, urn: &str) { 214 self.digest = digest::from_urn(urn).unwrap(); 215 } 216 217 /// Returns the current [DCMetaData::title](DCMetaData::title) value. 218 pub fn title(&self) -> String { 219 self.dc.title.clone() 220 } 221 222 /// Returns the current [DCMetaData::author](DCMetaData::author) value. 223 pub fn author(&self) -> String { 224 self.dc.author.clone() 225 } 226 227 /// Set the [DCMetaData::typ](DCMetaData::typ) value. 228 pub fn set_typ(&mut self, typ: &str) { 229 self.dc.typ = EntryType::from_str(typ).unwrap(); 230 } 231 232 /// Returns the current [DCMetaData::typ](DCMetaData::typ) value. 233 pub fn typ(&self) -> EntryType { 234 self.dc.typ.clone() 235 } 236 237 /// Set the current [DCMetaData::subject](DCMetaData::subject) value. 238 pub fn set_subject(&mut self, v: &str) { 239 self.dc.subject = Some(String::from(v)); 240 } 241 242 /// Returns the current [DCMetaData::subject](DCMetaData::subject) value. 243 pub fn subject(&self) -> Option<String> { 244 return self.dc.subject.clone(); 245 } 246 247 /// Set the current [DCMetaData::mime](DCMetaData::mime) value. 248 pub fn set_mime(&mut self, m: Mime) { 249 self.dc.mime = Some(m); 250 } 251 252 /// Set the current [DCMetaData::mime](DCMetaData::mime) value from the given MIME identifier string. 253 pub fn set_mime_str(&mut self, s: &str) { 254 match Mime::from_str(s) { 255 Ok(v) => { 256 self.set_mime(v); 257 }, 258 Err(e) => { 259 panic!("invalid mime"); 260 }, 261 }; 262 } 263 264 /// Returns the current [DCMetaData::mime](DCMetaData::mime) value. 265 pub fn mime(&self) -> Option<Mime> { 266 self.dc.mime.clone() 267 } 268 269 /// Set the current [DCMetaData::language](DCMetaData::language) value. 270 pub fn set_language(&mut self, s: &str) { 271 let v = s.parse().unwrap(); 272 self.dc.language = Some(v); 273 } 274 275 /// Returns the current [DCMetaData::language](DCMetaData::language) value. 276 pub fn language(&self) -> Option<LanguageIdentifier> { 277 self.dc.language.clone() 278 } 279 280 /// 281 pub fn urn(&self) -> String { 282 self.digest.urn() 283 } 284 285 /// 286 pub fn fingerprint(&self) -> String { 287 let digest_fingerprint = self.digest.fingerprint(); 288 return hex::encode(digest_fingerprint); 289 } 290 291 /// Instantiate metadata from the extended attributes of the file in `filepath`. 292 pub fn from_xattr(filepath: &path::Path) -> Result<MetaData, ParseError> { 293 294 let mut title: String = String::new(); 295 let mut author: String = String::new(); 296 let mut typ: EntryType = EntryType::Unknown(String::new()); 297 let filename: FileName; 298 299 debug!("Calculate digest for file {:?}", &filepath); 300 let digest = digest_sha512_from_path(filepath); 301 debug!("Calculated digest {} for file {:?}", hex::encode(digest.fingerprint()), &filepath); 302 303 filename = filepath.file_name() 304 .unwrap() 305 .to_os_string() 306 .into_string() 307 .unwrap(); 308 309 let title_src = match xattr::get(filepath, "user.dcterms:title") { 310 Ok(v) => { 311 v 312 }, 313 Err(e) => { 314 return Err(ParseError::new("title missing")); 315 } 316 }; 317 match title_src { 318 Some(v) => { 319 let s = std::str::from_utf8(&v).unwrap(); 320 title.push_str(s); 321 }, 322 None => {}, 323 } 324 325 let author_src = xattr::get(filepath, "user.dcterms:creator").unwrap(); 326 match author_src { 327 Some(v) => { 328 let s = std::str::from_utf8(&v).unwrap(); 329 author.push_str(s); 330 }, 331 None => {}, 332 } 333 334 335 let typ_src = xattr::get(filepath, "user.dcterms:type").unwrap(); 336 match typ_src { 337 Some(v) => { 338 let s = std::str::from_utf8(&v).unwrap(); 339 typ = EntryType::new(s); 340 }, 341 None => {}, 342 } 343 344 let mut metadata = MetaData::new(title.as_str(), author.as_str(), typ, digest, Some(filename)); 345 if !metadata.validate() { 346 return Err(ParseError::new("invalid input")); 347 } 348 349 match xattr::get(filepath, "user.dcterms:subject") { 350 Ok(v) => { 351 match v { 352 Some(v) => { 353 let s = std::str::from_utf8(&v).unwrap(); 354 metadata.set_subject(s); 355 }, 356 None => {}, 357 } 358 }, 359 _ => {}, 360 }; 361 362 match xattr::get(filepath, "user.dcterms:MediaType") { 363 Ok(v) => { 364 match v { 365 Some(v) => { 366 let s = std::str::from_utf8(&v).unwrap(); 367 metadata.set_mime_str(s); 368 }, 369 None => {}, 370 } 371 }, 372 _ => {}, 373 } 374 375 match xattr::get(filepath, "user.dcterms:language") { 376 Ok(v) => { 377 match v { 378 Some(v) => { 379 let s = std::str::from_utf8(&v).unwrap(); 380 metadata.set_language(s); 381 }, 382 None => {}, 383 } 384 }, 385 _ => {}, 386 } 387 388 #[cfg(feature = "magic")] 389 metadata.set_mime_magic(filepath); 390 391 Ok(metadata) 392 } 393 394 395 /// Applies the metadata as extended file attributes of the file in `filepath`. 396 /// 397 /// Will always export: 398 /// 399 /// * [title](DCMetaData::DC_XATTR_TITLE) 400 /// * [creator](DCMetaData::DC_XATTR_CREATOR) 401 /// * [category of file contents](DCMetaData::DC_XATTR_TYPE) 402 /// 403 /// Will export, if defined: 404 /// 405 /// * [language](DCMetaData::DC_XATTR_LANGUAGE) 406 /// * [MIME type of file](DCMetaData::DC_XATTR_MEDIATYPE) 407 /// * [A description of the subject matter of the file contents](DCMetaData::DC_XATTR_SUBJECT) 408 pub fn to_xattr(&self, filepath: &path::Path) -> Result<(), std::io::Error> { 409 let filename = filepath.file_name() 410 .unwrap() 411 .to_os_string() 412 .into_string() 413 .unwrap(); 414 415 xattr::set(filepath, DC_XATTR_TITLE, self.dc.title.as_bytes()); 416 xattr::set(filepath, DC_XATTR_CREATOR, self.dc.author.as_bytes()); 417 xattr::set(filepath, DC_XATTR_TYPE, self.dc.typ.to_string().as_bytes()); 418 419 match &self.dc.language { 420 Some(v) => { 421 xattr::set(filepath, DC_XATTR_LANGUAGE, v.to_string().as_bytes()); 422 }, 423 _ => {}, 424 }; 425 426 match &self.dc.mime { 427 Some(v) => { 428 xattr::set(filepath, DC_XATTR_MEDIATYPE, v.to_string().as_bytes()); 429 }, 430 _ => {}, 431 }; 432 433 match &self.dc.subject { 434 Some(v) => { 435 xattr::set(filepath, DC_XATTR_SUBJECT, v.as_bytes()); 436 }, 437 _ => {}, 438 }; 439 440 Ok(()) 441 } 442 443 fn process_predicate(&mut self, predicate: &str, object: &str) -> bool { 444 match predicate.to_lowercase().as_str() { 445 "title" => { 446 self.set_title(object); 447 debug!("found title: {}", object); 448 }, 449 "author" => { 450 self.set_author(object); 451 debug!("found author: {}", object); 452 }, 453 "subject" => { 454 self.set_subject(object); 455 debug!("found subject: {}", object); 456 }, 457 "typ" => { 458 self.set_typ(object); 459 debug!("found typ: {}", object); 460 }, 461 "language" => { 462 self.set_language(object); 463 debug!("found language: {}", object); 464 }, 465 "mime" => { 466 self.set_mime_str(object); 467 debug!("found mime: {}", object); 468 }, 469 _ => { 470 return false; 471 }, 472 } 473 true 474 } 475 476 fn process_line(&mut self, s: &str) { 477 match s.split_once(":") { 478 Some((predicate, object_raw)) => { 479 let object = object_raw.trim(); 480 self.process_predicate(predicate, object); 481 }, 482 None => { 483 }, 484 } 485 } 486 487 #[cfg(feature = "magic")] 488 /// Automatically detect media type of file in `path`. 489 pub fn set_mime_magic(&mut self, path: &path::Path) { 490 if self.mime() == None { 491 let mime = tree_magic::from_filepath(path); 492 self.set_mime_str(&mime); 493 debug!("magic set mime {}", mime); 494 } 495 } 496 497 /// Parse metadata from simplified metadata format contained in file in `path`. 498 /// 499 /// see [MetaData::from_file](MetaData::from_file) 500 pub fn from_path(p: &path::Path) -> Result<MetaData, std::io::Error> { 501 let f = File::open(&p).unwrap(); 502 debug!("openning {}", p.display()); 503 let mut m = MetaData::from_file(f).unwrap(); 504 Ok(m) 505 } 506 507 /// Parse metadata from simplified metadata format contained in the given file instance `f`. 508 /// 509 /// TODO: describe format. 510 pub fn from_file(f: File) -> Result<MetaData, std::io::Error> { 511 let mut m = MetaData::empty(); 512 //let f = File::open(path).unwrap(); 513 let mut fb = BufReader::new(f); 514 loop { 515 let mut s = String::new(); 516 match fb.read_line(&mut s) { 517 Ok(v) => { 518 if v == 0 { 519 break; 520 } 521 m.process_line(s.as_str()); 522 }, 523 Err(e) => { 524 return Err(e); 525 }, 526 } 527 } 528 Ok(m) 529 } 530 531 532 /// Check whether a Metadata instance represents a valid entry. 533 pub fn validate(&self) -> bool { 534 let empty = String::new(); 535 if self.title() == empty { 536 return false; 537 } 538 if self.author() == empty { 539 return false; 540 } 541 true 542 } 543 } 544 545 impl fmt::Debug for MetaData { 546 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 547 write!(f, "{}", format_args!("title \"{}\" author \"{}\" digest {}", self.title(), self.author(), self.urn())) 548 } 549 } 550 551 #[cfg(test)] 552 mod tests { 553 use super::MetaData; 554 use std::path; 555 use tempfile::NamedTempFile; 556 use biblatex::EntryType; 557 use std::fs::{ 558 File, 559 write 560 }; 561 use crate::digest; 562 use env_logger; 563 use crate::dc::{ 564 DC_XATTR_TITLE, 565 DC_XATTR_CREATOR, 566 }; 567 568 #[test] 569 fn test_metadata_create() { 570 let s = path::Path::new("testdata/bitcoin.pdf"); 571 let meta = MetaData::from_xattr(s).unwrap(); 572 assert_eq!(meta.dc.title, "Bitcoin: A Peer-to-Peer Electronic Cash System"); 573 assert_eq!(meta.dc.author, "Satoshi Nakamoto"); 574 assert_eq!(meta.urn(), String::from("sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e")); 575 assert_eq!(meta.fingerprint(), String::from("2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e")); 576 } 577 578 #[test] 579 fn test_metadata_set() { 580 let digest_hex = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"; 581 let digest = hex::decode(&digest_hex).unwrap(); 582 583 let f = NamedTempFile::new_in(".").unwrap(); 584 let fp = f.path(); 585 let fps = String::from(fp.to_str().unwrap()); 586 587 let digest_sha = digest::from_vec(digest).unwrap(); 588 let mut m = MetaData::new("foo", "bar", EntryType::Article, digest_sha, Some(fps)); 589 m.set_subject("baz"); 590 m.set_mime_str("foo/bar"); 591 m.set_language("nb-NO"); 592 m.to_xattr(fp); 593 594 let m_check = MetaData::from_xattr(fp).unwrap(); 595 assert_eq!(m_check.title(), "foo"); 596 assert_eq!(m_check.author(), "bar"); 597 assert_eq!(m_check.fingerprint(), digest_hex); 598 assert_eq!(m_check.urn(), String::from("sha512:") + digest_hex); 599 assert_eq!(m_check.typ(), EntryType::Article); 600 assert_eq!(m_check.subject().unwrap(), "baz"); 601 assert_eq!(m_check.mime().unwrap(), "foo/bar"); 602 assert_eq!(m_check.language().unwrap(), "nb-NO"); 603 } 604 605 #[test] 606 fn test_metadata_file() { 607 let f = File::open("testdata/meta.txt").unwrap(); 608 let m_check = MetaData::from_file(f).unwrap(); 609 assert_eq!(m_check.title(), "foo"); 610 assert_eq!(m_check.author(), "bar"); 611 assert_eq!(m_check.typ(), EntryType::Report); 612 assert_eq!(m_check.subject().unwrap(), "baz"); 613 assert_eq!(m_check.mime().unwrap(), "text/plain"); 614 assert_eq!(m_check.language().unwrap(), "nb-NO"); 615 } 616 617 #[test] 618 fn test_metadata_xattr_magic() { 619 let s = path::Path::new("testdata/bitcoin.pdf"); 620 let meta = MetaData::from_xattr(s).unwrap(); 621 622 #[cfg(feature = "magic")] 623 { 624 assert_eq!(meta.mime().unwrap(), "application/pdf"); 625 let f = NamedTempFile::new_in(".").unwrap(); 626 let fp = f.path(); 627 write(&f, &[0, 1, 2, 3]); 628 xattr::set(fp, DC_XATTR_TITLE, "foo".as_bytes()); 629 xattr::set(fp, DC_XATTR_CREATOR, "bar".as_bytes()); 630 let meta_empty = MetaData::from_xattr(fp).unwrap(); 631 assert_eq!(meta_empty.mime().unwrap(), "application/octet-stream"); 632 } 633 } 634 }