kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

meta.rs (19743B)


      1 use std::path;
      2 use std::fmt;
      3 use xattr;
      4 use hex;
      5 use mime::{
      6     Mime
      7 };
      8 use sha2::{
      9     Sha512,
     10     Sha256,
     11     Digest,
     12 };
     13 use std::fs::{
     14     File,
     15     metadata,
     16 };
     17 use std::path::Path;
     18 use std::io::{
     19     Read,
     20     BufRead,
     21     BufReader,
     22 };
     23 use unic_langid_impl::LanguageIdentifier;
     24 use std::str::FromStr;
     25 use std::os::linux::fs::MetadataExt;
     26 
     27 use biblatex::EntryType;
     28 
     29 #[cfg(feature = "md5")]
     30 use md5::Context;
     31 
     32 #[cfg(feature = "magic")]
     33 use tree_magic;
     34 
     35 use crate::dc::{
     36     DCMetaData,
     37     DC_XATTR_TITLE,
     38     DC_XATTR_CREATOR,
     39     DC_XATTR_SUBJECT,
     40     DC_XATTR_LANGUAGE,
     41     DC_XATTR_TYPE,
     42     DC_XATTR_MEDIATYPE,
     43 };
     44 use crate::error::ParseError;
     45 use crate::digest;
     46 
     47 use log::{
     48     debug,
     49 };
     50 
/// Date elements as a d/m/Y tuple, i.e. `(day, month, year)`.
pub type PublishDate = (u8, u8, u32);

/// Alias for file name (basename).
pub type FileName = String;

/// Alias for absolute file path.
pub type FilePath = String;
     59 
/// Represents the full metadata for a media file.
///
/// Bundles the Dublin Core fields with the digest that identifies the file,
/// an optional local file name and a publication date.
pub struct MetaData {
    /// The Dublin Core vocabulary parts of the metadata.
    dc: DCMetaData,
    /// The digest of the file that the metadata is keyed to.
    digest: digest::RecordDigest,
    /// Optional local filename, e.g. to use for HTTP `Content-Disposition` header, rename matching files to client's original name, etc.
    local_name: Option<FileName>,
    /// Publication date of the content that the media represents.
    publish_date: PublishDate,
}
     71 
     72 pub fn digests_from_path(filepath: &path::Path, digest_types: &Vec<digest::DigestType>) -> Vec<digest::RecordDigest> {
     73     let mut r: Vec<digest::RecordDigest> = vec!();
     74     for v in digest_types {
     75         match v {
     76             digest::DigestType::Sha512 => {
     77                 let digest = digest_sha512_from_path(filepath);
     78                 r.push(digest);
     79             },
     80             digest::DigestType::Sha256 => {
     81                 let digest = digest_sha256_from_path(filepath);
     82                 r.push(digest);
     83             },
     84             #[cfg(feature = "md5")]
     85             digest::DigestType::MD5 => {
     86                 let digest = digest_md5_from_path(filepath);
     87                 r.push(digest);
     88             },
     89         };
     90     }
     91     r
     92 }
     93 
     94 #[cfg(feature = "md5")]
     95 pub fn digest_md5_from_path(filepath: &path::Path) -> digest::RecordDigest {
     96     let mut ctx = md5::Context::new();
     97     let mut f = File::open(filepath).unwrap();
     98     let mut buf = [0; 512];
     99 
    100     let mut run = true;
    101     while run {
    102         let c = f.read(&mut buf[..]).unwrap();
    103         if c < 512 {
    104             run = false;
    105         }
    106         if c > 0 {
    107             ctx.consume(&buf[..c]);
    108         }
    109     }
    110     let d = ctx.compute();
    111     digest::RecordDigest::MD5(d.to_vec())
    112 }
    113 
    114 /// Generates the native `sha512` digest of a file.
    115 ///
    116 /// # Arguments
    117 ///
    118 /// * `filepath` - Absolute path to file to calculate digest for.
    119 pub fn digest_sha512_from_path(filepath: &path::Path) -> digest::RecordDigest {
    120     let mut h = Sha512::new();
    121     let st = metadata(filepath).unwrap();
    122     let bs: u64 = st.st_blksize();
    123     let sz: u64 = st.st_size();
    124     let mut b: Vec<u8> = vec!(0; bs as usize);
    125     let mut f = File::open(filepath).unwrap();
    126     let mut i: usize = 0;
    127     while i < sz as usize {
    128         let c = f.read(&mut b).unwrap();
    129         h.update(&b[..c]);
    130         i += c;
    131     }
    132     let r = h.finalize().to_vec();
    133     digest::RecordDigest::Sha512(r)
    134 }
    135 
    136 /// Generates the native `sha256` digest of a file.
    137 ///
    138 /// # Arguments
    139 ///
    140 /// * `filepath` - Absolute path to file to calculate digest for.
    141 pub fn digest_sha256_from_path(filepath: &path::Path) -> digest::RecordDigest {
    142     let mut h = Sha256::new();
    143     let st = metadata(filepath).unwrap();
    144     let bs: u64 = st.st_blksize();
    145     let sz: u64 = st.st_size();
    146     let mut b: Vec<u8> = vec!(0; bs as usize);
    147     let mut f = File::open(filepath).unwrap();
    148     let mut i: usize = 0;
    149     while i < sz as usize {
    150         let c = f.read(&mut b).unwrap();
    151         h.update(&b[..c]);
    152         i += c;
    153     }
    154     let r = h.finalize().to_vec();
    155     digest::RecordDigest::Sha256(r)
    156 }
    157 
    158 impl MetaData {
    159     /// Create a new MetaData instance with basic data.
    160     ///
    161     /// # Arguments
    162     ///
    163     /// * `title` - Maps to the [DCMetaData::title] field.
    164     /// * `author` - Maps to the [DCMetaData::author] field.
    165     /// * `entry_type` - Maps to the [DCMetaData::typ] field.
    166     /// * `digest` - The digest of the media file.
    167     /// * `filename` - The client's optional local file name for the media.
    168     pub fn new(title: &str, author: &str, entry_type: EntryType, digest: digest::RecordDigest, filename: Option<FileName>) -> MetaData {
    169         let dc = DCMetaData::new(title, author, entry_type);
    170 
    171         let mut m = MetaData{
    172                 dc: dc,
    173                 digest: digest::RecordDigest::Empty,
    174                 local_name: filename,
    175                 publish_date: (0, 0, 0),
    176         };
    177 
    178         m.set_fingerprint(digest);
    179         m
    180     }
    181 
    182     /// Create an empty MetaData instance.
    183     pub fn empty() -> MetaData {
    184         let dc = DCMetaData::new("", "", EntryType::Unknown(String::new()));
    185         MetaData{
    186                 dc: dc,
    187                 digest: digest::RecordDigest::Empty,
    188                 //local_name: filepath.to_str().unwrap().to_string(),
    189                 local_name: None,
    190                 publish_date: (0, 0, 0),
    191         }
    192     }
    193 
    194     /// Set the [DCMetaData::title](DCMetaData::title) value.
    195     pub fn set_title(&mut self, title: &str) {
    196         self.dc.title = String::from(title);
    197     }
    198 
    199     /// Set the [DCMetaData::author](DCMetaData::author) value.
    200     pub fn set_author(&mut self, author: &str) {
    201         self.dc.author = String::from(author);
    202     }
    203 
    /// Set the digest that the metadata is keyed to.
    ///
    /// The provided [digest::RecordDigest](digest::RecordDigest) is stored as given; it is not
    /// converted to any particular digest variant.
    pub fn set_fingerprint(&mut self, fingerprint: digest::RecordDigest) {
        self.digest = fingerprint;
    }
    209 
    210     /// Set the digest from the given URN string.
    211     ///
    212     /// The URN must specify a valid supported [digest](digest::from_urn) scheme.
    213     pub fn set_fingerprint_urn(&mut self, urn: &str) {
    214         self.digest = digest::from_urn(urn).unwrap();
    215     }
    216 
    217     /// Returns the current [DCMetaData::title](DCMetaData::title) value.
    218     pub fn title(&self) -> String {
    219         self.dc.title.clone()
    220     }
    221 
    222     /// Returns the current [DCMetaData::author](DCMetaData::author) value.
    223     pub fn author(&self) -> String {
    224         self.dc.author.clone()
    225     }
    226 
    227     /// Set the [DCMetaData::typ](DCMetaData::typ) value.
    228     pub fn set_typ(&mut self, typ: &str) {
    229         self.dc.typ = EntryType::from_str(typ).unwrap();
    230     }
    231 
    232     /// Returns the current [DCMetaData::typ](DCMetaData::typ) value.
    233     pub fn typ(&self) -> EntryType {
    234         self.dc.typ.clone()
    235     }
    236 
    237     /// Set the current [DCMetaData::subject](DCMetaData::subject) value.
    238     pub fn set_subject(&mut self, v: &str) {
    239         self.dc.subject = Some(String::from(v));
    240     }
    241 
    242     /// Returns the current [DCMetaData::subject](DCMetaData::subject) value.
    243     pub fn subject(&self) -> Option<String> {
    244         return self.dc.subject.clone();
    245     }
    246 
    247     /// Set the current [DCMetaData::mime](DCMetaData::mime) value.
    248     pub fn set_mime(&mut self, m: Mime) {
    249         self.dc.mime = Some(m);
    250     }
    251 
    252     /// Set the current [DCMetaData::mime](DCMetaData::mime) value from the given MIME identifier string.
    253     pub fn set_mime_str(&mut self, s: &str) {
    254         match Mime::from_str(s) {
    255             Ok(v) => {
    256                 self.set_mime(v);
    257             },
    258             Err(e) => {
    259                 panic!("invalid mime");
    260             },
    261         };
    262     }
    263 
    264     /// Returns the current [DCMetaData::mime](DCMetaData::mime) value.
    265     pub fn mime(&self) -> Option<Mime> {
    266         self.dc.mime.clone()
    267     }
    268 
    269     /// Set the current [DCMetaData::language](DCMetaData::language) value.
    270     pub fn set_language(&mut self, s: &str) {
    271         let v = s.parse().unwrap();
    272         self.dc.language = Some(v);
    273     }
    274 
    275     /// Returns the current [DCMetaData::language](DCMetaData::language) value.
    276     pub fn language(&self) -> Option<LanguageIdentifier> {
    277         self.dc.language.clone()
    278     }
    279 
    280     ///
    281     pub fn urn(&self) -> String {
    282         self.digest.urn()
    283     }
    284 
    285     ///
    286     pub fn fingerprint(&self) -> String {
    287         let digest_fingerprint = self.digest.fingerprint();
    288         return hex::encode(digest_fingerprint);
    289     }
    290 
    291     /// Instantiate metadata from the extended attributes of the file in `filepath`.
    292     pub fn from_xattr(filepath: &path::Path) -> Result<MetaData, ParseError> {
    293 
    294         let mut title: String = String::new();
    295         let mut author: String = String::new();
    296         let mut typ: EntryType = EntryType::Unknown(String::new());
    297         let filename: FileName; 
    298 
    299         debug!("Calculate digest for file {:?}",  &filepath);
    300         let digest = digest_sha512_from_path(filepath);
    301         debug!("Calculated digest {} for file {:?}", hex::encode(digest.fingerprint()), &filepath);
    302 
    303         filename = filepath.file_name()
    304             .unwrap()
    305             .to_os_string()
    306             .into_string()
    307             .unwrap();
    308 
    309         let title_src = match xattr::get(filepath, "user.dcterms:title") {
    310             Ok(v) => {
    311                 v
    312             },
    313             Err(e) => {
    314                 return Err(ParseError::new("title missing"));
    315             }
    316         };
    317         match title_src {
    318             Some(v) => {
    319                 let s = std::str::from_utf8(&v).unwrap();
    320                 title.push_str(s);
    321             },
    322             None => {},
    323         }
    324 
    325         let author_src = xattr::get(filepath, "user.dcterms:creator").unwrap();
    326         match author_src {
    327             Some(v) => {
    328                 let s = std::str::from_utf8(&v).unwrap();
    329                 author.push_str(s);
    330             },
    331             None => {},
    332         }
    333 
    334 
    335         let typ_src = xattr::get(filepath, "user.dcterms:type").unwrap();
    336         match typ_src {
    337             Some(v) => {
    338                 let s = std::str::from_utf8(&v).unwrap();
    339                 typ = EntryType::new(s);
    340             },
    341             None => {},
    342         }
    343 
    344         let mut metadata = MetaData::new(title.as_str(), author.as_str(), typ, digest, Some(filename));
    345         if !metadata.validate() {
    346             return Err(ParseError::new("invalid input"));
    347         }
    348 
    349         match xattr::get(filepath, "user.dcterms:subject") {
    350             Ok(v) => {
    351                 match v {
    352                     Some(v) => {
    353                         let s = std::str::from_utf8(&v).unwrap();
    354                         metadata.set_subject(s);
    355                     },
    356                     None => {},
    357                 }
    358             },
    359             _ => {},
    360         };
    361 
    362         match xattr::get(filepath, "user.dcterms:MediaType") {
    363             Ok(v) => {
    364                 match v {
    365                     Some(v) => {
    366                         let s = std::str::from_utf8(&v).unwrap();
    367                         metadata.set_mime_str(s);
    368                     },
    369                     None => {},
    370                 }
    371             },
    372             _ => {},
    373         }
    374 
    375         match xattr::get(filepath, "user.dcterms:language") {
    376             Ok(v) => {
    377                 match v {
    378                     Some(v) => {
    379                         let s = std::str::from_utf8(&v).unwrap();
    380                         metadata.set_language(s);
    381                     },
    382                     None => {},
    383                 }
    384             },
    385             _ => {},
    386         }
    387 
    388         #[cfg(feature = "magic")]
    389         metadata.set_mime_magic(filepath);
    390 
    391         Ok(metadata)
    392     }
    393 
    394 
    395     /// Applies the metadata as extended file attributes of the file in `filepath`.
    396     ///
    397     /// Will always export:
    398     ///
    399     /// * [title](DCMetaData::DC_XATTR_TITLE)
    400     /// * [creator](DCMetaData::DC_XATTR_CREATOR)
    401     /// * [category of file contents](DCMetaData::DC_XATTR_TYPE)
    402     ///
    403     /// Will export, if defined:
    404     ///
    405     /// * [language](DCMetaData::DC_XATTR_LANGUAGE)
    406     /// * [MIME type of file](DCMetaData::DC_XATTR_MEDIATYPE)
    407     /// * [A description of the subject matter of the file contents](DCMetaData::DC_XATTR_SUBJECT)
    408     pub fn to_xattr(&self, filepath: &path::Path) -> Result<(), std::io::Error> {
    409         let filename = filepath.file_name()
    410             .unwrap()
    411             .to_os_string()
    412             .into_string()
    413             .unwrap();
    414 
    415         xattr::set(filepath, DC_XATTR_TITLE, self.dc.title.as_bytes());
    416         xattr::set(filepath, DC_XATTR_CREATOR, self.dc.author.as_bytes());
    417         xattr::set(filepath, DC_XATTR_TYPE, self.dc.typ.to_string().as_bytes());
    418 
    419         match &self.dc.language {
    420             Some(v) => {
    421                 xattr::set(filepath, DC_XATTR_LANGUAGE, v.to_string().as_bytes());
    422             },
    423             _ => {},
    424         };
    425 
    426         match &self.dc.mime {
    427             Some(v) => {
    428                 xattr::set(filepath, DC_XATTR_MEDIATYPE, v.to_string().as_bytes());
    429             },
    430             _ => {},
    431         };
    432 
    433         match &self.dc.subject {
    434             Some(v) => {
    435                 xattr::set(filepath, DC_XATTR_SUBJECT, v.as_bytes());
    436             },
    437             _ => {},
    438         };
    439 
    440         Ok(())
    441     }
    442 
    443     fn process_predicate(&mut self, predicate: &str, object: &str) -> bool {
    444         match predicate.to_lowercase().as_str() {
    445             "title" => {
    446                 self.set_title(object);
    447                 debug!("found title: {}", object);
    448             },
    449             "author" => {
    450                 self.set_author(object);
    451                 debug!("found author: {}", object);
    452             },
    453             "subject" => {
    454                 self.set_subject(object);
    455                 debug!("found subject: {}", object);
    456             },
    457             "typ" => {
    458                 self.set_typ(object);
    459                 debug!("found typ: {}", object);
    460             },
    461             "language" => {
    462                 self.set_language(object);
    463                 debug!("found language: {}", object);
    464             },
    465             "mime" => {
    466                 self.set_mime_str(object);
    467                 debug!("found mime: {}", object);
    468             },
    469             _ => {
    470                 return false;
    471             },
    472         }
    473         true
    474     }
    475 
    476     fn process_line(&mut self, s: &str) {
    477         match s.split_once(":") {
    478             Some((predicate, object_raw)) => {
    479                 let object = object_raw.trim();
    480                 self.process_predicate(predicate, object);
    481             },
    482             None => {
    483             },
    484         }
    485     }
    486 
    #[cfg(feature = "magic")]
    /// Detect the media type of the file in `path` and set it as the MIME value.
    ///
    /// Does nothing if a MIME value is already set.
    pub fn set_mime_magic(&mut self, path: &path::Path) {
        if self.mime().is_some() {
            return;
        }
        let detected = tree_magic::from_filepath(path);
        self.set_mime_str(&detected);
        debug!("magic set mime {}", detected);
    }
    496 
    497     /// Parse metadata from simplified metadata format contained in file in `path`.
    498     ///
    499     /// see [MetaData::from_file](MetaData::from_file)
    500     pub fn from_path(p: &path::Path) -> Result<MetaData, std::io::Error> {
    501         let f = File::open(&p).unwrap();
    502         debug!("openning {}", p.display());
    503         let mut m = MetaData::from_file(f).unwrap();
    504         Ok(m)
    505     }
    506 
    507     /// Parse metadata from simplified metadata format contained in the given file instance `f`.
    508     ///
    509     /// TODO: describe format.
    510     pub fn from_file(f: File) -> Result<MetaData, std::io::Error> {
    511         let mut m = MetaData::empty();
    512         //let f = File::open(path).unwrap();
    513         let mut fb = BufReader::new(f);
    514         loop {
    515             let mut s = String::new();
    516             match fb.read_line(&mut s) {
    517                 Ok(v) => {
    518                     if v == 0 {
    519                         break;
    520                     }
    521                     m.process_line(s.as_str());
    522                 },
    523                 Err(e) => {
    524                     return Err(e);
    525                 },
    526             }
    527         }
    528         Ok(m)
    529     }
    530 
    531 
    532     /// Check whether a Metadata instance represents a valid entry.
    533     pub fn validate(&self) -> bool {
    534         let empty = String::new();
    535         if self.title() == empty {
    536             return false;
    537         }
    538         if self.author() == empty {
    539             return false;
    540         }
    541         true
    542     }
    543 }
    544 
    545 impl fmt::Debug for MetaData {
    546     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    547         write!(f, "{}", format_args!("title \"{}\" author \"{}\" digest {}", self.title(), self.author(), self.urn()))
    548     }
    549 }
    550 
    551 #[cfg(test)]
    552 mod tests {
    553     use super::MetaData;
    554     use std::path;
    555     use tempfile::NamedTempFile;
    556     use biblatex::EntryType;
    557     use std::fs::{
    558         File,
    559         write
    560     };
    561     use crate::digest;
    562     use env_logger;
    563     use crate::dc::{
    564         DC_XATTR_TITLE,
    565         DC_XATTR_CREATOR,
    566     };
    567 
    568     #[test]
    569     fn test_metadata_create() {
    570         let s = path::Path::new("testdata/bitcoin.pdf");
    571         let meta = MetaData::from_xattr(s).unwrap();
    572         assert_eq!(meta.dc.title, "Bitcoin: A Peer-to-Peer Electronic Cash System");
    573         assert_eq!(meta.dc.author, "Satoshi Nakamoto");
    574         assert_eq!(meta.urn(), String::from("sha512:2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e"));
    575         assert_eq!(meta.fingerprint(), String::from("2ac531ee521cf93f8419c2018f770fbb42c65396178e079a416e7038d3f9ab9fc2c35c4d838bc8b5dd68f4c13759fe9cdf90a46528412fefe1294cb26beabf4e"));
    576     }
    577 
    578     #[test]
    579     fn test_metadata_set() {
    580         let digest_hex = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e";
    581         let digest = hex::decode(&digest_hex).unwrap();
    582 
    583         let f = NamedTempFile::new_in(".").unwrap();
    584         let fp = f.path();
    585         let fps = String::from(fp.to_str().unwrap());
    586 
    587         let digest_sha = digest::from_vec(digest).unwrap();
    588         let mut m = MetaData::new("foo", "bar", EntryType::Article, digest_sha, Some(fps));
    589         m.set_subject("baz");
    590         m.set_mime_str("foo/bar");
    591         m.set_language("nb-NO");
    592         m.to_xattr(fp);
    593         
    594         let m_check = MetaData::from_xattr(fp).unwrap();
    595         assert_eq!(m_check.title(), "foo");
    596         assert_eq!(m_check.author(), "bar");
    597         assert_eq!(m_check.fingerprint(), digest_hex);
    598         assert_eq!(m_check.urn(), String::from("sha512:") + digest_hex);
    599         assert_eq!(m_check.typ(), EntryType::Article);
    600         assert_eq!(m_check.subject().unwrap(), "baz");
    601         assert_eq!(m_check.mime().unwrap(), "foo/bar");
    602         assert_eq!(m_check.language().unwrap(), "nb-NO");
    603     }
    604 
    605     #[test]
    606     fn test_metadata_file() {
    607         let f = File::open("testdata/meta.txt").unwrap();
    608         let m_check = MetaData::from_file(f).unwrap();
    609         assert_eq!(m_check.title(), "foo");
    610         assert_eq!(m_check.author(), "bar");
    611         assert_eq!(m_check.typ(), EntryType::Report);
    612         assert_eq!(m_check.subject().unwrap(), "baz");
    613         assert_eq!(m_check.mime().unwrap(), "text/plain");
    614         assert_eq!(m_check.language().unwrap(), "nb-NO");
    615     }
    616 
    617     #[test]
    618     fn test_metadata_xattr_magic() {
    619         let s = path::Path::new("testdata/bitcoin.pdf");
    620         let meta = MetaData::from_xattr(s).unwrap();
    621 
    622         #[cfg(feature = "magic")]
    623         {
    624             assert_eq!(meta.mime().unwrap(), "application/pdf");
    625             let f = NamedTempFile::new_in(".").unwrap();
    626             let fp = f.path();
    627             write(&f, &[0, 1, 2, 3]);
    628             xattr::set(fp, DC_XATTR_TITLE, "foo".as_bytes());
    629             xattr::set(fp, DC_XATTR_CREATOR, "bar".as_bytes());
    630             let meta_empty = MetaData::from_xattr(fp).unwrap();
    631             assert_eq!(meta_empty.mime().unwrap(), "application/octet-stream"); 
    632         }
    633     }
    634 }