kitab

Unnamed repository; edit this file 'description' to name the repository.
Info | Log | Files | Refs | LICENSE

main.rs (11115B)


      1 use std::default;
      2 use std::fs::{
      3     File,
      4     create_dir_all,
      5     metadata,
      6 };
      7 use std::io::Write;
      8 use std::path::{
      9     Path,
     10     PathBuf,
     11 };
     12 use std::str::FromStr;
     13 use env_logger;
     14 use clap::{
     15     App, 
     16     Arg,
     17     ArgMatches,
     18     SubCommand,
     19 };
     20 use directories::{
     21     BaseDirs,
     22 };
     23 use walkdir::WalkDir;
     24 use hex;
     25 use log::{
     26     debug,
     27     info,
     28     warn,
     29 };
     30 
     31 use biblatex::EntryType;
     32 use kitab::rdf::{
     33     read as rdf_read,
     34     read_all as rdf_read_all,
     35     write as rdf_write,
     36 };
     37 use kitab::biblatex::{
     38     read_all as biblatex_read_all,
     39 };
     40 use kitab::meta::{
     41     MetaData,
     42     digests_from_path,
     43 };
     44 use kitab::digest::from_urn;
     45 use kitab::digest::RecordDigest;
     46 use kitab::digest::DigestType;
     47 
     48 
     49 fn args_setup() -> ArgMatches<'static> {
     50     let mut o = App::new("kitab");
     51     o = o.version("0.0.1");
     52     o = o.author("Louis Holbrook <dev@holbrook.no>");
     53 
     54     o = o.arg(clap::Arg::with_name("store")
     55         .short("s")
     56         .long("store")
     57         .value_name("Store location")
     58         .takes_value(true)
     59         );
     60         
     61     let mut o_import = (
     62         SubCommand::with_name("import")
     63         .about("import information from file")
     64         .version("0.0.1")
     65         );
     66     o_import = o_import.arg(
     67         Arg::with_name("setdigest")
     68         .short("d")
     69         .long("digest")
     70         .help("Explicitly set digest")
     71         .multiple(true)
     72         .takes_value(true)
     73         .number_of_values(1)
     74         );
     75     o_import = o_import.arg(
     76         Arg::with_name("PATH")
     77         .help("Path to operate on")
     78         .required(true)
     79         );
     80     o = o.subcommand(o_import);
     81 
     82     let mut o_apply = (
     83         SubCommand::with_name("apply")
     84         .about("Apply metadata on matching files")
     85         .version("0.0.1")
     86         );
     87     o_apply = o_apply.arg(
     88         Arg::with_name("PATH")
     89         .help("Path to operate on")
     90         .required(true)
     91         .index(1)
     92         );
     93     o_apply = o_apply.arg(
     94         Arg::with_name("adddigest")
     95         .short("d")
     96         .long("digest")
     97         .help("Additional digest to store")
     98         .multiple(true)
     99         .takes_value(true)
    100         .number_of_values(1)
    101         );
    102     o = o.subcommand(o_apply);
    103 
    104 //    let mut o_entry = (
    105 //       SubCommand::with_name("new")
    106 //        .about("add metadata for file")
    107 //        .version("0.0.1")
    108 //        );
    109 //
    110 //    o_entry = o_entry.arg(clap::Arg::with_name("validators")
    111 //         .long("validator")
    112 //         .value_name("Add given validator engine")
    113 //         .multiple(true)
    114 //         .takes_value(true)
    115 //         );
    116 //
    117 //    o_entry = o_entry.arg(
    118 //        Arg::with_name("PATH")
    119 //        .help("Path to operate on")
    120 //        .required(true)
    121 //        .index(1)
    122 //        );
    123 //    o = o.subcommand(o_entry);
    124 
    125     o.get_matches()
    126 }
    127 
// commands
// kitab import <path> - for each file, attempt in order: import xattr, import rdf, import biblatex
// kitab apply <path> - recursively apply stored metadata to matching files
    131 
    132     fn resolve_directory(args: &ArgMatches) -> PathBuf {
    133         let r = match args.value_of("store") {
    134             Some(v) => {
    135                 v
    136             },
    137             _ => {
    138                 ""
    139             },
    140         };
    141         if r.len() != 0 {
    142             return PathBuf::from(r)
    143         }
    144         
    145 
    146         match BaseDirs::new() {
    147             Some(v) => {
    148             let d = v.data_dir();
    149             d.join("kitab")
    150                     .join("idx")
    151             },
    152             _ => {
    153                 PathBuf::from(".")
    154                     .join(".kitab")
    155                     .join("/idx")
    156         },
    157     }
    158 }
    159 
    160 fn str_to_path(args: &ArgMatches) -> PathBuf {
    161     let mut p_canon: PathBuf;
    162     match args.value_of("PATH") {
    163         Some(v) => {
    164             let p = &Path::new(v);
    165             match p.canonicalize() {
    166                 Ok(v) => {
    167                     p_canon = v.clone();
    168                 },
    169                 Err(e) => {
    170                     panic!("path error: {}", e);
    171                 },
    172             };
    173         },
    174         None => {
    175             panic!("path required"); 
    176         },
    177     }
    178     p_canon
    179 }
    180 
    181 fn store(index_path: &Path, m: &MetaData) {
    182     let fp = index_path.join(m.fingerprint());
    183     create_dir_all(&index_path);
    184     debug!("writing record for title {} to {:?}", m.title(), &fp);
    185 
    186     let ff = File::create(&fp).unwrap();
    187     rdf_write(&m, &ff).unwrap();
    188     debug!("stored as rdf {:?}", fp);
    189 }
    190 
    191 fn exec_import_xattr(f: &Path, index_path: &Path, digests: &Vec<RecordDigest>) -> bool {
    192     let mut m = match MetaData::from_xattr(f) {
    193         Ok(r) => {
    194             r
    195         }
    196         Err(e) => {
    197             return false;
    198         }
    199     };
    200 
    201     debug!("successfully processed xattr import source");
    202 
    203     info!("importing xattr source {:?}", &m);
    204 
    205     let mut digest_types: Vec<DigestType> = vec!();
    206 
    207     for v in digests.iter() {
    208         match v {
    209             RecordDigest::EmptyWithType(digest_typ) => {
    210                 digest_types.push(*digest_typ);
    211             },
    212             RecordDigest::Empty => {
    213                 digest_types.push(DigestType::Sha512);
    214             },
    215             _ => {
    216                 warn!("digest specifier {:?} is invalid in xattr import context.", v);
    217             },
    218         };
    219     }
    220 
    221     for v in digests_from_path(f, &digest_types) {
    222         m.set_fingerprint(v);
    223         store(index_path, &m);
    224     }
    225     true
    226 }
    227 
    228 fn exec_import_rdf(f: &Path, index_path: &Path) -> bool {
    229     let f = File::open(f).unwrap();
    230     let entries = match rdf_read_all(&f) {
    231         Ok(v) => {
    232             v
    233         },
    234         Err(e) => {
    235             return false;
    236         }
    237     };
    238 
    239     debug!("successfully processed rdf import source");
    240 
    241     for m in entries {
    242         info!("importing rdf source {:?}", &m);
    243         store(index_path, &m);
    244     }
    245     true
    246 }
    247 
    248 fn exec_import_biblatex(f: &Path, index_path: &Path, digests: &Vec<RecordDigest>) -> bool {
    249     let f = File::open(f).unwrap();
    250     let entries = match biblatex_read_all(&f, digests) {
    251         Ok(v) => {
    252             v
    253         },
    254         Err(e) => {
    255             return false;
    256         }       
    257     };
    258 
    259     debug!("successfully processed biblatex import source");
    260 
    261     for m in entries {
    262         info!("importing biblatex source {:?}", &m);
    263         store(index_path, &m);
    264     }
    265 
    266     true
    267 }
    268 
    269 fn exec_apply(p: &Path, index_path: &Path, mut extra_digest_types: Vec<DigestType>) -> bool {
    270     let mut digest_types: Vec<DigestType> = vec!(DigestType::Sha512);
    271     digest_types.append(&mut extra_digest_types);
    272     for entry in WalkDir::new(&p)
    273         .into_iter()
    274         .filter_map(Result::ok)
    275         .filter(|e| !e.file_type().is_dir()) {
    276             let ep = entry.path();
    277             for digest in digests_from_path(ep, &digest_types) {
    278                 let z_hex = hex::encode(digest.fingerprint());
    279 
    280                 let fp = index_path.join(&z_hex);
    281                 match fp.canonicalize() {
    282                     Ok(v) => {
    283                         let f = File::open(&v).unwrap();
    284                         let m = rdf_read(f);
    285                         info!("apply {:?} -> {:?}", entry, &m);
    286                         m.to_xattr(&ep);
    287                     },
    288                     Err(e) => {
    289                         debug!("metadata not found for {:?} -> {:?}", entry, z_hex);
    290                     },
    291                 };
    292             }
    293     }
    294     true
    295 }
    296 
    297 fn exec_import(p: &Path, index_path: &Path, digests: Vec<RecordDigest>) {
    298     for entry in WalkDir::new(&p)
    299         .into_iter()
    300         .filter_map(Result::ok)
    301         .filter(|e| !e.file_type().is_dir()) {
    302 
    303         let fp = entry.path();
    304         debug!("attempt xattr import {:?}", fp);
    305         if exec_import_xattr(fp, index_path, &digests) {
    306             continue;
    307         }
    308 
    309         let st = entry.metadata().unwrap();
    310         if st.len() > 1048576 {
    311             warn!("skipping metadata content probe for file >1MB");
    312             continue;
    313         }
    314 
    315         debug!("attempt rdf import {:?}", fp);
    316         if exec_import_rdf(fp, index_path) { 
    317             continue;
    318         } 
    319 
    320         debug!("attempt biblatex import {:?}", fp);
    321         if exec_import_biblatex(fp, index_path, &digests) {
    322             continue;
    323         }
    324     }
    325 }
    326 
    327 fn exec_entry(p: &Path, index_path: &Path) -> bool {
    328     if !p.is_file() {
    329         return false; 
    330     }
    331     true
    332 }
    333 
    334 fn main() {
    335     env_logger::init();
    336 
    337     let args = args_setup();
    338 
    339     let index_dir = resolve_directory(&args);
    340     info!("have index directory {:?}", &index_dir);
    341    
    342     match args.subcommand_matches("import") {
    343         Some(arg) => {
    344             let p = str_to_path(&arg);
    345             let mut digests: Vec<RecordDigest> = Vec::new();
    346             match arg.values_of("setdigest") {
    347                 Some(r) => {
    348                     for digest_str in r {
    349                         match from_urn(&digest_str) {
    350                             Ok(digest) => {
    351                                 info!("using digest {}", digest_str);
    352                                 digests.push(digest);
    353                             },
    354                             Err(e) => {
    355                                 let digest_type = match DigestType::from_str(digest_str) {
    356                                     Ok(v) => {
    357                                         v
    358                                     },
    359                                     Err(e) => {
    360                                         panic!("invalid digest specifier: {:?}", e);
    361                                     },
    362                                 };
    363                                 let digest_empty = RecordDigest::EmptyWithType(digest_type);
    364                                 digests.push(digest_empty);
    365                             },
    366                         }
    367                     }
    368                 },
    369                 None => {},
    370             };
    371             info!("import from path {:?}", &p);
    372             return exec_import(&p, index_dir.as_path(), digests);
    373         },
    374         _ => {},
    375     };
    376 
    377 
    378     let mut r = true;
    379     match args.subcommand_matches("apply") {
    380         Some(arg) => {
    381             let p = str_to_path(&arg);
    382             let mut digests: Vec<DigestType> = Vec::new();
    383             match arg.values_of("adddigest") {
    384                 Some(r) => {
    385                     for digest_str in r {
    386                         match DigestType::from_str(digest_str.clone()) {
    387                             Ok(digest) => {
    388                                 info!("using digest type {}", digest_str);
    389                                 digests.push(digest);
    390                             },
    391                             Err(e) => {
    392                                 panic!("invalid digest URN: {:?}", e);
    393                             },
    394                         }
    395                     }
    396                 },
    397                 None => {},
    398             };
    399 
    400             info!("apply from path {:?}", &p);
    401             if !exec_apply(p.as_path(), index_dir.as_path(), digests) {
    402                 r = false; 
    403             }
    404         },
    405         _ => {},
    406     }
    407 
    408 //    match args.subcommand_matches("new") {
    409 //        Some(v) => {
    410 //            let p = str_to_path(v);
    411 //            info!("new metadata for path {:?}", &p);
    412 //            if !exec_entry(p.as_path(), index_dir.as_path()) {
    413 //                r = false; 
    414 //            }
    415 //        },
    416 //        _ => {},
    417 //    }
    418 }