main.rs (11115B)
1 use std::default; 2 use std::fs::{ 3 File, 4 create_dir_all, 5 metadata, 6 }; 7 use std::io::Write; 8 use std::path::{ 9 Path, 10 PathBuf, 11 }; 12 use std::str::FromStr; 13 use env_logger; 14 use clap::{ 15 App, 16 Arg, 17 ArgMatches, 18 SubCommand, 19 }; 20 use directories::{ 21 BaseDirs, 22 }; 23 use walkdir::WalkDir; 24 use hex; 25 use log::{ 26 debug, 27 info, 28 warn, 29 }; 30 31 use biblatex::EntryType; 32 use kitab::rdf::{ 33 read as rdf_read, 34 read_all as rdf_read_all, 35 write as rdf_write, 36 }; 37 use kitab::biblatex::{ 38 read_all as biblatex_read_all, 39 }; 40 use kitab::meta::{ 41 MetaData, 42 digests_from_path, 43 }; 44 use kitab::digest::from_urn; 45 use kitab::digest::RecordDigest; 46 use kitab::digest::DigestType; 47 48 49 fn args_setup() -> ArgMatches<'static> { 50 let mut o = App::new("kitab"); 51 o = o.version("0.0.1"); 52 o = o.author("Louis Holbrook <dev@holbrook.no>"); 53 54 o = o.arg(clap::Arg::with_name("store") 55 .short("s") 56 .long("store") 57 .value_name("Store location") 58 .takes_value(true) 59 ); 60 61 let mut o_import = ( 62 SubCommand::with_name("import") 63 .about("import information from file") 64 .version("0.0.1") 65 ); 66 o_import = o_import.arg( 67 Arg::with_name("setdigest") 68 .short("d") 69 .long("digest") 70 .help("Explicitly set digest") 71 .multiple(true) 72 .takes_value(true) 73 .number_of_values(1) 74 ); 75 o_import = o_import.arg( 76 Arg::with_name("PATH") 77 .help("Path to operate on") 78 .required(true) 79 ); 80 o = o.subcommand(o_import); 81 82 let mut o_apply = ( 83 SubCommand::with_name("apply") 84 .about("Apply metadata on matching files") 85 .version("0.0.1") 86 ); 87 o_apply = o_apply.arg( 88 Arg::with_name("PATH") 89 .help("Path to operate on") 90 .required(true) 91 .index(1) 92 ); 93 o_apply = o_apply.arg( 94 Arg::with_name("adddigest") 95 .short("d") 96 .long("digest") 97 .help("Additional digest to store") 98 .multiple(true) 99 .takes_value(true) 100 .number_of_values(1) 101 ); 102 o = o.subcommand(o_apply); 103 104 // let mut o_entry = ( 105 // SubCommand::with_name("new") 106 // .about("add metadata for file") 107 // .version("0.0.1") 108 // ); 109 // 110 // o_entry = o_entry.arg(clap::Arg::with_name("validators") 111 // .long("validator") 112 // .value_name("Add given validator engine") 113 // .multiple(true) 114 // .takes_value(true) 115 // ); 116 // 117 // o_entry = o_entry.arg( 118 // Arg::with_name("PATH") 119 // .help("Path to operate on") 120 // .required(true) 121 // .index(1) 122 // ); 123 // o = o.subcommand(o_entry); 124 125 o.get_matches() 126 } 127 128 // commands 129 // kitab import <file> - attempt in order import rdf, import spec 130 // kitab apply <path> - recursively 131 132 fn resolve_directory(args: &ArgMatches) -> PathBuf { 133 let r = match args.value_of("store") { 134 Some(v) => { 135 v 136 }, 137 _ => { 138 "" 139 }, 140 }; 141 if r.len() != 0 { 142 return PathBuf::from(r) 143 } 144 145 146 match BaseDirs::new() { 147 Some(v) => { 148 let d = v.data_dir(); 149 d.join("kitab") 150 .join("idx") 151 }, 152 _ => { 153 PathBuf::from(".") 154 .join(".kitab") 155 .join("/idx") 156 }, 157 } 158 } 159 160 fn str_to_path(args: &ArgMatches) -> PathBuf { 161 let mut p_canon: PathBuf; 162 match args.value_of("PATH") { 163 Some(v) => { 164 let p = &Path::new(v); 165 match p.canonicalize() { 166 Ok(v) => { 167 p_canon = v.clone(); 168 }, 169 Err(e) => { 170 panic!("path error: {}", e); 171 }, 172 }; 173 }, 174 None => { 175 panic!("path required"); 176 }, 177 } 178 p_canon 179 } 180 181 fn store(index_path: &Path, m: &MetaData) { 182 let fp = index_path.join(m.fingerprint()); 183 create_dir_all(&index_path); 184 debug!("writing record for title {} to {:?}", m.title(), &fp); 185 186 let ff = File::create(&fp).unwrap(); 187 rdf_write(&m, &ff).unwrap(); 188 debug!("stored as rdf {:?}", fp); 189 } 190 191 fn exec_import_xattr(f: &Path, index_path: &Path, digests: &Vec<RecordDigest>) -> bool { 192 let mut m = match MetaData::from_xattr(f) { 193 Ok(r) => { 194 r 195 } 196 Err(e) => { 197 return false; 198 } 199 }; 200 201 debug!("successfully processed xattr import source"); 202 203 info!("importing xattr source {:?}", &m); 204 205 let mut digest_types: Vec<DigestType> = vec!(); 206 207 for v in digests.iter() { 208 match v { 209 RecordDigest::EmptyWithType(digest_typ) => { 210 digest_types.push(*digest_typ); 211 }, 212 RecordDigest::Empty => { 213 digest_types.push(DigestType::Sha512); 214 }, 215 _ => { 216 warn!("digest specifier {:?} is invalid in xattr import context.", v); 217 }, 218 }; 219 } 220 221 for v in digests_from_path(f, &digest_types) { 222 m.set_fingerprint(v); 223 store(index_path, &m); 224 } 225 true 226 } 227 228 fn exec_import_rdf(f: &Path, index_path: &Path) -> bool { 229 let f = File::open(f).unwrap(); 230 let entries = match rdf_read_all(&f) { 231 Ok(v) => { 232 v 233 }, 234 Err(e) => { 235 return false; 236 } 237 }; 238 239 debug!("successfully processed rdf import source"); 240 241 for m in entries { 242 info!("importing rdf source {:?}", &m); 243 store(index_path, &m); 244 } 245 true 246 } 247 248 fn exec_import_biblatex(f: &Path, index_path: &Path, digests: &Vec<RecordDigest>) -> bool { 249 let f = File::open(f).unwrap(); 250 let entries = match biblatex_read_all(&f, digests) { 251 Ok(v) => { 252 v 253 }, 254 Err(e) => { 255 return false; 256 } 257 }; 258 259 debug!("successfully processed biblatex import source"); 260 261 for m in entries { 262 info!("importing biblatex source {:?}", &m); 263 store(index_path, &m); 264 } 265 266 true 267 } 268 269 fn exec_apply(p: &Path, index_path: &Path, mut extra_digest_types: Vec<DigestType>) -> bool { 270 let mut digest_types: Vec<DigestType> = vec!(DigestType::Sha512); 271 digest_types.append(&mut extra_digest_types); 272 for entry in WalkDir::new(&p) 273 .into_iter() 274 .filter_map(Result::ok) 275 .filter(|e| !e.file_type().is_dir()) { 276 let ep = entry.path(); 277 for digest in digests_from_path(ep, &digest_types) { 278 let z_hex = hex::encode(digest.fingerprint()); 279 280 let fp = index_path.join(&z_hex); 281 match fp.canonicalize() { 282 Ok(v) => { 283 let f = File::open(&v).unwrap(); 284 let m = rdf_read(f); 285 info!("apply {:?} -> {:?}", entry, &m); 286 m.to_xattr(&ep); 287 }, 288 Err(e) => { 289 debug!("metadata not found for {:?} -> {:?}", entry, z_hex); 290 }, 291 }; 292 } 293 } 294 true 295 } 296 297 fn exec_import(p: &Path, index_path: &Path, digests: Vec<RecordDigest>) { 298 for entry in WalkDir::new(&p) 299 .into_iter() 300 .filter_map(Result::ok) 301 .filter(|e| !e.file_type().is_dir()) { 302 303 let fp = entry.path(); 304 debug!("attempt xattr import {:?}", fp); 305 if exec_import_xattr(fp, index_path, &digests) { 306 continue; 307 } 308 309 let st = entry.metadata().unwrap(); 310 if st.len() > 1048576 { 311 warn!("skipping metadata content probe for file >1MB"); 312 continue; 313 } 314 315 debug!("attempt rdf import {:?}", fp); 316 if exec_import_rdf(fp, index_path) { 317 continue; 318 } 319 320 debug!("attempt biblatex import {:?}", fp); 321 if exec_import_biblatex(fp, index_path, &digests) { 322 continue; 323 } 324 } 325 } 326 327 fn exec_entry(p: &Path, index_path: &Path) -> bool { 328 if !p.is_file() { 329 return false; 330 } 331 true 332 } 333 334 fn main() { 335 env_logger::init(); 336 337 let args = args_setup(); 338 339 let index_dir = resolve_directory(&args); 340 info!("have index directory {:?}", &index_dir); 341 342 match args.subcommand_matches("import") { 343 Some(arg) => { 344 let p = str_to_path(&arg); 345 let mut digests: Vec<RecordDigest> = Vec::new(); 346 match arg.values_of("setdigest") { 347 Some(r) => { 348 for digest_str in r { 349 match from_urn(&digest_str) { 350 Ok(digest) => { 351 info!("using digest {}", digest_str); 352 digests.push(digest); 353 }, 354 Err(e) => { 355 let digest_type = match DigestType::from_str(digest_str) { 356 Ok(v) => { 357 v 358 }, 359 Err(e) => { 360 panic!("invalid digest specifier: {:?}", e); 361 }, 362 }; 363 let digest_empty = RecordDigest::EmptyWithType(digest_type); 364 digests.push(digest_empty); 365 }, 366 } 367 } 368 }, 369 None => {}, 370 }; 371 info!("import from path {:?}", &p); 372 return exec_import(&p, index_dir.as_path(), digests); 373 }, 374 _ => {}, 375 }; 376 377 378 let mut r = true; 379 match args.subcommand_matches("apply") { 380 Some(arg) => { 381 let p = str_to_path(&arg); 382 let mut digests: Vec<DigestType> = Vec::new(); 383 match arg.values_of("adddigest") { 384 Some(r) => { 385 for digest_str in r { 386 match DigestType::from_str(digest_str.clone()) { 387 Ok(digest) => { 388 info!("using digest type {}", digest_str); 389 digests.push(digest); 390 }, 391 Err(e) => { 392 panic!("invalid digest URN: {:?}", e); 393 }, 394 } 395 } 396 }, 397 None => {}, 398 }; 399 400 info!("apply from path {:?}", &p); 401 if !exec_apply(p.as_path(), index_dir.as_path(), digests) { 402 r = false; 403 } 404 }, 405 _ => {}, 406 } 407 408 // match args.subcommand_matches("new") { 409 // Some(v) => { 410 // let p = str_to_path(v); 411 // info!("new metadata for path {:?}", &p); 412 // if !exec_entry(p.as_path(), index_dir.as_path()) { 413 // r = false; 414 // } 415 // }, 416 // _ => {}, 417 // } 418 }