crier

RSS and Atom feed aggregator
Info | Log | Files | Refs | README

lib.rs (7287B)


      1 use std::collections::HashMap;
      2 use std::hash::Hasher;
      3 use std::hash::Hash;
      4 use std::iter::Iterator;
      5 use std::io::Write;
      6 use std::fmt::Debug;
      7 use std::io::BufWriter;
      8 use std::str::FromStr;
      9 use log::error;
     10 use uuid::Uuid;
     11 
     12 use rs_sha512::Sha512Hasher;
     13 use chrono::Local;
     14 use atom_syndication::Feed as Feed;
     15 use atom_syndication::Entry as Entry;
     16 use atom_syndication::TextType as OutTextType;
     17 use atom_syndication::Text as OutText;
     18 use atom_syndication::Content as OutContent;
     19 use atom_syndication::Person as OutPerson;
     20 use atom_syndication::Category as OutCategory;
     21 use atom_syndication::FixedDateTime;
     22 use atom_syndication::Person;
     23 use atom_syndication::Generator;
     24 use itertools::Itertools;
     25 
     26 
     27 pub mod io;
     28 pub mod mem;
     29 
     30 mod meta;
     31 mod cache;
     32 mod rss;
     33 use meta::FeedMetadata;
     34 use mem::CacheWriter;
     35 use cache::Cache;
     36 
     37 static NAMESPACE_URL_CRIER: &[u8] = b"defalsify.org/src/crier";
     38 
     39 #[derive(Debug)]
     40 pub enum Error {
     41     WriteError,
     42     CacheError,
     43     ParseError,
     44     IncompleteError,
     45 }
     46 
     47 pub struct Sequencer<'a> {
     48     metadata: FeedMetadata,
     49     pub items: HashMap<u64, Vec<u8>>,
     50     item_keys: Vec<u64>,
     51     crsr: usize,
     52     limit: usize,
     53     default_cache: CacheWriter, //HashMap<String, Vec<u8>>,
     54     cache: Option<&'a mut dyn Cache>,
     55     guuid: Uuid,
     56 }
     57 
     58 pub struct SequencerEntry {
     59     pub digest: u64,
     60     entry: Entry,
     61     out: Vec<u8>,
     62 }
     63 
     64 impl<'a> Sequencer<'a> {
     65     pub fn new(guuid_value: Vec<u8>) -> Sequencer<'a> {
     66         let namespace_crier = Uuid::new_v5(&Uuid::NAMESPACE_URL, NAMESPACE_URL_CRIER);
     67         let mut o = Sequencer {
     68             metadata: FeedMetadata::default(),
     69             items: HashMap::new(),
     70             crsr: 0,
     71             limit: 0,
     72             item_keys: Vec::new(),
     73             default_cache: CacheWriter::new(), //HashMap::new(),
     74             cache: None,
     75             guuid: Uuid::new_v5(&namespace_crier, guuid_value.as_ref()),
     76         };
     77 
     78         #[cfg(test)]
     79         o.metadata.force();
     80 
     81         o
     82     }
     83 
     84     pub fn with_cache(mut self, w: &'a mut impl Cache) -> Sequencer<'a> {
     85         self.cache = Some(w);
     86         return self;
     87     }
     88 
     89     pub fn set_author(&mut self, name: &str) -> bool {
     90         self.metadata.set_author(Person{
     91             name: String::from(name),
     92             email: None,
     93             uri: None,
     94         })
     95     }
     96 
     97     pub fn set_title(&mut self, title: &str) -> bool {
     98         self.metadata.set_title(String::from(title))
     99     }
    100 
    101     pub fn add(&mut self, entry: Entry) -> bool {
    102         let w: &mut dyn Write;
    103         let mut id: String;
    104 
    105         id = entry.id.to_string();
    106         match &mut self.cache {
    107             Some(v) => {
    108                 w = v.open(id);
    109             },
    110             None => {
    111                 w = &mut self.default_cache;
    112             },
    113         }
    114 
    115         id = entry.id.to_string();
    116         let o = SequencerEntry::new(entry, w);
    117         if self.items.contains_key(&o.digest) {
    118             return false;
    119         }
    120         self.items.insert(o.digest, o.into());
    121         match &mut self.cache {
    122             Some(v) => {
    123                 v.close(id);
    124             },
    125             None => {
    126             },
    127         }
    128         return true;
    129     }
    130 
    131     pub fn add_from(&mut self, feed: Feed) -> i64 {
    132         let mut c: i64;
    133 
    134         c = 0;
    135         for v in feed.entries.iter() {
    136             self.add(v.clone());
    137             c += 1;
    138         }
    139         c
    140     }
    141 
    142     pub fn write_to(&mut self, w: impl Write) -> Result<usize, Error> {
    143         let mut r: usize;
    144         let mut feed = Feed::default();
    145         let mut entry: Entry;
    146         let mut entries: Vec<Entry>;
    147         let mut b: &str;
    148         let id: String = self.guuid.into();
    149         feed.set_id(id);
    150         feed.set_updated(Local::now().to_utc());
    151 
    152         let g = Generator{
    153             value: String::from("Crier"),
    154             uri: Some(String::from(env!("CARGO_PKG_HOMEPAGE"))),
    155             version: Some(String::from(env!("CARGO_PKG_VERSION"))),
    156         };
    157         feed.set_generator(g);
    158 
    159         match self.metadata.apply(&mut feed) {
    160             Err(_v) => {
    161                 return Err(Error::WriteError);
    162             },
    163             Ok(_) => {
    164             },
    165         }
    166 
    167         entries = Vec::new();
    168         r = 0;
    169         for v in self {
    170             b = std::str::from_utf8(v.as_slice()).unwrap();
    171             match Entry::from_str(b) {
    172                 Err(e) => {
    173                     error!("fromstrerr {:?}", e);
    174                     return Err(Error::CacheError);
    175                 },
    176                 Ok(o) => {
    177                     entries.push(o);
    178                 },
    179             }
    180             r += 1;
    181         }
    182         feed.set_entries(entries);
    183 
    184         match feed.write_to(w) {
    185             Err(_v) => {
    186                 return Err(Error::WriteError);
    187             },
    188             Ok(_) => {
    189             },
    190         }
    191 
    192         Ok(r)
    193     }
    194 }
    195 
    196 impl<'a> Iterator for Sequencer<'a> {
    197     type Item = Vec<u8>;
    198 
    199     fn next(&mut self) -> Option<Self::Item> {
    200         let c: u64;
    201 
    202         if self.limit == 0 {
    203             self.item_keys = Vec::new();
    204             for k in  self.items.keys().sorted() {
    205                 self.item_keys.push(k.clone());
    206                 self.limit += 1;
    207             }
    208         }
    209 
    210         if self.limit == 0 {
    211             return None;
    212         }
    213 
    214         if self.crsr == self.limit {
    215             self.limit = 0;
    216             self.crsr = 0;
    217             return None;
    218         }
    219 
    220         c = self.item_keys[self.crsr];
    221         self.crsr += 1;
    222         return Some(self.items[&c].clone());
    223     }
    224 }
    225 
    226 impl SequencerEntry {
    227     pub fn new(entry: Entry, exporter: &mut dyn Write) -> SequencerEntry {
    228         let mut have_date: bool;
    229         let mut id_part: u32;
    230         let mut o = SequencerEntry {
    231             entry: entry,
    232             digest: 0,
    233             out: Vec::new(),
    234         };
    235 
    236         have_date = false;
    237         match &o.entry.published {
    238             Some(v) => {
    239                 id_part = v.timestamp() as u32;
    240                 o.digest = id_part as u64;
    241                 o.digest <<= 32;
    242                 have_date = true;
    243             },
    244             None => {
    245             },
    246         }
    247 
    248         if !have_date {
    249             id_part = o.entry.updated.timestamp() as u32;
    250             o.digest = id_part as u64;
    251             o.digest <<= 32;
    252             have_date = true;
    253         }
    254         
    255         let mut h = Sha512Hasher::default();
    256         o.hash(&mut h);
    257         id_part = h.finish() as u32;
    258         o.digest += id_part as u64;
    259         o
    260     }
    261 
    262     /// TODO: get size heuristics from already written values (either that or replace underlying
    263     /// in-memory writer implementation with something that doesnt wrap on flush.
    264     fn to_writer(&self, v: Vec<u8>) -> BufWriter<Vec<u8>> {
    265         BufWriter::with_capacity(10241024, v)
    266     }
    267 
    268 }
    269 
    270 /// TODO: split out field translations to separate module
    271 impl Into<Vec<u8>> for SequencerEntry {
    272     fn into(self) -> Vec<u8> {
    273         let mut out_entry: Entry;
    274         let mut b: Vec<u8>;
    275         let mut w: BufWriter<Vec<u8>>;
    276         let o: &SequencerEntry;
    277 
    278         o = &self;
    279         b = Vec::new();
    280         w = o.to_writer(b);
    281 
    282         w = self.entry.write_to(w).unwrap();
    283         b = Vec::from(w.buffer());
    284         b
    285     }
    286 }
    287 
    288 impl Hash for SequencerEntry {
    289     fn hash<H: Hasher>(&self, h: &mut H) {
    290             h.write(self.entry.id.as_bytes());
    291     }
    292 }
    293 
    294 #[cfg(test)]
    295 mod tests;