crier

RSS and Atom feed aggregator
Info | Log | Files | Refs | README

rss.rs (6164B)


      1 use std::path::Path;
      2 use std::fs::File;
      3 use std::io::BufReader;
      4 use std::io::BufRead;
      5 use crate::Error;
      6 
      7 use log::info;
      8 use log::debug;
      9 use log::error;
     10 
     11 use rss::Channel;
     12 use rss::Item;
     13 use rss::extension::dublincore::DublinCoreExtension;
     14 use atom_syndication::Feed;
     15 use atom_syndication::Entry;
     16 use atom_syndication::Text;
     17 use atom_syndication::TextType;
     18 use atom_syndication::FixedDateTime;
     19 use atom_syndication::Content;
     20 use atom_syndication::Category;
     21 use chrono::naive::NaiveDateTime;
     22 use chrono::Local;
     23 use chrono::offset::Utc;
     24 
     25 /// try to coerce the item field into a valid date
     26 fn parse_date(v: &String) -> Result<FixedDateTime, Error> {
     27     match FixedDateTime::parse_from_rfc2822(v.as_str()) {
     28         Ok(r) => {
     29             return Ok(r);
     30         },
     31         Err(e) => {},
     32     };
     33     match FixedDateTime::parse_from_rfc3339(v.as_str()) {
     34         Ok(r) => {
     35             return Ok(r);
     36         },
     37         Err(e) => {},
     38     };
     39     match FixedDateTime::parse_from_str(v.as_str(), "%Y-%m-%dT%H:%M:%S") {
     40         Ok(r) => {
     41             return Ok(r);
     42         },
     43         Err(e) => {
     44         },
     45     };
     46     match NaiveDateTime::parse_from_str(v.as_str(), "%Y-%m-%dT%H:%M:%S") {
     47         Ok(r) => {
     48             return Ok(r.and_utc().fixed_offset());
     49         },
     50         Err(e) => {
     51         },
     52     };
     53 
     54 
     55     Err(Error::ParseError)
     56 }
     57 
     58 /// try different item fields to determine the date
     59 fn get_base_date(ipt: &Item) -> Result<FixedDateTime, Error> {
     60     let mut ds = String::new();
     61 
     62     match &ipt.pub_date {
     63         Some(v) => {
     64             ds.push_str(v.as_str());
     65         },
     66         _ => {},
     67     };
     68     match parse_date(&ds) {
     69         Ok(v) => {
     70             return Ok(v);
     71         },
     72         Err(e) => {},
     73     };
     74 
     75     match &ipt.dublin_core_ext {
     76         Some(v) => {
     77             for vv in v.dates() {
     78                 match parse_date(vv) {
     79                     Ok(vvv) => {
     80                         return Ok(vvv);
     81                     },
     82                     Err(e) => {
     83                         debug!("no date");
     84                     },
     85                 }
     86             }
     87         },
     88         _ => {},
     89     }
     90 
     91     Err(Error::IncompleteError)
     92 }
     93 
     94 /// coerce the rss item into an atom entry
     95 fn translate_item(ipt: Item) -> Result<Entry, Error> {
     96     let mut opt = Entry::default();
     97 
     98     match &ipt.title {
     99         Some(v) => {
    100             opt.set_title(Text::plain(v));
    101         },
    102         _ => {},
    103     };
    104 
    105     match get_base_date(&ipt) {
    106         Ok(v) => {
    107             opt.set_published(v.clone());
    108             opt.set_updated(v);
    109         },
    110         Err(e) => {
    111             return Err(e);
    112         }
    113     };
    114    
    115     match ipt.description {
    116         Some(v) => {
    117             opt.set_summary(Some(Text::xhtml(v)));
    118         },
    119         _ => {
    120             match ipt.content {
    121                 Some(v) => {
    122                     let mut r = Content::default();
    123                     r.set_content_type(Some(String::from("text/html")));
    124                     r.set_value(Some(v));
    125                     match ipt.source {
    126                         Some(v) => {
    127                             r.set_src(v.url);
    128                         },
    129                         _ => {},
    130                     }
    131                     opt.set_content(Some(r));
    132                 },
    133                 _ => {
    134                     error!("have neither summary nor content");
    135                     return Err(Error::IncompleteError);
    136                 },
    137             };
    138         },
    139     };
    140 
    141     match ipt.guid {
    142         Some(v) => {
    143             if v.is_permalink() {
    144                 opt.set_id(String::from(v.value()));
    145             }
    146         },
    147         _ => {
    148             match ipt.link {
    149                 Some(v) => {
    150                     opt.set_id(v.clone());
    151                 },
    152                 _ => {},
    153             }
    154         },
    155     };
    156 
    157     for v in ipt.categories {
    158         let mut cat = Category::default();
    159         cat.set_term(String::from(v.name()));
    160         cat.set_label(Some(v.name));
    161         match v.domain {
    162             Some(v) => {
    163                 cat.set_scheme(Some(v));
    164             },
    165             _ => {},
    166         };
    167         opt.categories.push(cat);
    168     }
    169 
    170     Ok(opt)
    171 }
    172 
    173 
    174 fn translate(ipt: Channel, allow_fail: bool) -> Result<Feed, Error> {
    175     let mut entries: Vec<Entry>;
    176     let mut opt = Feed::default();
    177     
    178     opt.set_title(Text::plain(&ipt.title));
    179 
    180     opt.set_subtitle(Some(Text::plain(&ipt.description)));
    181 
    182     entries = vec!();
    183     for v in ipt.into_items() {
    184         match translate_item(v) {
    185             Ok(v) => {
    186                 entries.push(v);
    187             },
    188             Err(e) => {
    189                 if !allow_fail {
    190                     return Err(Error::IncompleteError);
    191                 }
    192             },
    193         }
    194     }
    195 
    196     opt.set_entries(entries);
    197     opt.set_updated(Local::now().to_utc());
    198     Ok(opt)
    199 }
    200 
    201 pub fn from_file(fp: &str, allow_entry_fail: bool) -> Result<Feed, Error> {
    202     let mut o: Channel;
    203     let r: Feed;
    204     let p: &Path; 
    205     let mut f: File;
    206     //let mut b: BufReader; // how to explicitly declare 
    207 
    208     p = Path::new(fp);
    209     f = File::open(p).unwrap();
    210     let mut b = BufReader::new(f);
    211 
    212     match Feed::read_from(b) {
    213         Ok(v) => {
    214             debug!("have atom feed");
    215             return Ok(v);
    216         },
    217         Err(e) => {},
    218     };
    219 
    220     f = File::open(p).unwrap();
    221     b = BufReader::new(f);
    222 
    223     match Channel::read_from(b) {
    224         Ok(v) => {
    225             debug!("have RSS feed");
    226             o = v;
    227         },
    228         Err(e) => {
    229             return Err(Error::ParseError);
    230         },
    231     };
    232     o.set_dublin_core_ext(DublinCoreExtension::default());
    233     translate(o, allow_entry_fail)
    234 }
    235 
    236 mod test {
    237     use std::path::Path;
    238     use atom_syndication::Feed;
    239     use env_logger;
    240 
    241     #[test]
    242     fn test_rss_from_file() {
    243         env_logger::init();
    244         let mut r: Feed;
    245         match super::from_file("testdata/test.rss.xml", false) {
    246             Ok(v) => {
    247             },
    248             Err(e) => {
    249                 panic!("{:?}", e);
    250             },
    251         };
    252         match super::from_file("testdata/test.atom.xml", false) {
    253             Ok(v) => {
    254             },
    255             Err(e) => {
    256                 panic!("expected fail");
    257             },
    258         };
    259     }
    260 }