rss.rs (6164B)
1 use std::path::Path; 2 use std::fs::File; 3 use std::io::BufReader; 4 use std::io::BufRead; 5 use crate::Error; 6 7 use log::info; 8 use log::debug; 9 use log::error; 10 11 use rss::Channel; 12 use rss::Item; 13 use rss::extension::dublincore::DublinCoreExtension; 14 use atom_syndication::Feed; 15 use atom_syndication::Entry; 16 use atom_syndication::Text; 17 use atom_syndication::TextType; 18 use atom_syndication::FixedDateTime; 19 use atom_syndication::Content; 20 use atom_syndication::Category; 21 use chrono::naive::NaiveDateTime; 22 use chrono::Local; 23 use chrono::offset::Utc; 24 25 /// try to coerce the item field into a valid date 26 fn parse_date(v: &String) -> Result<FixedDateTime, Error> { 27 match FixedDateTime::parse_from_rfc2822(v.as_str()) { 28 Ok(r) => { 29 return Ok(r); 30 }, 31 Err(e) => {}, 32 }; 33 match FixedDateTime::parse_from_rfc3339(v.as_str()) { 34 Ok(r) => { 35 return Ok(r); 36 }, 37 Err(e) => {}, 38 }; 39 match FixedDateTime::parse_from_str(v.as_str(), "%Y-%m-%dT%H:%M:%S") { 40 Ok(r) => { 41 return Ok(r); 42 }, 43 Err(e) => { 44 }, 45 }; 46 match NaiveDateTime::parse_from_str(v.as_str(), "%Y-%m-%dT%H:%M:%S") { 47 Ok(r) => { 48 return Ok(r.and_utc().fixed_offset()); 49 }, 50 Err(e) => { 51 }, 52 }; 53 54 55 Err(Error::ParseError) 56 } 57 58 /// try different item fields to determine the date 59 fn get_base_date(ipt: &Item) -> Result<FixedDateTime, Error> { 60 let mut ds = String::new(); 61 62 match &ipt.pub_date { 63 Some(v) => { 64 ds.push_str(v.as_str()); 65 }, 66 _ => {}, 67 }; 68 match parse_date(&ds) { 69 Ok(v) => { 70 return Ok(v); 71 }, 72 Err(e) => {}, 73 }; 74 75 match &ipt.dublin_core_ext { 76 Some(v) => { 77 for vv in v.dates() { 78 match parse_date(vv) { 79 Ok(vvv) => { 80 return Ok(vvv); 81 }, 82 Err(e) => { 83 debug!("no date"); 84 }, 85 } 86 } 87 }, 88 _ => {}, 89 } 90 91 Err(Error::IncompleteError) 92 } 93 94 /// coerce the rss item into an atom entry 95 fn translate_item(ipt: Item) -> Result<Entry, Error> { 96 let mut opt = Entry::default(); 97 98 match &ipt.title { 99 Some(v) => { 100 opt.set_title(Text::plain(v)); 101 }, 102 _ => {}, 103 }; 104 105 match get_base_date(&ipt) { 106 Ok(v) => { 107 opt.set_published(v.clone()); 108 opt.set_updated(v); 109 }, 110 Err(e) => { 111 return Err(e); 112 } 113 }; 114 115 match ipt.description { 116 Some(v) => { 117 opt.set_summary(Some(Text::xhtml(v))); 118 }, 119 _ => { 120 match ipt.content { 121 Some(v) => { 122 let mut r = Content::default(); 123 r.set_content_type(Some(String::from("text/html"))); 124 r.set_value(Some(v)); 125 match ipt.source { 126 Some(v) => { 127 r.set_src(v.url); 128 }, 129 _ => {}, 130 } 131 opt.set_content(Some(r)); 132 }, 133 _ => { 134 error!("have neither summary nor content"); 135 return Err(Error::IncompleteError); 136 }, 137 }; 138 }, 139 }; 140 141 match ipt.guid { 142 Some(v) => { 143 if v.is_permalink() { 144 opt.set_id(String::from(v.value())); 145 } 146 }, 147 _ => { 148 match ipt.link { 149 Some(v) => { 150 opt.set_id(v.clone()); 151 }, 152 _ => {}, 153 } 154 }, 155 }; 156 157 for v in ipt.categories { 158 let mut cat = Category::default(); 159 cat.set_term(String::from(v.name())); 160 cat.set_label(Some(v.name)); 161 match v.domain { 162 Some(v) => { 163 cat.set_scheme(Some(v)); 164 }, 165 _ => {}, 166 }; 167 opt.categories.push(cat); 168 } 169 170 Ok(opt) 171 } 172 173 174 fn translate(ipt: Channel, allow_fail: bool) -> Result<Feed, Error> { 175 let mut entries: Vec<Entry>; 176 let mut opt = Feed::default(); 177 178 opt.set_title(Text::plain(&ipt.title)); 179 180 opt.set_subtitle(Some(Text::plain(&ipt.description))); 181 182 entries = vec!(); 183 for v in ipt.into_items() { 184 match translate_item(v) { 185 Ok(v) => { 186 entries.push(v); 187 }, 188 Err(e) => { 189 if !allow_fail { 190 return Err(Error::IncompleteError); 191 } 192 }, 193 } 194 } 195 196 opt.set_entries(entries); 197 opt.set_updated(Local::now().to_utc()); 198 Ok(opt) 199 } 200 201 pub fn from_file(fp: &str, allow_entry_fail: bool) -> Result<Feed, Error> { 202 let mut o: Channel; 203 let r: Feed; 204 let p: &Path; 205 let mut f: File; 206 //let mut b: BufReader; // how to explicitly declare 207 208 p = Path::new(fp); 209 f = File::open(p).unwrap(); 210 let mut b = BufReader::new(f); 211 212 match Feed::read_from(b) { 213 Ok(v) => { 214 debug!("have atom feed"); 215 return Ok(v); 216 }, 217 Err(e) => {}, 218 }; 219 220 f = File::open(p).unwrap(); 221 b = BufReader::new(f); 222 223 match Channel::read_from(b) { 224 Ok(v) => { 225 debug!("have RSS feed"); 226 o = v; 227 }, 228 Err(e) => { 229 return Err(Error::ParseError); 230 }, 231 }; 232 o.set_dublin_core_ext(DublinCoreExtension::default()); 233 translate(o, allow_entry_fail) 234 } 235 236 mod test { 237 use std::path::Path; 238 use atom_syndication::Feed; 239 use env_logger; 240 241 #[test] 242 fn test_rss_from_file() { 243 env_logger::init(); 244 let mut r: Feed; 245 match super::from_file("testdata/test.rss.xml", false) { 246 Ok(v) => { 247 }, 248 Err(e) => { 249 panic!("{:?}", e); 250 }, 251 }; 252 match super::from_file("testdata/test.atom.xml", false) { 253 Ok(v) => { 254 }, 255 Err(e) => { 256 panic!("expected fail"); 257 }, 258 }; 259 } 260 }