From 91c0c03d58d26b775a6e9c634f482be18163b8ee Mon Sep 17 00:00:00 2001 From: Mark Pritchard Date: Sat, 27 Apr 2019 15:59:51 +1000 Subject: [PATCH] Parse via namespace and prefix Documents are free to define a local prefix for a given namespace so parsing should not hard-code a specific prefix (e.g. "dc" for Dublin Core). This commit records all configured namespaces during the initial parse and uses these namespaces to instantiate the appropriate extensions as they are encountered. --- src/channel.rs | 37 +++++++------- .../itunes/itunes_channel_extension.rs | 9 ++-- src/extension/itunes/mod.rs | 48 ++++++++---------- src/item.rs | 20 +++++--- tests/data/dublincore_altprefix.xml | 39 +++++++++++++++ tests/read.rs | 50 ++++++++++++------- 6 files changed, 129 insertions(+), 74 deletions(-) create mode 100644 tests/data/dublincore_altprefix.xml diff --git a/src/channel.rs b/src/channel.rs index cf278b57d8..5344ad0e73 100644 --- a/src/channel.rs +++ b/src/channel.rs @@ -955,15 +955,11 @@ impl Channel { if element.name() == b"rss" || element.name() == b"rdf:RDF" { for attr in element.attributes().with_checks(false) { if let Ok(attr) = attr { - if !attr.key.starts_with(b"xmlns:") || attr.key == b"xmlns:itunes" - || attr.key == b"xmlns:dc" - { - continue; + if attr.key.starts_with(b"xmlns:") { + let prefix = str::from_utf8(&attr.key[6..])?.to_string(); + let namespace = attr.unescape_and_decode_value(&reader)?; + namespaces.insert(prefix, namespace); } - - let key = str::from_utf8(&attr.key[6..])?.to_string(); - let value = attr.unescape_and_decode_value(&reader)?; - namespaces.insert(key, value); } } @@ -981,11 +977,11 @@ impl Channel { match reader.read_event(&mut buf)? { Event::Start(element) => match element.name() { b"channel" => { - let inner = Channel::from_xml(&mut reader, element.attributes())?; + let inner = Channel::from_xml(&namespaces, &mut reader, element.attributes())?; channel = Some(inner); } b"item" => { - let item = Item::from_xml(&mut reader, element.attributes())?; + let item = Item::from_xml(&namespaces, &mut reader, element.attributes())?; if items.is_none() { items = Some(Vec::new()); } @@ -1117,7 +1113,7 @@ impl ToString for Channel { } impl Channel { - pub fn from_xml(reader: &mut Reader, _: Attributes) -> Result { + pub fn from_xml(namespaces: &HashMap, reader: &mut Reader, _: Attributes) -> Result { let mut channel = Channel::default(); let mut buf = Vec::new(); let mut skip_buf = Vec::new(); @@ -1142,7 +1138,7 @@ impl Channel { channel.text_input = Some(text_input); } b"item" => { - let item = Item::from_xml(reader, element.attributes())?; + let item = Item::from_xml(&namespaces, reader, element.attributes())?; channel.items.push(item); } b"title" => { @@ -1229,12 +1225,17 @@ impl Channel { } if !channel.extensions.is_empty() { - if let Some(map) = channel.extensions.remove("itunes") { - channel.itunes_ext = Some(ITunesChannelExtension::from_map(map)?); - } - - if let Some(map) = channel.extensions.remove("dc") { - channel.dublin_core_ext = Some(DublinCoreExtension::from_map(map)); + // Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions) + for (prefix, namespace) in namespaces { + match namespace.as_ref() { + "http://www.itunes.com/dtds/podcast-1.0.dtd" => { + channel.extensions.remove(prefix).map(|v| channel.itunes_ext = Some(ITunesChannelExtension::from_map(v))) + }, + "http://purl.org/dc/elements/1.1/" => { + channel.extensions.remove(prefix).map(|v| channel.dublin_core_ext = Some(DublinCoreExtension::from_map(v))) + }, + _ => None + }; } } diff --git a/src/extension/itunes/itunes_channel_extension.rs b/src/extension/itunes/itunes_channel_extension.rs index d4d415b97c..e8e460b7a2 100644 --- a/src/extension/itunes/itunes_channel_extension.rs +++ b/src/extension/itunes/itunes_channel_extension.rs @@ -13,7 +13,6 @@ use quick_xml::events::{BytesStart, Event}; use quick_xml::Writer; use super::{parse_categories, parse_image, parse_owner}; -use error::Error; use extension::Extension; use extension::itunes::{ITunesCategory, ITunesOwner}; use extension::util::remove_extension_value; @@ -436,20 +435,20 @@ impl ITunesChannelExtension { impl ITunesChannelExtension { /// Create an `ITunesChannelExtension` from a `HashMap`. - pub fn from_map(mut map: HashMap>) -> Result { + pub fn from_map(mut map: HashMap>) -> Self { let mut ext = ITunesChannelExtension::default(); ext.author = remove_extension_value(&mut map, "author"); ext.block = remove_extension_value(&mut map, "block"); - ext.categories = parse_categories(&mut map)?; + ext.categories = parse_categories(&mut map); ext.image = parse_image(&mut map); ext.explicit = remove_extension_value(&mut map, "explicit"); ext.complete = remove_extension_value(&mut map, "complete"); ext.new_feed_url = remove_extension_value(&mut map, "new-feed-url"); - ext.owner = parse_owner(&mut map)?; + ext.owner = parse_owner(&mut map); ext.subtitle = remove_extension_value(&mut map, "subtitle"); ext.summary = remove_extension_value(&mut map, "summary"); ext.keywords = remove_extension_value(&mut map, "keywords"); - Ok(ext) + ext } } diff --git a/src/extension/itunes/mod.rs b/src/extension/itunes/mod.rs index 4292300019..e722b48379 100644 --- a/src/extension/itunes/mod.rs +++ b/src/extension/itunes/mod.rs @@ -7,7 +7,6 @@ use std::collections::HashMap; -use error::Error; use extension::Extension; mod itunes_category; @@ -32,12 +31,10 @@ fn parse_image(map: &mut HashMap>) -> Option { element.attrs.remove("href") } -fn parse_categories( - map: &mut HashMap>, -) -> Result, Error> { +fn parse_categories(map: &mut HashMap>) -> Vec { let mut elements = match map.remove("category") { Some(elements) => elements, - None => return Ok(Vec::new()), + None => return Vec::new(), }; let mut categories = Vec::with_capacity(elements.len()); @@ -62,27 +59,26 @@ fn parse_categories( categories.push(category); } - Ok(categories) + categories } -fn parse_owner(map: &mut HashMap>) -> Result, Error> { - let mut element = match map.remove("owner").map(|mut v| v.remove(0)) { - Some(element) => element, - None => return Ok(None), - }; - - let name = element - .children - .remove("name") - .and_then(|mut v| v.remove(0).value); - - let email = element - .children - .remove("email") - .and_then(|mut v| v.remove(0).value); - - let mut owner = ITunesOwner::default(); - owner.set_name(name); - owner.set_email(email); - Ok(Some(owner)) +fn parse_owner(map: &mut HashMap>) -> Option { + if let Some(mut element) = map.remove("owner").map(|mut v| v.remove(0)) { + let name = element + .children + .remove("name") + .and_then(|mut v| v.remove(0).value); + + let email = element + .children + .remove("email") + .and_then(|mut v| v.remove(0).value); + + let mut owner = ITunesOwner::default(); + owner.set_name(name); + owner.set_email(email); + Some(owner) + } else { + None + } } diff --git a/src/item.rs b/src/item.rs index 53e8e7b691..e21640d3b3 100644 --- a/src/item.rs +++ b/src/item.rs @@ -24,6 +24,7 @@ use guid::Guid; use source::Source; use toxml::{ToXml, WriterExt}; use util::element_text; +use std::collections::HashMap; /// Represents an item in an RSS feed. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -536,7 +537,7 @@ impl Item { } impl Item { - pub fn from_xml(reader: &mut Reader, _: Attributes) -> Result { + pub fn from_xml(namespaces: &HashMap, reader: &mut Reader, _: Attributes) -> Result { let mut item = Item::default(); let mut buf = Vec::new(); @@ -588,12 +589,17 @@ impl Item { } if !item.extensions.is_empty() { - if let Some(map) = item.extensions.remove("itunes") { - item.itunes_ext = Some(ITunesItemExtension::from_map(map)); - } - - if let Some(map) = item.extensions.remove("dc") { - item.dublin_core_ext = Some(DublinCoreExtension::from_map(map)); + // Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions) + for (prefix, namespace) in namespaces { + match namespace.as_ref() { + "http://www.itunes.com/dtds/podcast-1.0.dtd" => { + item.extensions.remove(prefix).map(|v| item.itunes_ext = Some(ITunesItemExtension::from_map(v))) + }, + "http://purl.org/dc/elements/1.1/" => { + item.extensions.remove(prefix).map(|v| item.dublin_core_ext = Some(DublinCoreExtension::from_map(v))) + }, + _ => None + }; } } diff --git a/tests/data/dublincore_altprefix.xml b/tests/data/dublincore_altprefix.xml new file mode 100644 index 0000000000..e5df8b0906 --- /dev/null +++ b/tests/data/dublincore_altprefix.xml @@ -0,0 +1,39 @@ + + + + Contributor 1 + Contributor 2 + Coverage + Creator + 2016-08-27 + Description + text/plain + Identifier + en-US + Publisher + Relation + Company + Source + Subject + Title + Type + + Contributor 1 + Contributor 2 + Coverage + Creator + 2016-08-27 + Description + text/plain + Identifier + en-US + Publisher + Relation + Company + Source + Subject + Title + Type + + + diff --git a/tests/read.rs b/tests/read.rs index 98ac321edb..e769d344dc 100644 --- a/tests/read.rs +++ b/tests/read.rs @@ -260,21 +260,21 @@ fn read_source() { assert_eq!( channel .items() - .get(0,) + .get(0) .unwrap() .source() .as_ref() - .map(|v| v.url(),), + .map(|v| v.url()), Some("http://example.com/feed/") ); assert_eq!( channel .items() - .get(0,) + .get(0) .unwrap() .source() .as_ref() - .and_then(|v| v.title(),), + .and_then(|v| v.title()), Some("Feed") ); } @@ -297,11 +297,11 @@ fn read_guid() { assert_eq!( channel .items() - .get(0,) + .get(0) .unwrap() .guid() .as_ref() - .map(|v| v.value(),), + .map(|v| v.value()), Some("abc") ); @@ -318,11 +318,11 @@ fn read_guid() { assert_eq!( channel .items() - .get(1,) + .get(1) .unwrap() .guid() .as_ref() - .map(|v| v.value(),), + .map(|v| v.value()), Some("def") ); } @@ -335,31 +335,31 @@ fn read_enclosure() { assert_eq!( channel .items() - .get(0,) + .get(0) .unwrap() .enclosure() .as_ref() - .map(|v| v.url(),), + .map(|v| v.url()), Some("http://example.com/media.mp3") ); assert_eq!( channel .items() - .get(0,) + .get(0) .unwrap() .enclosure() .as_ref() - .map(|v| v.length(),), + .map(|v| v.length()), Some("4992349") ); assert_eq!( channel .items() - .get(0,) + .get(0) .unwrap() .enclosure() .as_ref() - .map(|v| v.mime_type(),), + .map(|v| v.mime_type()), Some("audio/mpeg") ); } @@ -480,7 +480,11 @@ fn read_extension() { channel.namespaces().get("ext").unwrap(), "http://example.com/" ); - assert_eq!(channel.namespaces().len(), 1); + assert_eq!( + channel.namespaces().get("dc").unwrap(), + "http://purl.org/dc/elements/1.1/" + ); + assert_eq!(channel.namespaces().len(), 2); assert_eq!( get_extension_values( @@ -599,7 +603,7 @@ fn read_itunes() { .unwrap() .owner() .as_ref() - .and_then(|v| v.name(),), + .and_then(|v| v.name()), Some("Name") ); assert_eq!( @@ -608,7 +612,7 @@ fn read_itunes() { .unwrap() .owner() .as_ref() - .and_then(|v| v.email(),), + .and_then(|v| v.email()), Some("example@example.com") ); assert_eq!(channel.itunes_ext().unwrap().subtitle(), Some("Subtitle")); @@ -721,7 +725,16 @@ fn read_itunes() { #[test] fn read_dublincore() { - let input = include_str!("data/dublincore.xml"); + run_dublincore_test(include_str!("data/dublincore.xml")); +} + +#[test] +fn read_dublincore_altprefix() { + run_dublincore_test(include_str!("data/dublincore_altprefix.xml")); +} + +#[cfg(test)] +fn run_dublincore_test(input: &str) { let channel = input.parse::().expect("failed to parse xml"); fn test_ext(dc: &DublinCoreExtension) { @@ -814,6 +827,7 @@ fn read_dublincore() { .as_ref() .expect("dc extension missing"), ); + test_ext( channel .items()