Skip to content

Commit

Permalink
Parse via namespace and prefix
Browse files Browse the repository at this point in the history
Documents are free to define a local prefix for a given namespace, so
parsing should not hard-code a specific prefix (e.g. "dc" for Dublin
Core).

This commit records all configured namespaces during the initial parse
and uses these namespaces to instantiate the appropriate extensions as
they are encountered.
  • Loading branch information
markpritchard committed Apr 27, 2019
1 parent 7f4c387 commit 91c0c03
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 74 deletions.
37 changes: 19 additions & 18 deletions src/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -955,15 +955,11 @@ impl Channel {
if element.name() == b"rss" || element.name() == b"rdf:RDF" {
for attr in element.attributes().with_checks(false) {
if let Ok(attr) = attr {
if !attr.key.starts_with(b"xmlns:") || attr.key == b"xmlns:itunes"
|| attr.key == b"xmlns:dc"
{
continue;
if attr.key.starts_with(b"xmlns:") {
let prefix = str::from_utf8(&attr.key[6..])?.to_string();
let namespace = attr.unescape_and_decode_value(&reader)?;
namespaces.insert(prefix, namespace);
}

let key = str::from_utf8(&attr.key[6..])?.to_string();
let value = attr.unescape_and_decode_value(&reader)?;
namespaces.insert(key, value);
}
}

Expand All @@ -981,11 +977,11 @@ impl Channel {
match reader.read_event(&mut buf)? {
Event::Start(element) => match element.name() {
b"channel" => {
let inner = Channel::from_xml(&mut reader, element.attributes())?;
let inner = Channel::from_xml(&namespaces, &mut reader, element.attributes())?;
channel = Some(inner);
}
b"item" => {
let item = Item::from_xml(&mut reader, element.attributes())?;
let item = Item::from_xml(&namespaces, &mut reader, element.attributes())?;
if items.is_none() {
items = Some(Vec::new());
}
Expand Down Expand Up @@ -1117,7 +1113,7 @@ impl ToString for Channel {
}

impl Channel {
pub fn from_xml<R: BufRead>(reader: &mut Reader<R>, _: Attributes) -> Result<Self, Error> {
pub fn from_xml<R: BufRead>(namespaces: &HashMap<String, String>, reader: &mut Reader<R>, _: Attributes) -> Result<Self, Error> {
let mut channel = Channel::default();
let mut buf = Vec::new();
let mut skip_buf = Vec::new();
Expand All @@ -1142,7 +1138,7 @@ impl Channel {
channel.text_input = Some(text_input);
}
b"item" => {
let item = Item::from_xml(reader, element.attributes())?;
let item = Item::from_xml(&namespaces, reader, element.attributes())?;
channel.items.push(item);
}
b"title" => {
Expand Down Expand Up @@ -1229,12 +1225,17 @@ impl Channel {
}

if !channel.extensions.is_empty() {
if let Some(map) = channel.extensions.remove("itunes") {
channel.itunes_ext = Some(ITunesChannelExtension::from_map(map)?);
}

if let Some(map) = channel.extensions.remove("dc") {
channel.dublin_core_ext = Some(DublinCoreExtension::from_map(map));
// Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions)
for (prefix, namespace) in namespaces {
match namespace.as_ref() {
"http://www.itunes.com/dtds/podcast-1.0.dtd" => {
channel.extensions.remove(prefix).map(|v| channel.itunes_ext = Some(ITunesChannelExtension::from_map(v)))
},
"http://purl.org/dc/elements/1.1/" => {
channel.extensions.remove(prefix).map(|v| channel.dublin_core_ext = Some(DublinCoreExtension::from_map(v)))
},
_ => None
};
}
}

Expand Down
9 changes: 4 additions & 5 deletions src/extension/itunes/itunes_channel_extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use quick_xml::events::{BytesStart, Event};
use quick_xml::Writer;

use super::{parse_categories, parse_image, parse_owner};
use error::Error;
use extension::Extension;
use extension::itunes::{ITunesCategory, ITunesOwner};
use extension::util::remove_extension_value;
Expand Down Expand Up @@ -436,20 +435,20 @@ impl ITunesChannelExtension {

impl ITunesChannelExtension {
/// Create an `ITunesChannelExtension` from a `HashMap`.
pub fn from_map(mut map: HashMap<String, Vec<Extension>>) -> Result<Self, Error> {
pub fn from_map(mut map: HashMap<String, Vec<Extension>>) -> Self {
let mut ext = ITunesChannelExtension::default();
ext.author = remove_extension_value(&mut map, "author");
ext.block = remove_extension_value(&mut map, "block");
ext.categories = parse_categories(&mut map)?;
ext.categories = parse_categories(&mut map);
ext.image = parse_image(&mut map);
ext.explicit = remove_extension_value(&mut map, "explicit");
ext.complete = remove_extension_value(&mut map, "complete");
ext.new_feed_url = remove_extension_value(&mut map, "new-feed-url");
ext.owner = parse_owner(&mut map)?;
ext.owner = parse_owner(&mut map);
ext.subtitle = remove_extension_value(&mut map, "subtitle");
ext.summary = remove_extension_value(&mut map, "summary");
ext.keywords = remove_extension_value(&mut map, "keywords");
Ok(ext)
ext
}
}

Expand Down
48 changes: 22 additions & 26 deletions src/extension/itunes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

use std::collections::HashMap;

use error::Error;
use extension::Extension;

mod itunes_category;
Expand All @@ -32,12 +31,10 @@ fn parse_image(map: &mut HashMap<String, Vec<Extension>>) -> Option<String> {
element.attrs.remove("href")
}

fn parse_categories(
map: &mut HashMap<String, Vec<Extension>>,
) -> Result<Vec<ITunesCategory>, Error> {
fn parse_categories(map: &mut HashMap<String, Vec<Extension>>) -> Vec<ITunesCategory> {
let mut elements = match map.remove("category") {
Some(elements) => elements,
None => return Ok(Vec::new()),
None => return Vec::new(),
};

let mut categories = Vec::with_capacity(elements.len());
Expand All @@ -62,27 +59,26 @@ fn parse_categories(
categories.push(category);
}

Ok(categories)
categories
}

fn parse_owner(map: &mut HashMap<String, Vec<Extension>>) -> Result<Option<ITunesOwner>, Error> {
let mut element = match map.remove("owner").map(|mut v| v.remove(0)) {
Some(element) => element,
None => return Ok(None),
};

let name = element
.children
.remove("name")
.and_then(|mut v| v.remove(0).value);

let email = element
.children
.remove("email")
.and_then(|mut v| v.remove(0).value);

let mut owner = ITunesOwner::default();
owner.set_name(name);
owner.set_email(email);
Ok(Some(owner))
/// Builds an `ITunesOwner` from the `owner` entry of an extension map.
///
/// The `owner` entry is removed from `map`. The first element stored under it
/// supplies the owner: the value of its first `name` child is passed to
/// `set_name` and the value of its first `email` child to `set_email`. A child
/// entry that is missing, empty, or has no value contributes `None`.
///
/// Returns `None` when `map` has no `owner` entry or when that entry holds an
/// empty list.
fn parse_owner(map: &mut HashMap<String, Vec<Extension>>) -> Option<ITunesOwner> {
    // `into_iter().next()` yields the first element without panicking on an
    // empty list (which `Vec::remove(0)` would do) and without shifting the
    // remaining elements.
    let mut element = map.remove("owner")?.into_iter().next()?;

    let name = element
        .children
        .remove("name")
        .and_then(|v| v.into_iter().next())
        .and_then(|child| child.value);

    let email = element
        .children
        .remove("email")
        .and_then(|v| v.into_iter().next())
        .and_then(|child| child.value);

    let mut owner = ITunesOwner::default();
    owner.set_name(name);
    owner.set_email(email);
    Some(owner)
}
20 changes: 13 additions & 7 deletions src/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use guid::Guid;
use source::Source;
use toxml::{ToXml, WriterExt};
use util::element_text;
use std::collections::HashMap;

/// Represents an item in an RSS feed.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -536,7 +537,7 @@ impl Item {
}

impl Item {
pub fn from_xml<R: BufRead>(reader: &mut Reader<R>, _: Attributes) -> Result<Self, Error> {
pub fn from_xml<R: BufRead>(namespaces: &HashMap<String, String>, reader: &mut Reader<R>, _: Attributes) -> Result<Self, Error> {
let mut item = Item::default();
let mut buf = Vec::new();

Expand Down Expand Up @@ -588,12 +589,17 @@ impl Item {
}

if !item.extensions.is_empty() {
if let Some(map) = item.extensions.remove("itunes") {
item.itunes_ext = Some(ITunesItemExtension::from_map(map));
}

if let Some(map) = item.extensions.remove("dc") {
item.dublin_core_ext = Some(DublinCoreExtension::from_map(map));
// Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions)
for (prefix, namespace) in namespaces {
match namespace.as_ref() {
"http://www.itunes.com/dtds/podcast-1.0.dtd" => {
item.extensions.remove(prefix).map(|v| item.itunes_ext = Some(ITunesItemExtension::from_map(v)))
},
"http://purl.org/dc/elements/1.1/" => {
item.extensions.remove(prefix).map(|v| item.dublin_core_ext = Some(DublinCoreExtension::from_map(v)))
},
_ => None
};
}
}

Expand Down
39 changes: 39 additions & 0 deletions tests/data/dublincore_altprefix.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dublincore="http://purl.org/dc/elements/1.1/">
<channel>
<dublincore:contributor>Contributor 1</dublincore:contributor>
<dublincore:contributor>Contributor 2</dublincore:contributor>
<dublincore:coverage>Coverage</dublincore:coverage>
<dublincore:creator>Creator</dublincore:creator>
<dublincore:date>2016-08-27</dublincore:date>
<dublincore:description>Description</dublincore:description>
<dublincore:format>text/plain</dublincore:format>
<dublincore:identifier>Identifier</dublincore:identifier>
<dublincore:language>en-US</dublincore:language>
<dublincore:publisher>Publisher</dublincore:publisher>
<dublincore:relation>Relation</dublincore:relation>
<dublincore:rights>Company</dublincore:rights>
<dublincore:source>Source</dublincore:source>
<dublincore:subject>Subject</dublincore:subject>
<dublincore:title>Title</dublincore:title>
<dublincore:type>Type</dublincore:type>
<item>
<dublincore:contributor>Contributor 1</dublincore:contributor>
<dublincore:contributor>Contributor 2</dublincore:contributor>
<dublincore:coverage>Coverage</dublincore:coverage>
<dublincore:creator>Creator</dublincore:creator>
<dublincore:date>2016-08-27</dublincore:date>
<dublincore:description>Description</dublincore:description>
<dublincore:format>text/plain</dublincore:format>
<dublincore:identifier>Identifier</dublincore:identifier>
<dublincore:language>en-US</dublincore:language>
<dublincore:publisher>Publisher</dublincore:publisher>
<dublincore:relation>Relation</dublincore:relation>
<dublincore:rights>Company</dublincore:rights>
<dublincore:source>Source</dublincore:source>
<dublincore:subject>Subject</dublincore:subject>
<dublincore:title>Title</dublincore:title>
<dublincore:type>Type</dublincore:type>
</item>
</channel>
</rss>
Loading

0 comments on commit 91c0c03

Please sign in to comment.