Skip to content

Commit

Permalink
Backport pull request #376
Browse files Browse the repository at this point in the history
support a standard api for parsing media types
  • Loading branch information
digitalresistor committed Oct 15, 2018
1 parent 4b4ef75 commit 70ab88b
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 54 deletions.
5 changes: 5 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ Feature

- Add Request.remote_host, exposing REMOTE_HOST environment variable.

- Added ``acceptparse.Accept.parse_offer`` to codify what types of offers
are compatible with ``acceptparse.AcceptValidHeader.acceptable_offers``,
``acceptparse.AcceptMissingHeader.acceptable_offers``, and
``acceptparse.AcceptInvalidHeader.acceptable_offers``.
See https://github.com/Pylons/webob/pull/376

1.8.2 (2018-06-05)
------------------
Expand Down
101 changes: 54 additions & 47 deletions src/webob/acceptparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
``Accept-Language``.
"""

from collections import namedtuple
import re
import textwrap
import warnings
Expand Down Expand Up @@ -74,6 +75,9 @@ def _list_1_or_more__compiled_re(element_re):
)


# Parsed offer record: the media ``type``, its ``subtype`` and the ``params``
# (media type parameters) that followed them.
AcceptOffer = namedtuple('AcceptOffer', 'type subtype params')


class Accept(object):
"""
Represent an ``Accept`` header.
Expand Down Expand Up @@ -408,25 +412,51 @@ def generator(value):
)
return generator(value=value)

def _parse_and_normalize_offers(self, offers):
@classmethod
def parse_offer(cls, offer):
"""
Throw out any offers that do not match the media type ABNF.
Parse an offer into its component parts.
:param offer: A media type in the format ``type/subtype[;params]``.
Media ranges (``*`` as the type or the subtype) are rejected.
:return: A named tuple containing ``(*type*, *subtype*, *params*)``.
| *params* is a list containing ``(*parameter name*, *value*)``
values.
:raises ValueError: If the offer does not match the required format.
"""
match = cls.media_type_compiled_re.match(offer.lower())
if not match:
raise ValueError('Invalid value for an Accept offer.')

groups = match.groups()
offer_type, offer_subtype = groups[0].split('/')
offer_params = cls._parse_media_type_params(
media_type_params_segment=groups[1],
)
if offer_type == '*' or offer_subtype == '*':
raise ValueError('Invalid value for an Accept offer.')
return AcceptOffer(offer_type, offer_subtype, offer_params)

@classmethod
def _parse_and_normalize_offers(cls, offers):
"""
Throw out any offers that :meth:`.Accept.parse_offer` cannot parse.
:return: A list of offers split into the format ``[offer_index,
offer_type_subtype, offer_media_type_params]``.
parsed_offer]``.
"""
lowercased_offers_parsed = []
parsed_offers = []
for index, offer in enumerate(offers):
match = self.media_type_compiled_re.match(offer.lower())
# we're willing to try to match any offer that the media type
# grammar can parse, but we'll throw out anything that doesn't fit
# the correct syntax - this is not saying that the media type is
# actually a real media type, just that it looks like one
if match:
lowercased_offers_parsed.append([index] + list(match.groups()))
return lowercased_offers_parsed
try:
parsed_offer = cls.parse_offer(offer)
except ValueError:
continue
parsed_offers.append([index, parsed_offer])
return parsed_offers


class AcceptValidHeader(Accept):
Expand Down Expand Up @@ -791,12 +821,8 @@ def acceptable_offers(self, offers):
This uses the matching rules described in :rfc:`RFC 7231, section 5.3.2
<7231#section-5.3.2>`.
Any offers that do not match the media type grammar will be ignored.
This function also supports media ranges (without media type
parameters) but without any specificity. An offered media range is
assigned the highest q-value of any media range from the header that
would match any media type that could be derived from the offer.
Any offers that cannot be parsed via
:meth:`.Accept.parse_offer` will be ignored.
:param offers: ``iterable`` of ``str`` media types (media types can
include media type parameters)
Expand All @@ -823,45 +849,25 @@ def acceptable_offers(self, offers):
lowercased_offers_parsed = self._parse_and_normalize_offers(offers)

acceptable_offers_n_quality_factors = {}
for (
offer_index, offer_type_subtype, offer_media_type_params
) in lowercased_offers_parsed:
for offer_index, parsed_offer in lowercased_offers_parsed:
offer = offers[offer_index]
offer_type, offer_subtype = offer_type_subtype.split('/', 1)
offer_media_type_params = self._parse_media_type_params(
media_type_params_segment=offer_media_type_params,
)
offer_is_range = '*' in offer
offer_type, offer_subtype, offer_media_type_params = parsed_offer
for (
range_type_subtype, range_qvalue, range_media_type_params, __,
) in lowercased_ranges:
range_type, range_subtype = range_type_subtype.split('/', 1)

# if a media range is supplied as an offer then specificity is
# unimportant, we'll just compare for match and use the
# highest matching qvalue
if offer_is_range:
if (
offer_type_subtype == '*/*'
or offer_type == range_type and offer_subtype == '*'
):
prev_match = acceptable_offers_n_quality_factors.get(offer)
if not prev_match or prev_match[0] < range_qvalue:
acceptable_offers_n_quality_factors[offer] = (
range_qvalue, # qvalue of matched range
offer_index,
4, # unused for offers that are media ranges
)
continue

# The specificity values below are based on the list in the
# example in RFC 7231 section 5.3.2 explaining how "media
# ranges can be overridden by more specific media ranges or
# specific media types". We assign specificity to the list
# items in reverse order, so specificity 4, 3, 2, 1 correspond
# to 1, 2, 3, 4 in the list, respectively (so that higher
# specificity has higher precedence).
elif offer_type_subtype == range_type_subtype:
if (
offer_type == range_type
and offer_subtype == range_subtype
):
if range_media_type_params == []:
# If offer_media_type_params == [], the offer and the
# range match exactly, with neither having media type
Expand Down Expand Up @@ -1280,7 +1286,8 @@ def acceptable_offers(self, offers):
"""
Return the offers that are acceptable according to the header.
Any offers that do not match the media type grammar will be ignored.
Any offers that cannot be parsed via
:meth:`.Accept.parse_offer` will be ignored.
:param offers: ``iterable`` of ``str`` media types (media types can
include media type parameters)
Expand All @@ -1292,7 +1299,7 @@ def acceptable_offers(self, offers):
"""
return [
(offers[offer_index], 1.0)
for offer_index, _, _
for offer_index, _
# avoid returning any offers that don't match the grammar so
# that the return values here are consistent with what would be
# returned in AcceptValidHeader
Expand Down
45 changes: 38 additions & 7 deletions tests/test_acceptparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,37 @@ def test_parse__valid_header(self, value, expected_list):
list_of_returned = list(returned)
assert list_of_returned == expected_list

# Offers made of a concrete type/subtype, optionally followed by media type
# parameters, are expected to parse into (type, subtype, params), where params
# is a list of (name, value) pairs in the order they appear in the offer.
@pytest.mark.parametrize('offer, expected_return', [
['text/html', ('text', 'html', [])],
[
'text/html;charset=utf8',
('text', 'html', [('charset', 'utf8')]),
],
[
'text/html;charset=utf8;x-version=1',
('text', 'html', [('charset', 'utf8'), ('x-version', '1')]),
],
])
def test_parse_offer__valid(self, offer, expected_return):
result = Accept.parse_offer(offer)
# the expected values are plain tuples; the result is compared by equality
assert result == expected_return

# Every offer below is expected to be rejected with ValueError: the empty
# string, values that are not of the form type/subtype, and anything that
# uses a ``*`` wildcard as type or subtype (with or without parameters).
@pytest.mark.parametrize('offer', [
'',
'foo',
'foo/bar/baz',
'*/plain',
'*/plain;charset=utf8',
'*/plain;charset=utf8;x-version=1',
'*/*;charset=utf8',
'text/*;charset=utf8',
'text/*',
'*/*',
])
def test_parse_offer__invalid(self, offer):
with pytest.raises(ValueError):
Accept.parse_offer(offer)


class TestAcceptValidHeader(object):
def test_parse__inherited(self):
Expand Down Expand Up @@ -1057,20 +1088,20 @@ def test_acceptable_offers__invalid_offers(
('text/plain', 0.3),
],
),
(
'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7',
['*/*', 'text/*', 'text/html', 'image/*'],
[('*/*', 0.7), ('text/*', 0.7), ('text/html', 0.5)],
),
(
'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7',
['text/*', '*/*', 'text/html', 'image/*'],
[('text/*', 0.7), ('*/*', 0.7), ('text/html', 0.5)],
[('text/html', 0.5)],
),
(
'text/html;level=1;q=0.7',
['text/*', '*/*', 'text/html', 'text/html;level=1', 'image/*'],
[('text/*', 0.7), ('*/*', 0.7), ('text/html;level=1', 0.7)],
[('text/html;level=1', 0.7)],
),
(
'*/*',
['text/*'],
[],
),
(
'',
Expand Down

0 comments on commit 70ab88b

Please sign in to comment.