From 70ab88b76fedb7ab726326c9391d55c09fe1e655 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Tue, 9 Oct 2018 21:31:31 -0600 Subject: [PATCH] Backport pull request #376 support a standard api for parsing media types --- CHANGES.txt | 5 ++ src/webob/acceptparse.py | 101 ++++++++++++++++++++------------------ tests/test_acceptparse.py | 45 ++++++++++++++--- 3 files changed, 97 insertions(+), 54 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index b38c6c04..efcc839c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -16,6 +16,11 @@ Feature - Add Request.remote_host, exposing REMOTE_HOST environment variable. +- Added ``acceptparse.Accept.parse_offer`` to codify what types of offers + are compatible with ``acceptparse.AcceptValidHeader.acceptable_offers``, + ``acceptparse.AcceptMissingHeader.acceptable_offers``, and + ``acceptparse.AcceptInvalidHeader.acceptable_offers``. + See https://github.com/Pylons/webob/pull/376 1.8.2 (2018-06-05) ------------------ diff --git a/src/webob/acceptparse.py b/src/webob/acceptparse.py index 8f496cbd..99e6f30c 100644 --- a/src/webob/acceptparse.py +++ b/src/webob/acceptparse.py @@ -5,6 +5,7 @@ ``Accept-Language``. """ +from collections import namedtuple import re import textwrap import warnings @@ -74,6 +75,9 @@ def _list_1_or_more__compiled_re(element_re): ) +AcceptOffer = namedtuple('AcceptOffer', ['type', 'subtype', 'params']) + + class Accept(object): """ Represent an ``Accept`` header. @@ -408,25 +412,51 @@ def generator(value): ) return generator(value=value) - def _parse_and_normalize_offers(self, offers): + @classmethod + def parse_offer(cls, offer): """ - Throw out any offers that do not match the media type ABNF. + Parse an offer into its component parts. + + :param offer: A media type or range in the format + ``type/subtype[;params]``. + :return: A named tuple containing ``(*type*, *subtype*, *params*)``. + + | *params* is a list containing ``(*parameter name*, *value*)`` + values. + + :raises ValueError: If the offer does not match the required format. + + """ + match = cls.media_type_compiled_re.match(offer.lower()) + if not match: + raise ValueError('Invalid value for an Accept offer.') + + groups = match.groups() + offer_type, offer_subtype = groups[0].split('/') + offer_params = cls._parse_media_type_params( + media_type_params_segment=groups[1], + ) + if offer_type == '*' or offer_subtype == '*': + raise ValueError('Invalid value for an Accept offer.') + return AcceptOffer(offer_type, offer_subtype, offer_params) + + @classmethod + def _parse_and_normalize_offers(cls, offers): + """ + Throw out any offers that do not match the media range ABNF. :return: A list of offers split into the format ``[offer_index, - offer_type_subtype, offer_media_type_params]``. + parsed_offer]``. """ - lowercased_offers_parsed = [] + parsed_offers = [] for index, offer in enumerate(offers): - match = self.media_type_compiled_re.match(offer.lower()) - # we're willing to try to match any offer that matches the - # media type grammar can parse, but we'll throw out anything - # that doesn't fit the correct syntax - this is not saying that - # the media type is actually a real media type, just that it looks - # like one - if match: - lowercased_offers_parsed.append([index] + list(match.groups())) - return lowercased_offers_parsed + try: + parsed_offer = cls.parse_offer(offer) + except ValueError: + continue + parsed_offers.append([index, parsed_offer]) + return parsed_offers class AcceptValidHeader(Accept): @@ -791,12 +821,8 @@ def acceptable_offers(self, offers): This uses the matching rules described in :rfc:`RFC 7231, section 5.3.2 <7231#section-5.3.2>`. - Any offers that do not match the media type grammar will be ignored. - - This function also supports media ranges (without media type - parameters) but without any specificity. An offered media range is - assigned the highest q-value of any media range from the header that - would match any media type that could be derived from the offer. + Any offers that cannot be parsed via + :meth:`.Accept.parse_offer` will be ignored. :param offers: ``iterable`` of ``str`` media types (media types can include media type parameters) @@ -823,37 +849,14 @@ def acceptable_offers(self, offers): lowercased_offers_parsed = self._parse_and_normalize_offers(offers) acceptable_offers_n_quality_factors = {} - for ( - offer_index, offer_type_subtype, offer_media_type_params - ) in lowercased_offers_parsed: + for offer_index, parsed_offer in lowercased_offers_parsed: offer = offers[offer_index] - offer_type, offer_subtype = offer_type_subtype.split('/', 1) - offer_media_type_params = self._parse_media_type_params( - media_type_params_segment=offer_media_type_params, - ) - offer_is_range = '*' in offer + offer_type, offer_subtype, offer_media_type_params = parsed_offer for ( range_type_subtype, range_qvalue, range_media_type_params, __, ) in lowercased_ranges: range_type, range_subtype = range_type_subtype.split('/', 1) - # if a media range is supplied as an offer then specificity is - # unimportant, we'll just compare for match and use the - # highest matching qvalue - if offer_is_range: - if ( - offer_type_subtype == '*/*' - or offer_type == range_type and offer_subtype == '*' - ): - prev_match = acceptable_offers_n_quality_factors.get(offer) - if not prev_match or prev_match[0] < range_qvalue: - acceptable_offers_n_quality_factors[offer] = ( - range_qvalue, # qvalue of matched range - offer_index, - 4, # unused for offers that are media ranges - ) - continue - # The specificity values below are based on the list in the # example in RFC 7231 section 5.3.2 explaining how "media # ranges can be overridden by more specific media ranges or @@ -861,7 +864,10 @@ def acceptable_offers(self, offers): # items in reverse order, so specificity 4, 3, 2, 1 correspond # to 1, 2, 3, 4 in the list, respectively (so that higher # specificity has higher precedence). - elif offer_type_subtype == range_type_subtype: + if ( + offer_type == range_type + and offer_subtype == range_subtype + ): if range_media_type_params == []: # If offer_media_type_params == [], the offer and the # range match exactly, with neither having media type @@ -1280,7 +1286,8 @@ def acceptable_offers(self, offers): """ Return the offers that are acceptable according to the header. - Any offers that do not match the media type grammar will be ignored. + Any offers that cannot be parsed via + :meth:`.Accept.parse_offer` will be ignored. :param offers: ``iterable`` of ``str`` media types (media types can include media type parameters) @@ -1292,7 +1299,7 @@ def acceptable_offers(self, offers): """ return [ (offers[offer_index], 1.0) - for offer_index, _, _ + for offer_index, _ # avoid returning any offers that don't match the grammar so # that the return values here are consistent with what would be # returned in AcceptValidHeader diff --git a/tests/test_acceptparse.py b/tests/test_acceptparse.py index c8d15817..eae65ef4 100644 --- a/tests/test_acceptparse.py +++ b/tests/test_acceptparse.py @@ -382,6 +382,37 @@ def test_parse__valid_header(self, value, expected_list): list_of_returned = list(returned) assert list_of_returned == expected_list + @pytest.mark.parametrize('offer, expected_return', [ + ['text/html', ('text', 'html', [])], + [ + 'text/html;charset=utf8', + ('text', 'html', [('charset', 'utf8')]), + ], + [ + 'text/html;charset=utf8;x-version=1', + ('text', 'html', [('charset', 'utf8'), ('x-version', '1')]), + ], + ]) + def test_parse_offer__valid(self, offer, expected_return): + result = Accept.parse_offer(offer) + assert result == expected_return + + @pytest.mark.parametrize('offer', [ + '', + 'foo', + 'foo/bar/baz', + '*/plain', + '*/plain;charset=utf8', + '*/plain;charset=utf8;x-version=1', + '*/*;charset=utf8', + 'text/*;charset=utf8', + 'text/*', + '*/*', + ]) + def test_parse_offer__invalid(self, offer): + with pytest.raises(ValueError): + Accept.parse_offer(offer) + class TestAcceptValidHeader(object): def test_parse__inherited(self): @@ -1057,20 +1088,20 @@ def test_acceptable_offers__invalid_offers( ('text/plain', 0.3), ], ), - ( - 'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7', - ['*/*', 'text/*', 'text/html', 'image/*'], - [('*/*', 0.7), ('text/*', 0.7), ('text/html', 0.5)], - ), ( 'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7', ['text/*', '*/*', 'text/html', 'image/*'], - [('text/*', 0.7), ('*/*', 0.7), ('text/html', 0.5)], + [('text/html', 0.5)], ), ( 'text/html;level=1;q=0.7', ['text/*', '*/*', 'text/html', 'text/html;level=1', 'image/*'], - [('text/*', 0.7), ('*/*', 0.7), ('text/html;level=1', 0.7)], + [('text/html;level=1', 0.7)], + ), + ( + '*/*', + ['text/*'], + [], ), ( '',