Backport pull request #376

support a standard api for parsing media types
Pylons · Oct 15, 2018 · 70ab88b · 70ab88b
1 parent 4b4ef75
commit 70ab88b
Show file tree

Hide file tree

Showing 3 changed files with 97 additions and 54 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -16,6 +16,11 @@ Feature
 
 - Add Request.remote_host, exposing REMOTE_HOST environment variable.
 
+- Added ``acceptparse.Accept.parse_offer`` to codify what types of offers
+  are compatible with ``acceptparse.AcceptValidHeader.acceptable_offers``,
+  ``acceptparse.AcceptMissingHeader.acceptable_offers``, and
+  ``acceptparse.AcceptInvalidHeader.acceptable_offers``.
+  See https://github.com/Pylons/webob/pull/376
 
 1.8.2 (2018-06-05)
 ------------------

diff --git a/src/webob/acceptparse.py b/src/webob/acceptparse.py
@@ -5,6 +5,7 @@
 ``Accept-Language``.
 """
 
+from collections import namedtuple
 import re
 import textwrap
 import warnings
@@ -74,6 +75,9 @@ def _list_1_or_more__compiled_re(element_re):
     )
 
 
+AcceptOffer = namedtuple('AcceptOffer', ['type', 'subtype', 'params'])
+
+
 class Accept(object):
     """
     Represent an ``Accept`` header.
@@ -408,25 +412,51 @@ def generator(value):
                 )
         return generator(value=value)
 
-    def _parse_and_normalize_offers(self, offers):
+    @classmethod
+    def parse_offer(cls, offer):
         """
-        Throw out any offers that do not match the media type ABNF.
+        Parse an offer into its component parts.
+
+        :param offer: A media type (not a media range) in the format
+                      ``type/subtype[;params]``.
+        :return: A named tuple containing ``(*type*, *subtype*, *params*)``.
+
+                 | *params* is a list containing ``(*parameter name*, *value*)``
+                   values.
+
+        :raises ValueError: If the offer does not match the required format.
+
+        """
+        match = cls.media_type_compiled_re.match(offer.lower())
+        if not match:
+            raise ValueError('Invalid value for an Accept offer.')
+
+        groups = match.groups()
+        offer_type, offer_subtype = groups[0].split('/')
+        offer_params = cls._parse_media_type_params(
+            media_type_params_segment=groups[1],
+        )
+        if offer_type == '*' or offer_subtype == '*':
+            raise ValueError('Invalid value for an Accept offer.')
+        return AcceptOffer(offer_type, offer_subtype, offer_params)
+
+    @classmethod
+    def _parse_and_normalize_offers(cls, offers):
+        """
+        Throw out any offers that :meth:`.Accept.parse_offer` rejects.
 
         :return: A list of offers split into the format ``[offer_index,
-                 offer_type_subtype, offer_media_type_params]``.
+                 parsed_offer]``.
 
         """
-        lowercased_offers_parsed = []
+        parsed_offers = []
         for index, offer in enumerate(offers):
-            match = self.media_type_compiled_re.match(offer.lower())
-            # we're willing to try to match any offer that matches the
-            # media type grammar can parse, but we'll throw out anything
-            # that doesn't fit the correct syntax - this is not saying that
-            # the media type is actually a real media type, just that it looks
-            # like one
-            if match:
-                lowercased_offers_parsed.append([index] + list(match.groups()))
-        return lowercased_offers_parsed
+            try:
+                parsed_offer = cls.parse_offer(offer)
+            except ValueError:
+                continue
+            parsed_offers.append([index, parsed_offer])
+        return parsed_offers
 
 
 class AcceptValidHeader(Accept):
@@ -791,12 +821,8 @@ def acceptable_offers(self, offers):
         This uses the matching rules described in :rfc:`RFC 7231, section 5.3.2
         <7231#section-5.3.2>`.
 
-        Any offers that do not match the media type grammar will be ignored.
-
-        This function also supports media ranges (without media type
-        parameters) but without any specificity. An offered media range is
-        assigned the highest q-value of any media range from the header that
-        would match any media type that could be derived from the offer.
+        Any offers that cannot be parsed via
+        :meth:`.Accept.parse_offer` will be ignored.
 
         :param offers: ``iterable`` of ``str`` media types (media types can
                        include media type parameters)
@@ -823,45 +849,25 @@ def acceptable_offers(self, offers):
         lowercased_offers_parsed = self._parse_and_normalize_offers(offers)
 
         acceptable_offers_n_quality_factors = {}
-        for (
-            offer_index, offer_type_subtype, offer_media_type_params
-        ) in lowercased_offers_parsed:
+        for offer_index, parsed_offer in lowercased_offers_parsed:
             offer = offers[offer_index]
-            offer_type, offer_subtype = offer_type_subtype.split('/', 1)
-            offer_media_type_params = self._parse_media_type_params(
-                media_type_params_segment=offer_media_type_params,
-            )
-            offer_is_range = '*' in offer
+            offer_type, offer_subtype, offer_media_type_params = parsed_offer
             for (
                 range_type_subtype, range_qvalue, range_media_type_params, __,
             ) in lowercased_ranges:
                 range_type, range_subtype = range_type_subtype.split('/', 1)
 
-                # if a media range is supplied as an offer then specificity is
-                # unimportant, we'll just compare for match and use the
-                # highest matching qvalue
-                if offer_is_range:
-                    if (
-                        offer_type_subtype == '*/*'
-                        or offer_type == range_type and offer_subtype == '*'
-                    ):
-                        prev_match = acceptable_offers_n_quality_factors.get(offer)
-                        if not prev_match or prev_match[0] < range_qvalue:
-                            acceptable_offers_n_quality_factors[offer] = (
-                                range_qvalue,  # qvalue of matched range
-                                offer_index,
-                                4,  # unused for offers that are media ranges
-                            )
-                    continue
-
                 # The specificity values below are based on the list in the
                 # example in RFC 7231 section 5.3.2 explaining how "media
                 # ranges can be overridden by more specific media ranges or
                 # specific media types". We assign specificity to the list
                 # items in reverse order, so specificity 4, 3, 2, 1 correspond
                 # to 1, 2, 3, 4 in the list, respectively (so that higher
                 # specificity has higher precedence).
-                elif offer_type_subtype == range_type_subtype:
+                if (
+                    offer_type == range_type
+                    and offer_subtype == range_subtype
+                ):
                     if range_media_type_params == []:
                         # If offer_media_type_params == [], the offer and the
                         # range match exactly, with neither having media type
@@ -1280,7 +1286,8 @@ def acceptable_offers(self, offers):
         """
         Return the offers that are acceptable according to the header.
 
-        Any offers that do not match the media type grammar will be ignored.
+        Any offers that cannot be parsed via
+        :meth:`.Accept.parse_offer` will be ignored.
 
         :param offers: ``iterable`` of ``str`` media types (media types can
                        include media type parameters)
@@ -1292,7 +1299,7 @@ def acceptable_offers(self, offers):
         """
         return [
             (offers[offer_index], 1.0)
-            for offer_index, _, _
+            for offer_index, _
             # avoid returning any offers that don't match the grammar so
             # that the return values here are consistent with what would be
             # returned in AcceptValidHeader

diff --git a/tests/test_acceptparse.py b/tests/test_acceptparse.py
@@ -382,6 +382,37 @@ def test_parse__valid_header(self, value, expected_list):
         list_of_returned = list(returned)
         assert list_of_returned == expected_list
 
+    @pytest.mark.parametrize('offer, expected_return', [
+        ['text/html', ('text', 'html', [])],
+        [
+            'text/html;charset=utf8',
+            ('text', 'html', [('charset', 'utf8')]),
+        ],
+        [
+            'text/html;charset=utf8;x-version=1',
+            ('text', 'html', [('charset', 'utf8'), ('x-version', '1')]),
+        ],
+    ])
+    def test_parse_offer__valid(self, offer, expected_return):
+        result = Accept.parse_offer(offer)
+        assert result == expected_return
+
+    @pytest.mark.parametrize('offer', [
+        '',
+        'foo',
+        'foo/bar/baz',
+        '*/plain',
+        '*/plain;charset=utf8',
+        '*/plain;charset=utf8;x-version=1',
+        '*/*;charset=utf8',
+        'text/*;charset=utf8',
+        'text/*',
+        '*/*',
+    ])
+    def test_parse_offer__invalid(self, offer):
+        with pytest.raises(ValueError):
+            Accept.parse_offer(offer)
+
 
 class TestAcceptValidHeader(object):
     def test_parse__inherited(self):
@@ -1057,20 +1088,20 @@ def test_acceptable_offers__invalid_offers(
                 ('text/plain', 0.3),
             ],
         ),
-        (
-            'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7',
-            ['*/*', 'text/*', 'text/html', 'image/*'],
-            [('*/*', 0.7), ('text/*', 0.7), ('text/html', 0.5)],
-        ),
         (
             'text/*;q=0.3, text/html;q=0.5, text/html;level=1;q=0.7',
             ['text/*', '*/*', 'text/html', 'image/*'],
-            [('text/*', 0.7), ('*/*', 0.7), ('text/html', 0.5)],
+            [('text/html', 0.5)],
         ),
         (
             'text/html;level=1;q=0.7',
             ['text/*', '*/*', 'text/html', 'text/html;level=1', 'image/*'],
-            [('text/*', 0.7), ('*/*', 0.7), ('text/html;level=1', 0.7)],
+            [('text/html;level=1', 0.7)],
+        ),
+        (
+            '*/*',
+            ['text/*'],
+            [],
         ),
         (
             '',