Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cookie filter #799

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e84757a
Implemented simple sending cookie domain filter
panda73111 Feb 22, 2016
67f278d
Added tests for simple sending cookie domain filter
panda73111 Feb 22, 2016
eb8ff5c
Fixed flake8 errors
panda73111 Feb 22, 2016
1def5b0
Using ipaddress module to determine if a netloc is an IP address
panda73111 Feb 22, 2016
fbe82c4
Added workaround somewhere else commented "use dict method because Si…
panda73111 Feb 22, 2016
4bd4a4e
Fixed errors with ports in URLs by using hostname instead of netloc; …
panda73111 Feb 23, 2016
87bad1e
Fixed not accepting cookies from IPs
panda73111 Feb 23, 2016
3088e09
Removed impossible condition
panda73111 Feb 23, 2016
c7a9efe
Changed an obsolete test, where explicit cookie sharing is now required
panda73111 Feb 23, 2016
8720b8e
Added tests for filtering of received cookies
panda73111 Feb 23, 2016
f8812d5
Implemented host-only-flag
panda73111 Feb 23, 2016
90bb61b
Refactoring; Moved new code over to the new class helpers.SessionCook…
panda73111 Feb 24, 2016
7a19859
Split the tests into testing calls to SessionCookieStore and the filt…
panda73111 Feb 24, 2016
5d5dd5e
Modified test_cookie_store_usage, now using mock.patch
panda73111 Feb 29, 2016
0d3c2be
Renamed SessionCookieStore to CookieJar
panda73111 Feb 29, 2016
1d28248
Implemented filtering by secure-flag
panda73111 Feb 29, 2016
a648d14
Added test for secure-flag
panda73111 Feb 29, 2016
bb559cd
Added rudimentary test for is_ip_address()
panda73111 Feb 29, 2016
cdf703f
Implemented filtering by path-attribute
panda73111 Mar 18, 2016
1a409ea
Added tests for path-attribute
panda73111 Mar 18, 2016
3be84c2
Added test for path-attributes of received cookies
panda73111 Mar 18, 2016
4959234
Using TestCase's assertEqual()
panda73111 Mar 18, 2016
2d21ff4
Implemented expires- and max-age-attribute
panda73111 Mar 19, 2016
2832ea4
Added tests for expires- and max-age-attribute
panda73111 Mar 19, 2016
1cfb2c6
Passing the ClientSession's loop through to its CookieJar
panda73111 Mar 19, 2016
7917132
Added more tests
panda73111 Apr 13, 2016
f8c0e9a
Merge remote-tracking branch 'origin/master' into cookie_filter
panda73111 Apr 24, 2016
f6a5da4
Fixed flake8 "Missing whitespace around operator"
panda73111 Apr 24, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 11 additions & 20 deletions aiohttp/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import sys
import traceback
import warnings
import http.cookies
import urllib.parse

from multidict import MultiDictProxy, MultiDict, CIMultiDict, upstr

import aiohttp
from .client_reqrep import ClientRequest, ClientResponse
from .errors import WSServerHandshakeError
from .helpers import CookieJar
from .websocket import WS_KEY, WebSocketParser, WebSocketWriter
from .websocket_client import ClientWebSocketResponse
from . import hdrs
Expand Down Expand Up @@ -52,13 +52,13 @@ def __init__(self, *, connector=None, loop=None, cookies=None,
if loop.get_debug():
self._source_traceback = traceback.extract_stack(sys._getframe(1))

self._cookies = http.cookies.SimpleCookie()
self._cookie_jar = CookieJar(loop=loop)

# For backward compatibility with `share_cookies` connectors
if connector._share_cookies:
self._update_cookies(connector.cookies)
self._cookie_jar.update_cookies(connector.cookies)
if cookies is not None:
self._update_cookies(cookies)
self._cookie_jar.update_cookies(cookies)
self._connector = connector
self._default_auth = auth
self._version = version
Expand Down Expand Up @@ -173,10 +173,13 @@ def _request(self, method, url, *,
skip_headers.add(upstr(i))

while True:

cookies = self._cookie_jar.filter_cookies(url)

req = self._request_class(
method, url, params=params, headers=headers,
skip_auto_headers=skip_headers, data=data,
cookies=self.cookies, encoding=encoding,
cookies=cookies, encoding=encoding,
auth=auth, version=version, compress=compress, chunked=chunked,
expect100=expect100,
loop=self._loop, response_class=self._response_class)
Expand All @@ -196,7 +199,8 @@ def _request(self, method, url, *,
except OSError as exc:
raise aiohttp.ClientOSError(*exc.args) from exc

self._update_cookies(resp.cookies)
self._cookie_jar.update_cookies(resp.cookies, resp.url)

# For backward compatibility with `share_cookies` connectors
if self._connector._share_cookies:
self._connector.update_cookies(resp.cookies)
Expand Down Expand Up @@ -345,19 +349,6 @@ def _ws_connect(self, url, *,
autoping,
self._loop)

def _update_cookies(self, cookies):
"""Update shared cookies."""
if isinstance(cookies, dict):
cookies = cookies.items()

for name, value in cookies:
if isinstance(value, http.cookies.Morsel):
# use dict method because SimpleCookie class modifies value
# before Python 3.4
dict.__setitem__(self.cookies, name, value)
else:
self.cookies[name] = value

def _prepare_headers(self, headers):
""" Add default headers and transform it to CIMultiDict
"""
Expand Down Expand Up @@ -451,7 +442,7 @@ def connector(self):
@property
def cookies(self):
"""The session cookies."""
return self._cookies
return self._cookie_jar.cookies

@property
def version(self):
Expand Down
266 changes: 265 additions & 1 deletion aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import datetime
import functools
import io
import ipaddress
import os
import re
from urllib.parse import quote, urlencode
from urllib.parse import quote, urlencode, urlsplit
from http.cookies import SimpleCookie, Morsel
from collections import namedtuple
from pathlib import Path

Expand Down Expand Up @@ -451,6 +453,14 @@ def requote_uri(uri):
return quote(uri, safe=safe_without_percent)


def is_ip_address(hostname):
    """Return True if *hostname* is the textual form of an IP address.

    Both IPv4 and IPv6 literals are recognised, as parsed by
    ``ipaddress.ip_address``.  Anything that is not a string (``None``,
    integers, bytes) is reported as "not an IP address":
    ``ipaddress.ip_address`` would otherwise happily interpret integers
    and packed bytes as addresses, which is never what a hostname check
    wants.
    """
    if not isinstance(hostname, str):
        return False

    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        return False
    return True


class Timeout:
"""Timeout context manager.

Expand Down Expand Up @@ -496,3 +506,257 @@ def __exit__(self, exc_type, exc_val, exc_tb):

def _cancel_task(self):
self._cancelled = self._task.cancel()


class CookieJar:
    """Cookie storage that filters cookies along the lines of RFC 6265.

    Cookies are kept in a ``SimpleCookie`` keyed by cookie name.  Cookies
    stored without a domain and without a response URL are "shared": they
    are sent with every request.  Cookies received from a response are
    bound to a domain and path and are only returned by
    ``filter_cookies()`` for matching request URLs.  Cookies carrying a
    ``max-age`` or ``expires`` attribute are removed by a timer scheduled
    on the event loop.
    """

    # Raw strings: the regex engine itself understands ``\xhh`` and ``\d``,
    # so no invalid string escapes are needed.
    DATE_TOKENS_RE = re.compile(
        r"[\x09\x20-\x2F\x3B-\x40\x5B-\x60\x7B-\x7E]*"
        r"(?P<token>[\x00-\x08\x0A-\x1F\d:a-zA-Z\x7F-\xFF]+)")

    # The ``(?!\d)`` look-aheads implement the "( non-digit *OCTET )" part
    # of the RFC 6265 grammar: a longer digit run such as "1994" must not
    # be mistaken for the day-of-month "19".
    DATE_HMS_TIME_RE = re.compile(r"(\d{1,2}):(\d{1,2}):(\d{1,2})(?!\d)")

    DATE_DAY_OF_MONTH_RE = re.compile(r"(\d{1,2})(?!\d)")

    DATE_MONTH_RE = re.compile(
        "(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)", re.I)

    DATE_YEAR_RE = re.compile(r"(\d{2,4})(?!\d)")

    # Month abbreviation (lower case) -> month number.
    _MONTH_NUMBERS = {
        "jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
        "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12,
    }

    def __init__(self, cookies=None, loop=None):
        """Create a jar, optionally pre-filled with shared *cookies*.

        *loop* is the event loop used to schedule cookie expiration; when
        omitted, ``asyncio.get_event_loop()`` is used.
        """
        self._cookies = SimpleCookie()
        self._loop = loop or asyncio.get_event_loop()
        # Names of cookies that may only be sent back to the exact host
        # they were received from.
        self._host_only_cookies = set()
        # Cookie name -> handle of the pending expiration timer.
        self._expirations = {}

        if cookies is not None:
            self.update_cookies(cookies)

    @property
    def cookies(self):
        """The session cookies."""
        return self._cookies

    def _cancel_expiration(self, name):
        """Cancel the pending expiration timer of cookie *name*, if any."""
        handle = self._expirations.pop(name, None)
        if handle is not None:
            handle.cancel()

    def _schedule_expiration(self, name, delay):
        """Remove cookie *name* after *delay* seconds (may be negative)."""
        self._cancel_expiration(name)
        self._expirations[name] = self._loop.call_later(
            delay, self._expire_cookie, name)

    def _expire_cookie(self, name):
        """Timer callback: drop cookie *name* and its bookkeeping."""
        self._expirations.pop(name, None)
        self._host_only_cookies.discard(name)
        self._cookies.pop(name, None)

    def update_cookies(self, cookies, response_url=None):
        """Store *cookies*, optionally received from *response_url*.

        *cookies* is a dict (including ``SimpleCookie``) or an iterable of
        ``(name, value)`` pairs; values may be ``Morsel`` objects or plain
        values.  When *response_url* is given:

        * nothing is stored if its host is an IP address;
        * morsels whose domain does not match the host are skipped;
        * cookies without a domain become host-only cookies of that host;
        * cookies without an absolute path get the default path derived
          from the URL.

        ``max-age`` takes precedence over ``expires``; an unparsable value
        of either attribute is blanked.
        """
        url_parsed = urlsplit(response_url or "")
        hostname = url_parsed.hostname

        if is_ip_address(hostname):
            # Don't accept cookies from IPs
            return

        if isinstance(cookies, dict):
            cookies = cookies.items()

        for name, value in cookies:
            if isinstance(value, Morsel):
                if not self._add_morsel(name, value, hostname):
                    continue
            else:
                self._cookies[name] = value

            # The cookie was (re)stored: bookkeeping that belonged to a
            # previous cookie of the same name must not leak onto it.
            # Otherwise an old timer would delete the fresh cookie, and an
            # old host-only flag would restrict a cookie that now has an
            # explicit domain.
            self._cancel_expiration(name)
            self._host_only_cookies.discard(name)

            cookie = self._cookies[name]

            if not cookie["domain"] and hostname is not None:
                # Set the cookie's domain to the response hostname
                # and set its host-only-flag
                self._host_only_cookies.add(name)
                cookie["domain"] = hostname

            cookie_path = cookie["path"]
            if not cookie_path or not cookie_path.startswith("/"):
                # Set the cookie's path to the response path
                path = url_parsed.path
                if not path.startswith("/"):
                    path = "/"
                else:
                    # Cut everything from the last slash to the end
                    path = "/" + path[1:path.rfind("/")]
                cookie["path"] = path

            max_age = cookie["max-age"]
            if max_age:
                try:
                    delta_seconds = int(max_age)
                except ValueError:
                    cookie["max-age"] = ""
                else:
                    self._schedule_expiration(name, delta_seconds)

            expires = cookie["expires"]
            if not cookie["max-age"] and expires:
                expire_time = self._parse_date(expires)
                if expire_time is not None:
                    # The loop clock is not the wall clock, so turn the
                    # absolute date into a delay relative to "now" instead
                    # of passing a Unix timestamp to loop.call_at().
                    now = datetime.datetime.now(datetime.timezone.utc)
                    self._schedule_expiration(
                        name, (expire_time - now).total_seconds())
                else:
                    cookie["expires"] = ""

        # Remove the host-only flags of nonexistent cookies
        self._host_only_cookies.intersection_update(self._cookies.keys())

    def _add_morsel(self, name, value, hostname):
        """Store Morsel *value* under *name*; return False if rejected.

        A leading dot of the morsel's domain is stripped (the morsel is
        modified in place).  The morsel is rejected only when both a
        cookie domain and a response *hostname* are known and they do not
        domain-match.
        """
        cookie_domain = value["domain"]
        if cookie_domain.startswith("."):
            # Remove leading dot
            cookie_domain = cookie_domain[1:]
            value["domain"] = cookie_domain

        if (cookie_domain and hostname and
                not self._is_domain_match(cookie_domain, hostname)):
            # Setting cookies for different domains is not allowed
            return False

        # use dict method because SimpleCookie class modifies value
        # before Python 3.4
        dict.__setitem__(self._cookies, name, value)
        return True

    def filter_cookies(self, request_url):
        """Return a ``SimpleCookie`` with the cookies to send to the URL.

        Shared cookies (no domain) are always included.  All other cookies
        are skipped when the request host is an IP address, when the
        domain or path does not match, or when the cookie is ``secure``
        and the scheme is neither ``https`` nor ``wss``.
        """
        url_parsed = urlsplit(request_url)
        hostname = url_parsed.hostname or ""
        host_is_ip = is_ip_address(hostname)
        # An empty request path is equivalent to "/"; without this,
        # "http://host" would never match cookie path "/".
        request_path = url_parsed.path or "/"
        is_secure = url_parsed.scheme in ("https", "wss")

        filtered = SimpleCookie()

        for name, cookie in self._cookies.items():
            cookie_domain = cookie["domain"]

            # Send shared cookies
            if not cookie_domain:
                dict.__setitem__(filtered, name, cookie)
                continue

            if host_is_ip:
                continue

            if name in self._host_only_cookies:
                if cookie_domain != hostname:
                    continue
            elif not self._is_domain_match(cookie_domain, hostname):
                continue

            if not self._is_path_match(request_path, cookie["path"]):
                continue

            if cookie["secure"] and not is_secure:
                continue

            dict.__setitem__(filtered, name, cookie)

        return filtered

    @staticmethod
    def _is_domain_match(domain, hostname):
        """Implements domain matching adhering to RFC 6265.

        *hostname* matches when it equals *domain*, or when it ends with
        ``"." + domain`` and is not an IP address.
        """
        if hostname == domain:
            return True

        if not hostname.endswith(domain):
            return False

        non_matching = hostname[:-len(domain)]

        if not non_matching.endswith("."):
            return False

        return not is_ip_address(hostname)

    @staticmethod
    def _is_path_match(req_path, cookie_path):
        """Implements path matching adhering to RFC 6265.

        *req_path* matches when it equals *cookie_path*, or when
        *cookie_path* is a prefix of it that ends at a ``/`` boundary.
        """
        if req_path == cookie_path:
            return True

        if not req_path.startswith(cookie_path):
            return False

        if cookie_path.endswith("/"):
            return True

        non_matching = req_path[len(cookie_path):]

        return non_matching.startswith("/")

    @classmethod
    def _parse_date(cls, date_str):
        """Implements date string parsing adhering to RFC 6265.

        Returns a timezone-aware (UTC) ``datetime``, or ``None`` when
        *date_str* is empty, lacks a time, day, month or year, or does not
        describe a valid date.
        """
        # NOTE(review): a reviewer asked "can't we reuse some existing
        # function?" for this parser; that question is still open.
        if not date_str:
            return

        found_time = False
        found_day_of_month = False
        found_month = False
        found_year = False

        hour = minute = second = 0
        day_of_month = 0
        month = 0
        year = 0

        for token_match in cls.DATE_TOKENS_RE.finditer(date_str):

            token = token_match.group("token")

            if not found_time:
                time_match = cls.DATE_HMS_TIME_RE.match(token)
                if time_match:
                    found_time = True
                    hour, minute, second = [
                        int(s) for s in time_match.groups()]
                    continue

            if not found_day_of_month:
                day_of_month_match = cls.DATE_DAY_OF_MONTH_RE.match(token)
                if day_of_month_match:
                    found_day_of_month = True
                    day_of_month = int(day_of_month_match.group(1))
                    continue

            if not found_month:
                month_match = cls.DATE_MONTH_RE.match(token)
                if month_match:
                    found_month = True
                    # Locale-independent lookup (strptime's %b is not).
                    month = cls._MONTH_NUMBERS[month_match.group(1).lower()]
                    continue

            if not found_year:
                year_match = cls.DATE_YEAR_RE.match(token)
                if year_match:
                    found_year = True
                    year = int(year_match.group(1))

        # Two-digit years: 70-99 mean 19xx, 00-69 mean 20xx.
        if 70 <= year <= 99:
            year += 1900
        elif 0 <= year <= 69:
            year += 2000

        if False in (found_day_of_month, found_month, found_year, found_time):
            return

        if not 1 <= day_of_month <= 31:
            return

        if year < 1601 or hour > 23 or minute > 59 or second > 59:
            return

        try:
            return datetime.datetime(
                year, month, day_of_month, hour, minute, second,
                tzinfo=datetime.timezone.utc)
        except ValueError:
            # Impossible calendar date such as 31 Feb: treat the string
            # as unparsable instead of raising into the caller.
            return
5 changes: 4 additions & 1 deletion tests/test_client_functional_oldstyle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,9 +1145,12 @@ def test_session_cookies(self, m_log):
session.request('get', httpd.url('cookies')))
self.assertEqual(resp.cookies['c1'].value, 'cookie1')
self.assertEqual(resp.cookies['c2'].value, 'cookie2')
self.assertEqual(session.cookies, resp.cookies)
resp.close()

# Add the received cookies as shared for sending them to the test
# server, which is only accessible via IP
session.cookies.update(resp.cookies)

# Assert that we send those cookies in subsequent requests
r = self.loop.run_until_complete(
session.request('get', httpd.url('method', 'get')))
Expand Down
Loading