From fac9f5dc426ad8a6650a41d06d64d8400e8a9cf7 Mon Sep 17 00:00:00 2001 From: Alexander Shorin Date: Sat, 14 Feb 2015 16:02:54 +0300 Subject: [PATCH 1/5] Fix unfinished code for handling extended params in content disposition --- aiohttp/multipart.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index c4da1802dd2..ea2575bd2ca 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -53,8 +53,7 @@ def is_quoted(string): return string[0] == string[-1] == '"' def is_rfc5987(string): - # this isn't very correct - return "''" in string + return is_token(string) and string.count("'") == 2 def is_extended_param(string): return string.endswith('*') @@ -103,20 +102,12 @@ def unescape(text, *, chars=''.join(map(re.escape, CHAR))): continue elif is_extended_param(key): - if is_quoted(value): - warnings.warn(BadContentDispositionParam(item)) - continue - elif is_rfc5987(value): + if is_rfc5987(value): encoding, _, value = value.split("'", 2) encoding = encoding or 'utf-8' - elif "'": - warnings.warn(BadContentDispositionParam(item)) - continue - elif not is_token(value): + else: warnings.warn(BadContentDispositionParam(item)) continue - else: - encoding = 'utf-8' try: value = unquote(value, encoding, 'strict') From 0783d154db85202573dde7730cd9e7fa430490e2 Mon Sep 17 00:00:00 2001 From: Alexander Shorin Date: Sat, 14 Feb 2015 16:36:00 +0300 Subject: [PATCH 2/5] Resolve body part content type before content length This will allow us to apply more smart rules on determining content length. 
--- aiohttp/multipart.py | 11 +++++------ tests/test_multipart.py | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index ea2575bd2ca..fdad53d8e48 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -560,17 +560,16 @@ def __init__(self, obj, headers=None, *, chunk_size=8192): } def _fill_headers_with_defaults(self): - """Updates part headers by """ - if CONTENT_LENGTH not in self.headers: - content_length = self._guess_content_length(self.obj) - if content_length is not None: - self.headers[CONTENT_LENGTH] = str(content_length) - if CONTENT_TYPE not in self.headers: content_type = self._guess_content_type(self.obj) if content_type is not None: self.headers[CONTENT_TYPE] = content_type + if CONTENT_LENGTH not in self.headers: + content_length = self._guess_content_length(self.obj) + if content_length is not None: + self.headers[CONTENT_LENGTH] = str(content_length) + if CONTENT_DISPOSITION not in self.headers: filename = self._guess_filename(self.obj) if filename is not None: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index e16072cdda3..4681e1f02bd 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -699,8 +699,8 @@ def test_serialize_with_content_encoding_identity(self): thing, {CONTENT_ENCODING: 'identity'}) stream = part.serialize() self.assertEqual(b'CONTENT-ENCODING: identity\r\n' - b'CONTENT-LENGTH: 16\r\n' - b'CONTENT-TYPE: application/octet-stream', + b'CONTENT-TYPE: application/octet-stream\r\n' + b'CONTENT-LENGTH: 16', next(stream)) self.assertEqual(b'\r\n\r\n', next(stream)) From 29d672df825bdbf6f59aaa79346fa2ca521d708d Mon Sep 17 00:00:00 2001 From: Alexander Shorin Date: Sat, 14 Feb 2015 16:50:43 +0300 Subject: [PATCH 3/5] Extract body part payload serialization into own method --- aiohttp/multipart.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index 
fdad53d8e48..a3442528388 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -625,21 +625,20 @@ def serialize(self): for item in self.headers.items() ) yield b'\r\n\r\n' + yield from self._maybe_encode_stream(self._serialize_obj()) + yield b'\r\n' + def _serialize_obj(self): obj = self.obj mtype, stype, *_ = parse_mimetype(self.headers.get(CONTENT_TYPE)) serializer = self._serialize_map.get((mtype, stype)) if serializer is not None: - stream = serializer(obj) - else: - for key in self._serialize_map: - if not isinstance(key, tuple) and isinstance(obj, key): - stream = self._serialize_map[key](obj) - break - else: - stream = self._serialize_default(obj) - yield from self._maybe_encode_stream(stream) - yield b'\r\n' + return serializer(obj) + + for key in self._serialize_map: + if not isinstance(key, tuple) and isinstance(obj, key): + return self._serialize_map[key](obj) + return self._serialize_default(obj) def _serialize_bytes(self, obj): yield obj From 85006722b76591b83edc5f8443b25864e8a8248d Mon Sep 17 00:00:00 2001 From: Alexander Shorin Date: Sun, 15 Feb 2015 15:36:08 +0300 Subject: [PATCH 4/5] Guess content length for strings and StringIO --- aiohttp/multipart.py | 14 +++++++++++--- tests/test_multipart.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index a3442528388..78898108818 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -578,12 +578,20 @@ def _fill_headers_with_defaults(self): def _guess_content_length(self, obj): if isinstance(obj, bytes): return len(obj) + elif isinstance(obj, str): + *_, params = parse_mimetype(self.headers.get(CONTENT_TYPE)) + charset = params.get('charset', 'us-ascii') + return len(obj.encode(charset)) + elif isinstance(obj, io.StringIO): + *_, params = parse_mimetype(self.headers.get(CONTENT_TYPE)) + charset = params.get('charset', 'us-ascii') + return len(obj.getvalue().encode(charset)) - obj.tell() + elif isinstance(obj, 
io.BytesIO): + return len(obj.getvalue()) - obj.tell() elif isinstance(obj, io.IOBase): try: return os.fstat(obj.fileno()).st_size - obj.tell() except (AttributeError, OSError): - if isinstance(obj, io.BytesIO): - return len(obj.getvalue()) - obj.tell() return None else: return None @@ -592,7 +600,7 @@ def _guess_content_type(self, obj, default='application/octet-stream'): if hasattr(obj, 'name'): name = getattr(obj, 'name') return mimetypes.guess_type(name)[0] - elif isinstance(obj, str): + elif isinstance(obj, (str, io.StringIO)): return 'text/plain; charset=utf-8' else: return default diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 4681e1f02bd..73b3f052f2e 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -533,12 +533,17 @@ def setUp(self): self.part = aiohttp.multipart.BodyPartWriter(b'') def test_guess_content_length(self): + self.part.headers[CONTENT_TYPE] = 'text/plain; charset=utf-8' self.assertIsNone(self.part._guess_content_length({})) self.assertIsNone(self.part._guess_content_length(object())) self.assertEqual(3, self.part._guess_content_length(io.BytesIO(b'foo'))) - self.assertIsNone(self.part._guess_content_length(io.StringIO('foo'))) + self.assertEqual(3, + self.part._guess_content_length(io.StringIO('foo'))) + self.assertEqual(6, + self.part._guess_content_length(io.StringIO('мяу'))) self.assertEqual(3, self.part._guess_content_length(b'bar')) + self.assertEqual(12, self.part._guess_content_length('пассед')) with open(__file__, 'rb') as f: self.assertEqual(os.fstat(f.fileno()).st_size, self.part._guess_content_length(f)) @@ -644,7 +649,8 @@ def test_serialize_multipart(self): multipart.append_json({'test': 'passed'}) self.assertEqual( [b'--:\r\n', - b'CONTENT-TYPE: text/plain; charset=utf-8', + b'CONTENT-TYPE: text/plain; charset=utf-8\r\n' + b'CONTENT-LENGTH: 11', b'\r\n\r\n', b'foo-bar-baz', b'\r\n', From 51df89564924e0b6e3750e8289f76224699dd479 Mon Sep 17 00:00:00 2001 From: Alexander Shorin Date: Thu, 12 
Mar 2015 10:41:02 +0300 Subject: [PATCH 5/5] Add multipart docs --- docs/api.rst | 12 +- docs/client.rst | 1 + docs/index.rst | 1 + docs/multipart.rst | 331 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 docs/multipart.rst diff --git a/docs/api.rst b/docs/api.rst index 2a72415f8ae..859a83ef744 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,8 +1,10 @@ +.. _aiohttp-api: + Helpers API =========== All public names from submodules ``client``, ``connector``, -``errors``, ``parsers``, ``protocol``, ``server``, ``utils``, +``errors``, ``multipart``, ``parsers``, ``protocol``, ``server``, ``utils``, ``websocket`` and ``wsgi`` are exported into ``aiohttp`` namespace. @@ -38,6 +40,14 @@ aiohttp.helpers module :undoc-members: :show-inheritance: +aiohttp.multipart module +------------------------ + +.. automodule:: aiohttp.multipart + :members: + :undoc-members: + :show-inheritance: + aiohttp.parsers module ---------------------- diff --git a/docs/client.rst b/docs/client.rst index ffb26db810d..90d5708c698 100644 --- a/docs/client.rst +++ b/docs/client.rst @@ -251,6 +251,7 @@ If you pass file object as data parameter, aiohttp will stream it to server automatically. Check :class:`aiohttp.stream.StreamReader` for supported format information. +.. seealso:: :ref:`aiohttp-multipart` Streaming uploads ----------------- diff --git a/docs/index.rst b/docs/index.rst index 86d80a6b746..344de72b3b8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -129,6 +129,7 @@ Contents: web_reference server multidict + multipart api contributing changes diff --git a/docs/multipart.rst b/docs/multipart.rst new file mode 100644 index 00000000000..e3aab8503f3 --- /dev/null +++ b/docs/multipart.rst @@ -0,0 +1,331 @@ +.. highlight:: python + +.. module:: aiohttp.multipart + +.. _aiohttp-multipart: + +Working with Multipart +====================== + +`aiohttp` supports full featured multipart reader and writer. 
Both are designed +with streaming processing in mind to avoid an unwanted footprint which may be +significant if you're dealing with large payloads, but this also means that +most I/O operations can only be executed a single time. + +Reading Multipart Responses +--------------------------- + +Assume you made a request, as usual, and want to process the response +multipart data:: + + >>> resp = yield from aiohttp.request(...) + +First, you need to wrap the response with a +:meth:`MultipartReader.from_response`. This keeps the implementation of +:class:`MultipartReader` separated from the response and connection routines, which +makes it more portable:: + + >>> reader = aiohttp.MultipartReader.from_response(resp) + +Let's assume that with this response you received a JSON document and multiple +files for it, but you don't need all of them, just a specific one. + +So first you need to enter a loop where the multipart body will be processed:: + + >>> metadata = None + >>> filedata = None + >>> while True: + ... part = yield from reader.next() + +The returned type depends on what the next part is: if it's a simple body part +then you'll get a :class:`BodyPartReader` instance here; otherwise, it will +be another :class:`MultipartReader` instance for the nested multipart. Remember +that the multipart format is recursive and supports multiple levels of nested body +parts. When there are no more parts left to fetch, a ``None`` value will be +returned - that's our signal to break the loop:: + + ... if part is None: + ... break + +Both :class:`BodyPartReader` and :class:`MultipartReader` provide access to +body part headers: this allows you to filter parts by their attributes:: + + ... if part.headers[aiohttp.hdrs.CONTENT_TYPE] == 'application/json': + ... metadata = yield from part.json() + ... continue + +Neither :class:`BodyPartReader` nor :class:`MultipartReader` instances read +the whole body part data without being explicitly asked to. 
:class:`BodyPartReader` +provides a set of helpers to fetch popular content types in a friendly way: + +- :meth:`BodyPartReader.text` for plaintext data; +- :meth:`BodyPartReader.json` for JSON; +- :meth:`BodyPartReader.form` for `application/x-www-form-urlencoded` + +Each of these helpers automagically recognizes if content is compressed by +using `gzip` or `deflate` encoding (while it respects the `identity` one), or if +the transfer encoding is base64 or `quoted-printable` - in each case the result +will get automagically decoded. But in case you need access to the raw binary +data as it is, there are :meth:`BodyPartReader.read` and +:meth:`BodyPartReader.read_chunk` coroutine methods as well to read the raw binary +data all in a single shot or by chunks respectively. + +When you have to deal with multipart files, the :attr:`BodyPartReader.filename` +property comes to the aid. It's a very smart helper which handles the +`Content-Disposition` header right and extracts the right filename attribute +from it:: + + ... if part.filename != 'secret.txt': + ... continue + +If the current body part doesn't match your expectation and you want to skip it +- just continue the loop to start its next iteration. Here the magic happens. +Before fetching the next body part, ``yield from reader.next()`` ensures that the previous +one was read completely. If it wasn't even started to be read, all of its content +is sent to the void in order to fetch the next part. So you don't have to care +about cleanup routines while you're within the loop. + +Once you've found the part for the file you searched for, just read it. Let's +handle it as it is without applying any decoding magic:: + + ... filedata = yield from part.read(decode=False) + +Later you may decide to decode the data. It's still simple and possible +to do:: + + ... filedata = part.decode(filedata) + +Once you are done with multipart processing, just break the loop:: + + ... break + +And release the connection to not let it hold a response in the middle of the data:: + + ... 
yield from resp.release() # or yield from reader.release() + + +Sending Multipart Requests +-------------------------- + +:class:`MultipartWriter` provides an interface to build a multipart payload from +Python data and serialize it into a chunked binary stream. Since the multipart +format is recursive and supports deep nesting, you can use a ``with`` statement +to design your multipart data closer to how it will be:: + + >>> with aiohttp.MultipartWriter('mixed') as mpwriter: + ... ... + ... with aiohttp.MultipartWriter('related') as subwriter: + ... ... + ... mpwriter.append(subwriter) + ... + ... with aiohttp.MultipartWriter('related') as subwriter: + ... ... + ... with aiohttp.MultipartWriter('related') as subsubwriter: + ... ... + ... subwriter.append(subsubwriter) + ... mpwriter.append(subwriter) + ... + ... with aiohttp.MultipartWriter('related') as subwriter: + ... ... + ... mpwriter.append(subwriter) + +The :meth:`MultipartWriter.append` is used to join new body parts into a +single stream. It accepts various inputs and determines which default headers +should be used for each. + +For text data the default `Content-Type` is :mimetype:`text/plain; charset=utf-8`:: + + ... mpwriter.append('hello') + +For binary data :mimetype:`application/octet-stream` is used:: + + ... mpwriter.append(b'aiohttp') + +You can always override these defaults by passing your own headers with the second +argument:: + + ... mpwriter.append(io.BytesIO(b'GIF89a...'), + {'CONTENT-TYPE': 'image/gif'}) + +For file objects `Content-Type` will be determined by using Python's +`mimetypes`_ module and additionally the `Content-Disposition` header will include +the file's basename:: + + ... part = root.append(open(__file__, 'rb')) + +If you want to send a file with a different name, just handle the +:class:`BodyPartWriter` instance which :meth:`MultipartWriter.append` always +returns and set `Content-Disposition` explicitly by using the +:meth:`BodyPartWriter.set_content_disposition` helper:: + + ... 
part.set_content_disposition('attachment', filename='secret.txt') + +Additionally, you may set other headers here:: + + ... part.headers[aiohttp.hdrs.CONTENT_ID] = 'X-12345' + +If you set `Content-Encoding`, it will be automatically applied to the +data on serialization (see below):: + + ... part.headers[aiohttp.hdrs.CONTENT_ENCODING] = 'gzip' + +There are also :meth:`MultipartWriter.append_json` and +:meth:`MultipartWriter.append_form` helpers which are useful to work with JSON +and form urlencoded data, so you don't have to encode it manually every time:: + + ... mpwriter.append_json({'test': 'passed'}) + ... mpwriter.append_form([('key', 'value')]) + +When it's done, to make a request just pass the root :class:`MultipartWriter` +instance as the :func:`aiohttp.client.request` `data` argument:: + + >>> yield from aiohttp.request('POST', 'http://example.com', data=mpwriter) + +Behind the scenes :meth:`MultipartWriter.serialize` will yield every +part by chunks and if a body part has `Content-Encoding` or `Content-Transfer-Encoding` +they will be applied to the streamed content. + +Please note that on :meth:`MultipartWriter.serialize` all the file objects +will be read to the end and there is no way to repeat the request without rewinding +their pointers to the start. + +Hacking Multipart +----------------- + +The Internet is full of terror and sometimes you may find a server which +implements multipart support in strange ways where an obvious solution +doesn't work. + +For instance, if the server uses `cgi.FieldStorage`_ then you have to ensure that +no body part contains a `Content-Length` header:: + + for part in mpwriter: + part.headers.pop(aiohttp.hdrs.CONTENT_LENGTH, None) + +On the other hand, some servers may require you to specify `Content-Length` for the +whole multipart request. `aiohttp` doesn't do that since it sends multipart +using chunked transfer encoding by default. 
To overcome this issue, you have +to serialize a :class:`MultipartWriter` on your own in order to calculate its +size:: + + body = b''.join(mpwriter.serialize()) + yield from aiohttp.request('POST', 'http://example.com', + data=body, headers=mpwriter.headers) + +Sometimes the server response may not be well structured: it may or may not +contain nested parts. For instance, we are requesting a resource which returns +JSON documents with files attached to them. If a document has any attachments, +they are returned as a nested multipart part. If it has none, it comes as a plain +body part:: + + CONTENT-TYPE: multipart/mixed; boundary=--: + + --: + CONTENT-TYPE: application/json + + {"_id": "foo"} + --: + CONTENT-TYPE: multipart/related; boundary=----: + + ----: + CONTENT-TYPE: application/json + + {"_id": "bar"} + ----: + CONTENT-TYPE: text/plain + CONTENT-DISPOSITION: attachment; filename=bar.txt + + bar! bar! bar! + ----:-- + --: + CONTENT-TYPE: application/json + + {"_id": "boo"} + --: + CONTENT-TYPE: multipart/related; boundary=----: + + ----: + CONTENT-TYPE: application/json + + {"_id": "baz"} + ----: + CONTENT-TYPE: text/plain + CONTENT-DISPOSITION: attachment; filename=baz.txt + + baz! baz! baz! 
+ ----:-- + --:-- + +Reading such kind of data in a single stream is possible, but not very clean:: + + result = [] + while True: + part = yield from reader.next() + + if part is None: + break + + if isinstance(part, aiohttp.MultipartReader): + # Fetching files + while True: + filepart = yield from part.next() + if filepart is None: + break + result[-1].append((yield from filepart.read())) + + else: + # Fetching document + result.append([(yield from part.json())]) + +Let's hack the reader so that it returns pairs of a document and a reader of the +related files on each iteration:: + + class PairsMultipartReader(aiohttp.MultipartReader): + + # keep reference on the original reader + multipart_reader_cls = aiohttp.MultipartReader + + @asyncio.coroutine + def next(self): + """Emits a tuple of document object (:class:`dict`) and multipart + reader of the following attachments (if any). + + :rtype: tuple + """ + reader = yield from super().next() + + if self._at_eof: + return None, None + + if isinstance(reader, self.multipart_reader_cls): + part = yield from reader.next() + doc = yield from part.json() + else: + doc = yield from reader.json() + + return doc, reader + +And this gives us a much cleaner solution:: + + reader = PairsMultipartReader.from_response(resp) + result = [] + while True: + doc, files_reader = yield from reader.next() + + if doc is None: + break + + files = [] + while True: + filepart = yield from files_reader.next() + if filepart is None: + break + files.append((yield from filepart.read())) + + result.append((doc, files)) + +.. seealso:: Multipart API in :ref:`aiohttp-api` section. + + +.. _cgi.FieldStorage: https://docs.python.org/3.4/library/cgi.html +.. _mimetypes: https://docs.python.org/3.4/library/mimetypes.html