From 2e844ad65b4e1f7b3440ac4de01aef7f42548de3 Mon Sep 17 00:00:00 2001 From: cdeler Date: Tue, 8 Sep 2020 12:36:36 +0300 Subject: [PATCH 1/8] Added last_raw_chunk_size to the Response object (#1208) --- httpx/_models.py | 10 ++++++++++ tests/models/test_responses.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/httpx/_models.py b/httpx/_models.py index 713281e662..53d0becffd 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -697,6 +697,8 @@ def __init__( self._raw_stream = ByteStream(body=content or b"") self.read() + self._last_raw_chunk_size = 0 + @property def elapsed(self) -> datetime.timedelta: """ @@ -885,6 +887,10 @@ def links(self) -> typing.Dict[typing.Optional[str], typing.Dict[str, str]]: ldict[key] = link return ldict + @property + def last_raw_chunk_size(self) -> int: + return self._last_raw_chunk_size + def __repr__(self) -> str: return f"" @@ -951,8 +957,10 @@ def iter_raw(self) -> typing.Iterator[bytes]: raise ResponseClosed() self.is_stream_consumed = True + self._last_raw_chunk_size = 0 with map_exceptions(HTTPCORE_EXC_MAP, request=self._request): for part in self._raw_stream: + self._last_raw_chunk_size = len(part) yield part self.close() @@ -1032,8 +1040,10 @@ async def aiter_raw(self) -> typing.AsyncIterator[bytes]: raise ResponseClosed() self.is_stream_consumed = True + self._last_raw_chunk_size = 0 with map_exceptions(HTTPCORE_EXC_MAP, request=self._request): async for part in self._raw_stream: + self._last_raw_chunk_size = len(part) yield part await self.aclose() diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py index 2b07a27040..da01306c46 100644 --- a/tests/models/test_responses.py +++ b/tests/models/test_responses.py @@ -227,6 +227,18 @@ def test_iter_raw(): assert raw == b"Hello, world!" 
+def test_iter_raw_increments_updates_counter(): + stream = IteratorStream(iterator=streaming_body()) + + response = httpx.Response( + 200, + stream=stream, + ) + + for part in response.iter_raw(): + assert len(part) == response.last_raw_chunk_size + + @pytest.mark.asyncio async def test_aiter_raw(): stream = AsyncIteratorStream(aiterator=async_streaming_body()) @@ -241,6 +253,19 @@ async def test_aiter_raw(): assert raw == b"Hello, world!" +@pytest.mark.asyncio +async def test_aiter_raw_increments_updates_counter(): + stream = AsyncIteratorStream(aiterator=async_streaming_body()) + + response = httpx.Response( + 200, + stream=stream, + ) + + async for part in response.aiter_raw(): + assert len(part) == response.last_raw_chunk_size + + def test_iter_bytes(): response = httpx.Response( 200, From 60b23440f51138f240c755d514f1493cc1983b51 Mon Sep 17 00:00:00 2001 From: cdeler Date: Tue, 8 Sep 2020 13:19:42 +0300 Subject: [PATCH 2/8] Added example with progress bar (#1208) Co-authored-by: Florimond Manca --- docs/advanced.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/advanced.md b/docs/advanced.md index b2a07df371..696b541358 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -221,6 +221,28 @@ with httpx.Client(headers=headers) as client: ... ``` +## Monitoring download progress + +If you need to monitor download progress, you can stream with using `response.last_raw_chunk_size` property. 
+ +For example, you can build a nice progress bar using the `tqdm` library: + +```python +import tempfile + +import httpx +from tqdm import tqdm + +with tempfile.NamedTemporaryFile() as download_file: + data = b"@" * 1000000 + with httpx.stream("POST", "https://httpbin.org/anything", data=data) as response: + content_length = int(response.headers["Content-Length"]) + with tqdm(total=content_length) as progress: + for chunk in response.iter_bytes(): + download_file.write(chunk) + progress.update(response.last_raw_chunk_size) +``` + ## .netrc Support HTTPX supports .netrc file. In `trust_env=True` cases, if auth parameter is From f584760fb5b5663f0e41757756eb862eb0729bef Mon Sep 17 00:00:00 2001 From: cdeler Date: Wed, 9 Sep 2020 12:31:30 +0300 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: Florimond Manca --- docs/advanced.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/advanced.md b/docs/advanced.md index 696b541358..1f7ff4be2e 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -223,9 +223,9 @@ with httpx.Client(headers=headers) as client: ## Monitoring download progress -If you need to monitor download progress, you can stream with using `response.last_raw_chunk_size` property. +If you need to monitor download progress of large responses, you can use stream and the `response.last_raw_chunk_size` property. 
-For example, you can build a nice progress bar using the `tqdm` library: +For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this… ```python import tempfile From fde7503d894108a230e8ab82676f867a7b839328 Mon Sep 17 00:00:00 2001 From: cdeler Date: Wed, 9 Sep 2020 21:41:26 +0300 Subject: [PATCH 4/8] PR review Changed last_raw_chunk_size to num_bytes_downloaded ; Edited the example according to documentation --- docs/advanced.md | 17 ++++++++++++----- httpx/_models.py | 14 +++++++------- tests/models/test_responses.py | 8 ++++++-- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/docs/advanced.md b/docs/advanced.md index 69fb236a8f..9a3e5694fe 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -234,13 +234,20 @@ import httpx from tqdm import tqdm with tempfile.NamedTemporaryFile() as download_file: - data = b"@" * 1000000 - with httpx.stream("POST", "https://httpbin.org/anything", data=data) as response: - content_length = int(response.headers["Content-Length"]) - with tqdm(total=content_length) as progress: + url = "https://speed.hetzner.de/100MB.bin" + with httpx.stream("GET", url) as response: + if "Content-Length" in response.headers: + total = int(response.headers["Content-Length"]) + else: + total = None + + with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress: + num_bytes_downloaded = response.num_bytes_downloaded for chunk in response.iter_bytes(): download_file.write(chunk) - progress.update(response.last_raw_chunk_size) + progress.update(response.num_bytes_downloaded - num_bytes_downloaded) + num_bytes_downloaded = response.num_bytes_downloaded + print(f"The total download size is {response.num_bytes_downloaded} bytes") ``` ## .netrc Support diff --git a/httpx/_models.py b/httpx/_models.py index 53d0becffd..65db9ae8b8 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -697,7 +697,7 @@ def __init__( 
self._raw_stream = ByteStream(body=content or b"") self.read() - self._last_raw_chunk_size = 0 + self._num_bytes_downloaded = 0 @property def elapsed(self) -> datetime.timedelta: @@ -888,8 +888,8 @@ def links(self) -> typing.Dict[typing.Optional[str], typing.Dict[str, str]]: return ldict @property - def last_raw_chunk_size(self) -> int: - return self._last_raw_chunk_size + def num_bytes_downloaded(self) -> int: + return self._num_bytes_downloaded def __repr__(self) -> str: return f"" @@ -957,10 +957,10 @@ def iter_raw(self) -> typing.Iterator[bytes]: raise ResponseClosed() self.is_stream_consumed = True - self._last_raw_chunk_size = 0 + self._num_bytes_downloaded = 0 with map_exceptions(HTTPCORE_EXC_MAP, request=self._request): for part in self._raw_stream: - self._last_raw_chunk_size = len(part) + self._num_bytes_downloaded += len(part) yield part self.close() @@ -1040,10 +1040,10 @@ async def aiter_raw(self) -> typing.AsyncIterator[bytes]: raise ResponseClosed() self.is_stream_consumed = True - self._last_raw_chunk_size = 0 + self._num_bytes_downloaded = 0 with map_exceptions(HTTPCORE_EXC_MAP, request=self._request): async for part in self._raw_stream: - self._last_raw_chunk_size = len(part) + self._num_bytes_downloaded += len(part) yield part await self.aclose() diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py index da01306c46..30d600086a 100644 --- a/tests/models/test_responses.py +++ b/tests/models/test_responses.py @@ -235,8 +235,10 @@ def test_iter_raw_increments_updates_counter(): stream=stream, ) + num_downloaded = response.num_bytes_downloaded for part in response.iter_raw(): - assert len(part) == response.last_raw_chunk_size + assert len(part) == (response.num_bytes_downloaded - num_downloaded) + num_downloaded = response.num_bytes_downloaded @pytest.mark.asyncio @@ -262,8 +264,10 @@ async def test_aiter_raw_increments_updates_counter(): stream=stream, ) + num_downloaded = response.num_bytes_downloaded async for part in 
response.aiter_raw(): - assert len(part) == response.last_raw_chunk_size + assert len(part) == (response.num_bytes_downloaded - num_downloaded) + num_downloaded = response.num_bytes_downloaded def test_iter_bytes(): From 3e4bb74a27d345b40bb41b2f005235d110151092 Mon Sep 17 00:00:00 2001 From: cdeler Date: Wed, 9 Sep 2020 22:42:14 +0300 Subject: [PATCH 5/8] Update docs/advanced.md Co-authored-by: Florimond Manca --- docs/advanced.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced.md b/docs/advanced.md index 9a3e5694fe..9714bd786f 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -223,7 +223,7 @@ with httpx.Client(headers=headers) as client: ## Monitoring download progress -If you need to monitor download progress of large responses, you can use stream and the `response.last_raw_chunk_size` property. +If you need to monitor download progress of large responses, you can use stream and the `response.num_bytes_downloaded` property. For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this… From 612c9410a9b1bbfd6e6ef55c109b6757bc194267 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 10 Sep 2020 09:27:31 +0100 Subject: [PATCH 6/8] Update docs/advanced.md Co-authored-by: Florimond Manca --- docs/advanced.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/advanced.md b/docs/advanced.md index 9714bd786f..6a28f559b2 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -236,10 +236,7 @@ from tqdm import tqdm with tempfile.NamedTemporaryFile() as download_file: url = "https://speed.hetzner.de/100MB.bin" with httpx.stream("GET", url) as response: - if "Content-Length" in response.headers: - total = int(response.headers["Content-Length"]) - else: - total = None + total = int(response.headers["Content-Length"]) with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress: num_bytes_downloaded = 
response.num_bytes_downloaded From 2189f3090368d26545293b1b9b059ab53df1ebc3 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 10 Sep 2020 09:33:33 +0100 Subject: [PATCH 7/8] Update docs/advanced.md --- docs/advanced.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced.md b/docs/advanced.md index 6a28f559b2..07e7331dc5 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -223,7 +223,7 @@ with httpx.Client(headers=headers) as client: ## Monitoring download progress -If you need to monitor download progress of large responses, you can use stream and the `response.num_bytes_downloaded` property. +If you need to monitor download progress of large responses, you can use response streaming and inspect the `response.num_bytes_downloaded` property. For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this… From 7be043a35733dcf941192e3e04038f7e557f5b3c Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 10 Sep 2020 09:38:29 +0100 Subject: [PATCH 8/8] Update docs/advanced.md --- docs/advanced.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/advanced.md b/docs/advanced.md index 07e7331dc5..d9a43354fa 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -225,6 +225,8 @@ with httpx.Client(headers=headers) as client: If you need to monitor download progress of large responses, you can use response streaming and inspect the `response.num_bytes_downloaded` property. +This interface is required for properly determining download progress, because the total number of bytes returned by `response.content` or `response.iter_bytes()` will not always correspond with the raw content length of the response if HTTP response compression is being used. + For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this… ```python