replicate · aron · Oct 16, 2024 · Oct 11, 2024 · Oct 14, 2024 · Oct 15, 2024
diff --git a/python/cog/server/clients.py b/python/cog/server/clients.py
@@ -1,3 +1,4 @@
+from functools import partial
 import base64
 import io
 import mimetypes
@@ -21,7 +22,7 @@
 
 from .. import types
 from ..schema import PredictionResponse, Status, WebhookEvent
-from ..types import Path
+from ..types import Path, URLFile
 from .eventtypes import PredictionInput
 from .response_throttler import ResponseThrottler
 from .retry_transport import RetryTransport
@@ -126,11 +127,11 @@ def __init__(self, fh: io.IOBase) -> None:
         self.fh = fh
 
     async def __aiter__(self) -> AsyncIterator[bytes]:
-        self.fh.seek(0)
+        if self.fh.seekable():
+            self.fh.seek(0)
+
         while True:
             chunk = self.fh.read(1024 * 1024)
-            if isinstance(chunk, str):
-                chunk = chunk.encode("utf-8")
             if not chunk:
                 log.info("finished reading file")
                 break
@@ -288,7 +289,10 @@ async def upload_files(
             with obj.open("rb") as f:
                 return await self.upload_file(f, url=url, prediction_id=prediction_id)
         if isinstance(obj, io.IOBase):
-            return await self.upload_file(obj, url=url, prediction_id=prediction_id)
+            try:
+                return await self.upload_file(obj, url=url, prediction_id=prediction_id)
+            finally:
+                obj.close()
         return obj
 
     # inputs

diff --git a/python/cog/types.py b/python/cog/types.py
@@ -10,7 +10,6 @@
 from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, TypeVar, Union
 
 import httpx
-import requests
 from pydantic import Field, SecretStr
 
 FILENAME_ILLEGAL_CHARS = set("\u0000/")
@@ -195,22 +194,17 @@ def unlink(self, missing_ok: bool = False) -> None:
                     raise
 
 
-# we would prefer URLFile to stay lazy
-# except... that doesn't really work with httpx?
-
-
 class URLFile(io.IOBase):
     """
     URLFile is a proxy object for a :class:`urllib3.response.HTTPResponse`
     object that is created lazily. It's a file-like object constructed from a
     URL that can survive pickling/unpickling.
-
-    This is the only place Cog uses requests
     """
 
     __slots__ = ("__target__", "__url__")
 
     def __init__(self, url: str) -> None:
+        object.__setattr__(self, "name", os.path.basename(url.split("?", 1)[0]))
         object.__setattr__(self, "__url__", url)
 
     # We provide __getstate__ and __setstate__ explicitly to ensure that the
@@ -242,19 +236,25 @@ def __delattr__(self, name: str) -> None:
 
     # Luckily the only dunder method on HTTPResponse is __iter__
     def __iter__(self) -> Iterator[bytes]:
-        return iter(self.__wrapped__)
+        response = self.__wrapped__
+        return iter(response)
 
     @property
     def __wrapped__(self) -> Any:
         try:
             return object.__getattribute__(self, "__target__")
         except AttributeError:
             url = object.__getattribute__(self, "__url__")
-            resp = requests.get(url, stream=True)
-            resp.raise_for_status()
-            resp.raw.decode_content = True
-            object.__setattr__(self, "__target__", resp.raw)
-            return resp.raw
+
+            # We create a streaming response here, much like the `requests`
+            # version in the main 0.9.x branch. The only concerning bit here
+            # is that the bookkeeping for closing the response needs to be
+            # handled elsewhere. There's probably a better design for this
+            # in the long term.
+            res = urllib.request.urlopen(url)
+            object.__setattr__(self, "__target__", res)
+
+            return res
 
     def __repr__(self) -> str:
         try:

diff --git a/python/tests/server/test_clients.py b/python/tests/server/test_clients.py
@@ -1,14 +1,18 @@
-import httpx
+from email.message import Message
+import io
 import os
-import responses
 import tempfile
+from urllib.response import addinfourl
+from unittest import mock
 
 import cog
+import httpx
 import pytest
 from cog.server.clients import ClientManager
 
+pytestmark = pytest.mark.asyncio  # module-level mark: applies to every test in this file
+
 
-@pytest.mark.asyncio
 async def test_upload_files_without_url():
     client_manager = ClientManager()
     temp_dir = tempfile.mkdtemp()
@@ -103,9 +107,62 @@ async def test_upload_files_with_retry(respx_mock):
 
     obj = {"path": cog.Path(temp_path)}
     with pytest.raises(httpx.HTTPStatusError):
-        result = await client_manager.upload_files(
+        await client_manager.upload_files(
+            obj, url="https://example.com/bucket", prediction_id=None
+        )
+
+    assert uploader.call_count == 3
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx(base_url="https://example.com")
+@mock.patch("urllib.request.urlopen")
+async def test_upload_files_with_url_file(urlopen_mock, respx_mock):
+    fp = io.BytesIO(b"hello world")
+    urlopen_mock.return_value = addinfourl(
+        fp=fp, headers=Message(), url="https://example.com/cdn/my_file.txt"
+    )
+
+    uploader = respx_mock.put("/bucket/my_file.txt").mock(
+        return_value=httpx.Response(
+            201, headers={"Location": "https://cdn.example.com/bucket/my_file.txt"}
+        )
+    )
+
+    client_manager = ClientManager()
+
+    obj = {"path": cog.types.URLFile("https://example.com/cdn/my_file.txt")}
+    result = await client_manager.upload_files(
+        obj, url="https://example.com/bucket", prediction_id=None
+    )
+    assert result == {"path": "https://cdn.example.com/bucket/my_file.txt"}
+
+    assert uploader.call_count == 1
+    assert urlopen_mock.call_count == 1
+    assert urlopen_mock.call_args[0][0] == "https://example.com/cdn/my_file.txt"
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx(base_url="https://example.com")
+@mock.patch("urllib.request.urlopen")
+async def test_upload_files_with_url_file_with_retry(urlopen_mock, respx_mock):
+    fp = io.BytesIO(b"hello world")
+    urlopen_mock.return_value = addinfourl(
+        fp=fp, headers=Message(), url="https://example.com/cdn/my_file.txt"
+    )
+
+    uploader = respx_mock.put("/bucket/my_file.txt").mock(
+        return_value=httpx.Response(502)
+    )
+
+    client_manager = ClientManager()
+
+    obj = {"path": cog.types.URLFile("https://example.com/cdn/my_file.txt")}
+    with pytest.raises(httpx.HTTPStatusError):
+        await client_manager.upload_files(
             obj, url="https://example.com/bucket", prediction_id=None
         )
 
-        assert result == {"path": "https://cdn.example.com/bucket/my_file.txt"}
-        assert uploader.call_count == 3
+    assert uploader.call_count == 3
+    assert urlopen_mock.call_count == 1
+    assert urlopen_mock.call_args[0][0] == "https://example.com/cdn/my_file.txt"