From d2efe18a95d0680d7f47766a6f1701dd2172c8bb Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Apr 2017 13:40:37 -0400 Subject: [PATCH] API: Relax is-file-like conditions Previously, we were requiring that all file-like objects had "read," "write," "seek," and "tell" methods, but that was too strict (e.g. read-only buffers). This commit relaxes those requirements to having at least one of "read" or "write" as an attribute (objects must still be iterators). Closes gh-16135. --- pandas/core/dtypes/inference.py | 15 ++++--------- pandas/tests/dtypes/test_inference.py | 30 ++++++++++++++++++++++++++ pandas/tests/io/parser/common.py | 20 +++++++++++++++++ pandas/tests/io/parser/test_network.py | 19 ++++++++++++++++ 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 66f4d87aa8e33..a5316a83612cb 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -142,12 +142,8 @@ def is_file_like(obj): Check if the object is a file-like object. For objects to be considered file-like, they must - be an iterator AND have the following four methods: - - 1) read - 2) write - 3) seek - 4) tell + be an iterator AND have a `read` and/or `write` + method as an attribute. Note: file-like objects must be iterable, but iterable objects need not be file-like. 
@@ -172,11 +168,8 @@ def is_file_like(obj): False """ - file_attrs = ('read', 'write', 'seek', 'tell') - - for attr in file_attrs: - if not hasattr(obj, attr): - return False + if not (hasattr(obj, 'read') or hasattr(obj, 'write')): + return False if not is_iterator(obj): return False diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 8dcf75e8a1aec..1d3a956829a3c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -100,11 +100,41 @@ def test_is_dict_like(): def test_is_file_like(): + class MockFile(object): + pass + is_file = inference.is_file_like data = StringIO("data") assert is_file(data) + # No read / write attributes + # No iterator attributes + m = MockFile() + assert not is_file(m) + + MockFile.write = lambda self: 0 + + # Write attribute but not an iterator + m = MockFile() + assert not is_file(m) + + MockFile.__iter__ = lambda self: self + MockFile.__next__ = lambda self: 0 + MockFile.next = MockFile.__next__ + + # Valid write-only file + m = MockFile() + assert is_file(m) + + del MockFile.write + MockFile.read = lambda self: 0 + + # Valid read-only file + m = MockFile() + assert is_file(m) + + # Iterator but no read / write attributes data = [1, 2, 3] assert not is_file(data) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index afb23f540264e..e3df02a948080 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1685,6 +1685,26 @@ class InvalidBuffer(object): with tm.assert_raises_regex(ValueError, msg): self.read_csv(InvalidBuffer()) + # gh-16135: we want to ensure that "tell" and "seek" + # aren't actually being used when we call `read_csv` + # + # Thus, while the object may look "invalid" (these + # methods are attributes of the `StringIO` class), + # it is still a valid file-object for our purposes. 
+ class NoSeekTellBuffer(StringIO): + def tell(self): + raise AttributeError("No tell method") + + def seek(self, pos, whence=0): + raise AttributeError("No seek method") + + data = "a\n1" + + expected = pd.DataFrame({"a": [1]}) + result = self.read_csv(NoSeekTellBuffer(data)) + + tm.assert_frame_equal(result, expected) + if PY3: from unittest import mock diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index b9920983856d4..e3a1b42fd4d45 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -176,3 +176,22 @@ def test_s3_fails(self): # It's irrelevant here that this isn't actually a table. with pytest.raises(IOError): read_csv('s3://cant_get_it/') + + @tm.network + def boto3_client_s3(self): + # see gh-16135 + + # boto3 is a dependency of s3fs + import boto3 + client = boto3.client("s3") + + key = "/tips.csv" + bucket = "pandas-test" + s3_object = client.get_object(Bucket=bucket, Key=key) + + result = read_csv(s3_object["Body"]) + assert isinstance(result, DataFrame) + assert not result.empty + + expected = read_csv(tm.get_data_path('tips.csv')) + tm.assert_frame_equal(result, expected)