Skip to content

Commit

Permalink
API: Relax is-file-like conditions (#16150)
Browse files Browse the repository at this point in the history
Previously, we required that all
file-like objects have "read",
"write", "seek", and "tell" methods,
but that was too strict (e.g. for
read-only buffers). This commit relaxes
the requirement: besides being an
iterator, an object now only needs
EITHER a "read" or a "write" attribute.

Closes gh-16135.
  • Loading branch information
gfyoung authored and jreback committed Apr 27, 2017
1 parent 2d9909c commit a16fc8d
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 11 deletions.
15 changes: 4 additions & 11 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,8 @@ def is_file_like(obj):
Check if the object is a file-like object.
For objects to be considered file-like, they must
be an iterator AND have the following four methods:
1) read
2) write
3) seek
4) tell
be an iterator AND have either a `read` and/or `write`
method as an attribute.
Note: file-like objects must be iterable, but
iterable objects need not be file-like.
Expand All @@ -172,11 +168,8 @@ def is_file_like(obj):
False
"""

file_attrs = ('read', 'write', 'seek', 'tell')

for attr in file_attrs:
if not hasattr(obj, attr):
return False
if not (hasattr(obj, 'read') or hasattr(obj, 'write')):
return False

if not is_iterator(obj):
return False
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,41 @@ def test_is_dict_like():


def test_is_file_like():
    """Check `is_file_like` against objects with and without file traits.

    An object should be accepted only when it is an iterator AND exposes
    a `read` and/or `write` attribute. The mock class is mutated step by
    step, so the order of the cases below matters.
    """
    class MockFile(object):
        pass

    is_file = inference.is_file_like

    data = StringIO("data")
    assert is_file(data)

    # No read / write attributes
    # No iterator attributes
    m = MockFile()
    assert not is_file(m)

    MockFile.write = lambda self: 0

    # Write attribute but not an iterator
    m = MockFile()
    assert not is_file(m)

    MockFile.__iter__ = lambda self: self
    MockFile.__next__ = lambda self: 0
    # Python 2 spells the iterator-protocol method `next`
    MockFile.next = MockFile.__next__

    # Valid write-only file
    m = MockFile()
    assert is_file(m)

    del MockFile.write
    MockFile.read = lambda self: 0

    # Valid read-only file
    m = MockFile()
    assert is_file(m)

    # Iterable (a list is not an iterator) with no read / write attributes
    data = [1, 2, 3]
    assert not is_file(data)

    # Genuine iterator, but still no read / write attributes
    assert not is_file(iter(data))

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1685,6 +1685,26 @@ class InvalidBuffer(object):
with tm.assert_raises_regex(ValueError, msg):
self.read_csv(InvalidBuffer())

# gh-16135: we want to ensure that "tell" and "seek"
# aren't actually being used when we call `read_csv`
#
# Thus, while the object may look "invalid" (these
# methods are attributes of the `StringIO` class),
# it is still a valid file-object for our purposes.
class NoSeekTellBuffer(StringIO):
    """`StringIO` whose positioning methods always fail.

    Both `seek` and `tell` raise, so any code path that calls
    either of them on this buffer errors out immediately.
    """

    def seek(self, pos, whence=0):
        """Refuse to reposition the buffer."""
        raise AttributeError("No seek method")

    def tell(self):
        """Refuse to report the current position."""
        raise AttributeError("No tell method")

data = "a\n1"

expected = pd.DataFrame({"a": [1]})
result = self.read_csv(NoSeekTellBuffer(data))

tm.assert_frame_equal(result, expected)

if PY3:
from unittest import mock

Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,22 @@ def test_s3_fails(self):
# It's irrelevant here that this isn't actually a table.
with pytest.raises(IOError):
read_csv('s3://cant_get_it/')

@tm.network
def boto3_client_s3(self):
    """Read a CSV from the ``Body`` of a boto3 ``get_object`` response.

    See gh-16135. The parsed frame must be a non-empty ``DataFrame``
    equal to the one parsed from the local ``tips.csv`` data file.
    """
    # NOTE(review): the name has no `test_` prefix, so under pytest's
    # default naming rules this would not be collected -- confirm
    # whether that is intentional.

    # boto3 is a dependency of s3fs
    import boto3

    # NOTE(review): the key starts with a slash; verify that this matches
    # the object name actually stored in the bucket.
    response = boto3.client("s3").get_object(Bucket="pandas-test",
                                             Key="/tips.csv")

    streamed = read_csv(response["Body"])
    assert isinstance(streamed, DataFrame)
    assert not streamed.empty

    local = read_csv(tm.get_data_path('tips.csv'))
    tm.assert_frame_equal(streamed, local)

0 comments on commit a16fc8d

Please sign in to comment.