Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Relax is-file-like conditions #16150

Merged
merged 1 commit into from
Apr 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,8 @@ def is_file_like(obj):
Check if the object is a file-like object.

For objects to be considered file-like, they must
be an iterator AND have the following four methods:

1) read
2) write
3) seek
4) tell
be an iterator AND have a `read` and/or `write`
method as an attribute.

Note: file-like objects must be iterable, but
iterable objects need not be file-like.
Expand All @@ -172,11 +168,8 @@ def is_file_like(obj):
False
"""

file_attrs = ('read', 'write', 'seek', 'tell')

for attr in file_attrs:
if not hasattr(obj, attr):
return False
if not (hasattr(obj, 'read') or hasattr(obj, 'write')):
return False

if not is_iterator(obj):
return False
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,41 @@ def test_is_dict_like():


def test_is_file_like():
    """Exercise `inference.is_file_like` on real and mock file objects.

    A throwaway `MockFile` class is mutated step by step so that each
    stage adds or removes one capability (a `write` method, the iterator
    protocol, a `read` method) before a fresh instance is checked.
    """
    class MockFile(object):
        pass

    is_file = inference.is_file_like

    data = StringIO("data")
    assert is_file(data)

    # No read / write attributes
    # No iterator attributes
    m = MockFile()
    assert not is_file(m)

    MockFile.write = lambda self: 0

    # Write attribute but not an iterator
    m = MockFile()
    assert not is_file(m)

    MockFile.__iter__ = lambda self: self
    MockFile.__next__ = lambda self: 0
    # Python 2 spells the iterator-protocol method `next`.
    MockFile.next = MockFile.__next__

    # Valid write-only file
    m = MockFile()
    assert is_file(m)

    del MockFile.write
    MockFile.read = lambda self: 0

    # Valid read-only file
    m = MockFile()
    assert is_file(m)

    # Iterable (a list is not an iterator) with no read / write attributes
    data = [1, 2, 3]
    assert not is_file(data)

    # Genuine iterator but no read / write attributes
    assert not is_file(iter(data))

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1685,6 +1685,26 @@ class InvalidBuffer(object):
with tm.assert_raises_regex(ValueError, msg):
self.read_csv(InvalidBuffer())

# gh-16135: we want to ensure that "tell" and "seek"
# aren't actually being used when we call `read_csv`
#
# Thus, while the object may look "invalid" (these
# methods are attributes of the `StringIO` class),
# it is still a valid file-object for our purposes.
class NoSeekTellBuffer(StringIO):
    """`StringIO` subclass whose positioning methods always fail.

    Both `seek` and `tell` raise `AttributeError` on every call, so any
    consumer that invokes them errors out instead of silently succeeding.
    """

    def seek(self, pos, whence=0):
        """Reject every seek request; `pos` and `whence` are ignored."""
        raise AttributeError("No seek method")

    def tell(self):
        """Reject every position query."""
        raise AttributeError("No tell method")

data = "a\n1"

expected = pd.DataFrame({"a": [1]})
result = self.read_csv(NoSeekTellBuffer(data))

tm.assert_frame_equal(result, expected)

if PY3:
from unittest import mock

Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,22 @@ def test_s3_fails(self):
# It's irrelevant here that this isn't actually a table.
with pytest.raises(IOError):
read_csv('s3://cant_get_it/')

@tm.network
def boto3_client_s3(self):
# see gh-16135

# boto3 is a dependency of s3fs
import boto3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a skip if s3fs is not installed?

Copy link
Member Author

@gfyoung gfyoung Apr 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely. There's a setUp method in this test class which skips on that exact condition.

client = boto3.client("s3")

key = "/tips.csv"
bucket = "pandas-test"
s3_object = client.get_object(Bucket=bucket, Key=key)

result = read_csv(s3_object["Body"])
assert isinstance(result, DataFrame)
assert not result.empty

expected = read_csv(tm.get_data_path('tips.csv'))
tm.assert_frame_equal(result, expected)