Skip to content

Commit

Permalink
TST: Document and test na_filter in read_excel (pandas-dev#29171)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfyoung authored and WillAyd committed Oct 23, 2019
1 parent 0e60bc9 commit bc020f6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
20 changes: 18 additions & 2 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,24 @@
+ fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ")
+ """'.
keep_default_na : bool, default True
If na_values are specified and keep_default_na is False the default NaN
values are overridden, otherwise they're appended to.
Whether or not to include the default NaN values when parsing the data.
Depending on whether `na_values` is passed in, the behavior is as follows:
* If `keep_default_na` is True, and `na_values` are specified, `na_values`
is appended to the default NaN values used for parsing.
* If `keep_default_na` is True, and `na_values` are not specified, only
the default NaN values are used for parsing.
* If `keep_default_na` is False, and `na_values` are specified, only
the NaN values specified in `na_values` are used for parsing.
* If `keep_default_na` is False, and `na_values` are not specified, no
strings will be parsed as NaN.
Note that if `na_filter` is passed in as False, the `keep_default_na` and
`na_values` parameters will be ignored.
na_filter : bool, default True
Detect missing value markers (empty strings and the value of na_values). In
data without any NAs, passing na_filter=False can improve the performance
of reading a large file.
verbose : bool, default False
Indicate number of NA values placed in non-numeric columns.
parse_dates : bool, list-like, or dict, default False
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,27 @@ def test_excel_passes_na(self, read_ext):
)
tm.assert_frame_equal(parsed, expected)

@pytest.mark.parametrize("na_filter", [None, True, False])
def test_excel_passes_na_filter(self, read_ext, na_filter):
    # gh-25453
    # Forward ``na_filter`` only when it was given explicitly, so that the
    # ``None`` case exercises whatever ``read_excel`` does by default.
    extra = {} if na_filter is None else {"na_filter": na_filter}

    with pd.ExcelFile("test5" + read_ext) as workbook:
        result = pd.read_excel(
            workbook, "Sheet1", keep_default_na=True, na_values=["apple"], **extra
        )

    if na_filter is False:
        # With NA detection switched off, every marker comes back verbatim,
        # including the user-supplied "apple".
        rows = [["1.#QNAN"], [1], ["nan"], ["apple"], ["rabbit"]]
    else:
        # Default markers and the custom "apple" marker are all read as NaN.
        rows = [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]]

    tm.assert_frame_equal(result, DataFrame(rows, columns=["Test"]))

@pytest.mark.parametrize("arg", ["sheet", "sheetname", "parse_cols"])
def test_unexpected_kwargs_raises(self, read_ext, arg):
# gh-17964
Expand Down

0 comments on commit bc020f6

Please sign in to comment.