-
Notifications
You must be signed in to change notification settings - Fork 651
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TEST-#6016: make sure eval_general
doesn't expect exceptions by default
#6954
Changes from 60 commits
1bcb09e
22207df
b281235
c014163
a5db6a3
2971259
cb1f705
d52e6b9
4d2c028
c9122d4
46cfb38
8eb878b
27e1444
0e7e206
5ec5984
74f3ebf
a40b71f
93af135
e7e30f0
54910b4
d0a0867
d5ae03c
73a0d69
04ecf14
89327bc
8b59bcb
4af874e
3693b60
f9ec678
08f0a28
023cf96
12c8337
0f9172b
728987d
d65bd21
7fb4ba1
779c7f8
b6fb0cc
7d2f6cb
999ce44
47a886c
a64e5d1
5f00068
d03ebc6
2d5e139
abf89fb
65db771
9baeb87
760f040
3937854
b16edab
def475e
2fe8086
925b4ce
af29f5b
cd04805
d3ce0c2
6abeba7
6bcd51d
9a340ec
46c4abe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -457,7 +457,10 @@ def _read_csv_check_support( | |||||
+ "'infer' header values", | ||||||
) | ||||||
if isinstance(parse_dates, list) and not set(parse_dates).issubset(names): | ||||||
raise ValueError("Missing column provided to 'parse_dates'") | ||||||
missed_columns = set(parse_dates) - set(names) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
raise ValueError( | ||||||
f"Missing column provided to 'parse_dates': '{', '.join(missed_columns)}'" | ||||||
) | ||||||
|
||||||
empty_pandas_df = pandas.read_csv( | ||||||
**dict( | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -296,6 +296,7 @@ def test_read_csv_datetime( | |
engine, | ||
parse_dates, | ||
names, | ||
request, | ||
): | ||
parse_dates_unsupported = isinstance(parse_dates, dict) or ( | ||
isinstance(parse_dates, list) | ||
|
@@ -311,9 +312,27 @@ def test_read_csv_datetime( | |
skip_exc_type_check = parse_dates_unsupported and engine == "arrow" | ||
if skip_exc_type_check: | ||
pytest.xfail(reason="https://github.com/modin-project/modin/issues/7012") | ||
|
||
raising_exceptions = None | ||
if "names1-parse_dates2" in request.node.callspec.id: | ||
raising_exceptions = ValueError( | ||
"Missing column provided to 'parse_dates': 'col2'" | ||
) | ||
elif ( | ||
"names1-parse_dates5-None" in request.node.callspec.id | ||
or "names1-parse_dates4-None" in request.node.callspec.id | ||
): | ||
raising_exceptions = ValueError( | ||
"Missing column provided to 'parse_dates': 'col2, col3'" | ||
) | ||
elif "None-parse_dates3" in request.node.callspec.id: | ||
raising_exceptions = ValueError( | ||
"Missing column provided to 'parse_dates': 'c2'" | ||
) | ||
eval_io( | ||
fn_name="read_csv", | ||
md_extra_kwargs={"engine": engine}, | ||
raising_exceptions=raising_exceptions, | ||
# read_csv kwargs | ||
filepath_or_buffer=pytest.csvs_names["test_read_csv_regular"], | ||
parse_dates=parse_dates, | ||
|
@@ -509,7 +528,7 @@ def test_dup_names(self, names): | |
["i1", "i2", "a"], | ||
], | ||
) | ||
def test_reset_index(self, names): | ||
def test_reset_index(self, names, request): | ||
index = pandas.MultiIndex.from_tuples( | ||
[(i, j, k) for i in range(2) for j in range(3) for k in range(4)], | ||
names=names, | ||
|
@@ -519,7 +538,12 @@ def applier(lib): | |
df = lib.DataFrame(self.data, index=index) + 1 | ||
return df.reset_index() | ||
|
||
eval_general(pd, pandas, applier) | ||
raising_exceptions = None | ||
if "names3" in request.node.callspec.id: | ||
raising_exceptions = ValueError("cannot insert i1, already exists") | ||
elif "names4" in request.node.callspec.id: | ||
raising_exceptions = ValueError("cannot insert a, already exists") | ||
eval_general(pd, pandas, applier, raising_exceptions=raising_exceptions) | ||
|
||
@pytest.mark.parametrize("is_multiindex", [True, False]) | ||
def test_reset_index_multicolumns(self, is_multiindex): | ||
|
@@ -1327,7 +1351,7 @@ def groupby(df, **kwargs): | |
@pytest.mark.parametrize("n", [10, -10]) | ||
@pytest.mark.parametrize("invert", [True, False]) | ||
@pytest.mark.parametrize("select", [True, False]) | ||
@pytest.mark.parametrize("ascending", [None, True, False]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pandas no longer uses this value as default. |
||
@pytest.mark.parametrize("ascending", [True, False]) | ||
def test_head_tail(self, op, n, invert, select, ascending): | ||
def head(df, **kwargs): | ||
if invert: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,7 +49,7 @@ | |
@pytest.mark.parametrize( | ||
"other", | ||
[ | ||
lambda df: 4, | ||
lambda df, axis: 4, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lambda was used incorrectly. |
||
lambda df, axis: df.iloc[0] if axis == "columns" else list(df[df.columns[0]]), | ||
lambda df, axis: { | ||
label: idx + 1 | ||
|
@@ -114,14 +114,19 @@ def test___rdivmod__(): | |
*("truediv", "rtruediv", "mul", "rmul", "floordiv", "rfloordiv"), | ||
], | ||
) | ||
def test_math_functions_fill_value(other, fill_value, op): | ||
def test_math_functions_fill_value(other, fill_value, op, request): | ||
data = test_data["int_data"] | ||
modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) | ||
|
||
raising_exceptions = None | ||
if "check_different_index" in request.node.callspec.id and fill_value == 3.0: | ||
raising_exceptions = NotImplementedError("fill_value 3.0 not supported.") | ||
|
||
eval_general( | ||
modin_df, | ||
pandas_df, | ||
lambda df: getattr(df, op)(other(df), axis=0, fill_value=fill_value), | ||
raising_exceptions=raising_exceptions, | ||
# This test causes an empty slice to be generated thus triggering: | ||
# https://github.com/modin-project/modin/issues/5974 | ||
comparator_kwargs={"check_dtypes": get_current_execution() != "BaseOnPython"}, | ||
|
@@ -178,7 +183,7 @@ def test_math_alias(math_op, alias): | |
@pytest.mark.parametrize("other", ["as_left", 4, 4.0, "a"]) | ||
@pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"]) | ||
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) | ||
def test_comparison(data, op, other): | ||
def test_comparison(data, op, other, request): | ||
def operation(df): | ||
df = getattr(df, op)(df if other == "as_left" else other) | ||
if other == "as_left" and StorageFormat.get() == "Hdk": | ||
|
@@ -187,9 +192,20 @@ def operation(df): | |
df = df.sort_index(axis=1) | ||
return df | ||
|
||
raising_exceptions = None | ||
if "int_data" in request.node.callspec.id and other == "a": | ||
pytest.xfail(reason="https://github.com/modin-project/modin/issues/7019") | ||
elif "float_nan_data" in request.node.callspec.id and other == "a": | ||
raising_exceptions = TypeError( | ||
"Invalid comparison between dtype=float64 and str" | ||
) | ||
if StorageFormat.get() == "Hdk": | ||
pytest.xfail(reason="https://github.com/modin-project/modin/issues/7019") | ||
|
||
eval_general( | ||
*create_test_dfs(data), | ||
operation=operation, | ||
raising_exceptions=raising_exceptions, | ||
) | ||
|
||
|
||
|
@@ -319,7 +335,9 @@ def test_equals_with_nans(): | |
@pytest.mark.parametrize( | ||
"is_idx_aligned", [True, False], ids=["idx_aligned", "idx_not_aligned"] | ||
) | ||
def test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partitions): | ||
def test_mismatched_row_partitions( | ||
is_idx_aligned, op_type, is_more_other_partitions, request | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is request added? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed |
||
): | ||
data = [0, 1, 2, 3, 4, 5] | ||
modin_df1, pandas_df1 = create_test_dfs({"a": data, "b": data}) | ||
modin_df, pandas_df = modin_df1.loc[:2], pandas_df1.loc[:2] | ||
|
@@ -344,6 +362,9 @@ def test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partit | |
lambda df: ( | ||
df / modin_df1.a if isinstance(df, pd.DataFrame) else df / pandas_df1.a | ||
), | ||
raising_exceptions=ValueError( | ||
"cannot reindex on an axis with duplicate labels" | ||
), | ||
) | ||
return | ||
|
||
|
@@ -492,13 +513,40 @@ def test_non_commutative_multiply(): | |
pytest.param(3.5, id="float scalar"), | ||
], | ||
) | ||
def test_arithmetic_with_tricky_dtypes(val1, val2, op): | ||
def test_arithmetic_with_tricky_dtypes(val1, val2, op, request): | ||
modin_df1, pandas_df1 = create_test_dfs(val1) | ||
modin_df2, pandas_df2 = ( | ||
create_test_dfs(val2) if isinstance(val2, list) else (val2, val2) | ||
) | ||
|
||
raising_exceptions = None | ||
if ( | ||
"bool-bool" in request.node.callspec.id | ||
or "bool scalar-bool" in request.node.callspec.id | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we are going to proceed using request.* to identify the exact errors and how difficult it is for developers to find the exact cases for those errors using request.*? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Well, this is quite inconvenient, but this is the price we need to pay in order to be more confident that what is actually being tested is what is intended. I would also like to note that to avoid this inconvenience, when writing a test, we need to separate test cases that should work correctly from those that should not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, separating tests would probably make sense here. Matching error-prone parameters with exceptions would help I think. |
||
) and op in [ | ||
"pow", | ||
"rpow", | ||
"truediv", | ||
"rtruediv", | ||
"floordiv", | ||
"rfloordiv", | ||
]: | ||
op_name = op[1:] if op.startswith("r") else op | ||
raising_exceptions = NotImplementedError( | ||
f"operator '{op_name}' not implemented for bool dtypes" | ||
) | ||
elif ( | ||
"bool-bool" in request.node.callspec.id | ||
or "bool scalar-bool" in request.node.callspec.id | ||
) and op in ["sub", "rsub"]: | ||
raising_exceptions = TypeError( | ||
"numpy boolean subtract, the `-` operator, is not supported, " | ||
+ "use the bitwise_xor, the `^` operator, or the logical_xor function instead." | ||
) | ||
|
||
eval_general( | ||
(modin_df1, modin_df2), | ||
(pandas_df1, pandas_df2), | ||
lambda dfs: getattr(dfs[0], op)(dfs[1]), | ||
raising_exceptions=raising_exceptions, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just for testing purpose. Need to revert.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reverted