-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[#19431] Regression in make_block_same_class (tests failing for new fastparquet release) #19434
Changes from 20 commits
eb55928
2f4fc07
e18f8e6
9a07d97
3d9810d
ee75fdf
68d6324
985081d
0cfcd37
0c4a6d7
6ce68cf
f414743
0d76fe7
bb95dc6
d9a2e2a
97b17e9
800b741
52220b9
c602a76
ddbbde3
6e6b5f0
326394f
77422ba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -224,12 +224,17 @@ def make_block_scalar(self, values): | |
""" | ||
return ScalarBlock(values) | ||
|
||
def make_block_same_class(self, values, placement=None, ndim=None): | ||
def make_block_same_class(self, values, placement=None, ndim=None, | ||
dtype=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. deprecate dtype here (add as a FutureWarning) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
""" Wrap given values in a block of same type as self. """ | ||
if dtype is not None: | ||
# issue 19431 fastparquet is passing this | ||
warnings.warn("dtype argument is deprecated, will be removed " | ||
"in a future release.", FutureWarning) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make this a DeprecationWarning instead of FutureWarning, as it is typically only something developers need to see, not users. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. post an issue on fastparquet to fix this as well There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let’s leave this as a FutureWarning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can use the issue I opened yesterday: dask/fastparquet#297 FutureWarning will just be annoying for users in this case, and I am confident fastparquet will change that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if it’s changed then we can simply move the bar on fp min version and this is no problem There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are not keeping private API around for external packages There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. gents, which is it? FutureWarning or DeprecationWarning? it seems to me that DeprecationWarning is meant to be used here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls leave it as FutureWarning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
if placement is None: | ||
placement = self.mgr_locs | ||
return make_block(values, placement=placement, ndim=ndim, | ||
klass=self.__class__) | ||
klass=self.__class__, dtype=dtype) | ||
|
||
def __unicode__(self): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -285,6 +285,14 @@ def test_delete(self): | |
with pytest.raises(Exception): | ||
newb.delete(3) | ||
|
||
def test_make_block_same_class(self): | ||
block = create_block('M8[ns, US/Eastern]', [3]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add the issue number as a comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
with pytest.warns(FutureWarning): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good point. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
copy = block.make_block_same_class(block.values, | ||
dtype=block.values.dtype) | ||
assert block.dtype == copy.dtype | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need these asserts, just running this is enough There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
assert block.__class__ == copy.__class__ | ||
|
||
|
||
class TestDatetimeBlock(object): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,6 +71,15 @@ def fp(): | |
return 'fastparquet' | ||
|
||
|
||
@pytest.fixture | ||
def fp_lt_014(): | ||
if not _HAVE_FASTPARQUET: | ||
pytest.skip("fastparquet is not installed") | ||
if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): | ||
pytest.skip("fastparquet is >= 0.1.4") | ||
return 'fastparquet' | ||
|
||
|
||
@pytest.fixture | ||
def df_compat(): | ||
return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) | ||
|
@@ -448,9 +457,11 @@ class TestParquetFastParquet(Base): | |
def test_basic(self, fp, df_full): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make 2 tests here, one for < 0.1.4 and 1 for > |
||
df = df_full | ||
|
||
# additional supported types for fastparquet | ||
# additional supported types for fastparquet>=0.1.4 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can leave out the ">=0.1.4" as the timedelta is not specific for the newer versions |
||
if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): | ||
df['datetime_tz'] = pd.date_range('20130101', periods=3, | ||
tz='US/Eastern') | ||
df['timedelta'] = pd.timedelta_range('1 day', periods=3) | ||
|
||
check_round_trip(df, fp) | ||
|
||
@pytest.mark.skip(reason="not supported") | ||
|
@@ -482,14 +493,15 @@ def test_categorical(self, fp): | |
df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) | ||
check_round_trip(df, fp) | ||
|
||
def test_datetime_tz(self, fp): | ||
# doesn't preserve tz | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same need 2 tests (1 with each fixture) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pyarrow is using similar style. there are already two tests for datetime_tz, one < 0.1.4 one >. |
||
def test_datetime_tz(self, fp_lt_014): | ||
|
||
# fastparquet<0.1.4 doesn't preserve tz | ||
df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, | ||
tz='US/Eastern')}) | ||
|
||
# warns on the coercion | ||
with catch_warnings(record=True): | ||
check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) | ||
check_round_trip(df, fp_lt_014, | ||
expected=df.astype('datetime64[ns]')) | ||
|
||
def test_filter_row_groups(self, fp): | ||
d = {'a': list(range(0, 3))} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this needs to change. you can't say 'now supports' there is no context here. Simply remove the fp statement (or qualify it with fp >1.4)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done