-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF/BUG: reimplement MultiIndex.remove_unused_levels #16565
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2515,7 +2515,34 @@ def test_reconstruct_remove_unused(self): | |
# idempotent | ||
result2 = result.remove_unused_levels() | ||
tm.assert_index_equal(result2, expected) | ||
assert result2 is result | ||
assert result2.is_(result) | ||
|
||
@pytest.mark.parametrize('first_type,second_type', [ | ||
('int64', 'int64'), | ||
('datetime64[D]', 'str')]) | ||
def test_remove_unused_levels_large(self, first_type, second_type): | ||
# GH16556 | ||
|
||
# because tests should be deterministic (and this test in particular | ||
# checks that levels are removed, which is not the case for every | ||
# random input): | ||
rng = np.random.RandomState(4) # seed is arbitrary value that works | ||
|
||
size = 1 << 16 | ||
df = DataFrame(dict( | ||
first=rng.randint(0, 1 << 13, size).astype(first_type), | ||
second=rng.randint(0, 1 << 10, size).astype(second_type), | ||
third=rng.rand(size))) | ||
df = df.groupby(['first', 'second']).sum() | ||
df = df[df.third < 0.1] | ||
|
||
result = df.index.remove_unused_levels() | ||
assert len(result.levels[0]) < len(df.index.levels[0]) | ||
assert len(result.levels[1]) < len(df.index.levels[1]) | ||
assert result.equals(df.index) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also test with different dtypes for the levels, e.g. add another example with dates & strings. |
||
|
||
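There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also construct the expected index and assert that the result IS equal to it (the inline code naming the comparison target was lost in extraction; presumably it is the `expected` index built on the following lines). |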
||
expected = df.reset_index().set_index(['first', 'second']).index | ||
tm.assert_index_equal(result, expected) | ||
|
||
def test_isin(self): | ||
values = [('foo', 2), ('bar', 3), ('quux', 4)] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep!