Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: add ignore_index option in DataFrame.explode #34933

Merged
merged 31 commits into from
Jun 26, 2020
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
de64663
[IMP] - #34932 - added option to ignore_index in DataFrame.explode
Jun 22, 2020
136a568
ENH - #34932 - added docstring ignore_index to explode
Jun 22, 2020
f133297
ENH - #34932 - added type to argument
Jun 22, 2020
4105930
ENH - #34932 - added ignore_index to Series.explode
Jun 22, 2020
02e1266
ENH - #34932 - added ignore_index to docstring Series.explode
Jun 22, 2020
2f80197
ENH - #34932 - added default to argument ignore_index frame
Jun 22, 2020
3e85845
ENH - #34932 - added default to argument ignore_index series
Jun 22, 2020
fd803ac
ENH - #34932 - added test Series explode ignore_index
Jun 22, 2020
4f6a55b
ENH - #34932 - added test DataFrame explode ignore_index
Jun 22, 2020
0a531dd
ENH - #34932 - added ignore_index explode to release notes
Jun 22, 2020
0ba378d
ENH - #34932 - fixed merge conflicts in release note
Jun 22, 2020
951a20a
Merge branch 'master' into explode_ignore_index_34932
erfannariman Jun 22, 2020
9465319
update release notes with suggestions
erfannariman Jun 22, 2020
5c59b54
Added ibase default index instead of reset index
Jun 22, 2020
288644a
Merge branch 'explode_ignore_index_34932' of github.com:erfannariman/…
Jun 22, 2020
a4d28fe
Removed reset_index
Jun 22, 2020
5005f96
Add type to test Series
Jun 22, 2020
b99ad4b
Changes CI black
Jun 22, 2020
47b985e
Removed double line break in docstrings
Jun 22, 2020
92bcb89
removed redundant repetition of line
erfannariman Jun 22, 2020
8551d70
Version added to docstring argument
erfannariman Jun 23, 2020
7537ec0
Removed name from result series
erfannariman Jun 23, 2020
64fa916
Update doc/source/whatsnew/v1.1.0.rst
jreback Jun 24, 2020
3110aae
Added meth refs
erfannariman Jun 24, 2020
dc45b71
Added different approach constructor
erfannariman Jun 24, 2020
5198045
Merge branch 'explode_ignore_index_34932' of github.com:erfannariman/…
erfannariman Jun 24, 2020
cf91f9d
Merge branch 'master' into explode_ignore_index_34932
erfannariman Jun 24, 2020
97d5cb7
Merge branch 'master' into explode_ignore_index_34932
erfannariman Jun 25, 2020
6edb1ab
Fixed whatsnew v1.1.0.rst
erfannariman Jun 26, 2020
6e5a4c3
Fixed change in whatsnew doc
erfannariman Jun 26, 2020
d8a198b
Merge branch 'master' into explode_ignore_index_34932
erfannariman Jun 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ Other enhancements
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
erfannariman marked this conversation as resolved.
Show resolved Hide resolved
- :meth:`DataFrame.explode` and :meth:`Series.explode` now accept ``ignore_index`` to reset the index, similar to :func:`concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).

.. ---------------------------------------------------------------------------

Expand Down
11 changes: 9 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6895,7 +6895,9 @@ def stack(self, level=-1, dropna=True):
else:
return stack(self, level, dropna=dropna)

def explode(self, column: Union[str, Tuple]) -> "DataFrame":
def explode(
self, column: Union[str, Tuple], ignore_index: bool = False
) -> "DataFrame":
"""
Transform each element of a list-like to a row, replicating index values.

Expand All @@ -6905,6 +6907,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
----------
column : str or tuple
Column to explode.
ignore_index : bool, default False
If True, the resulting index will be labeled 0, 1, …, n - 1.
erfannariman marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Expand Down Expand Up @@ -6961,7 +6965,10 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
assert df is not None # needed for mypy
result = df[column].explode()
result = df.drop([column], axis=1).join(result)
result.index = self.index.take(result.index)
if ignore_index:
result.index = ibase.default_index(len(result))
else:
result.index = self.index.take(result.index)
result = result.reindex(columns=self.columns, copy=False)

return result
Expand Down
17 changes: 13 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3762,12 +3762,17 @@ def reorder_levels(self, order) -> "Series":
result.index = result.index.reorder_levels(order)
return result

def explode(self) -> "Series":
def explode(self, ignore_index: bool = False) -> "Series":
"""
Transform each element of a list-like to a row.

.. versionadded:: 0.25.0

Parameters
----------
ignore_index : bool, default False
If True, the resulting index will be labeled 0, 1, …, n - 1.
erfannariman marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Series
Expand Down Expand Up @@ -3814,9 +3819,13 @@ def explode(self) -> "Series":

values, counts = reshape.explode(np.asarray(self.array))

result = self._constructor(
values, index=self.index.repeat(counts), name=self.name
)
if ignore_index:
result = self._constructor(values, index=range(len(values)), name=self.name)
erfannariman marked this conversation as resolved.
Show resolved Hide resolved
else:
result = self._constructor(
erfannariman marked this conversation as resolved.
Show resolved Hide resolved
values, index=self.index.repeat(counts), name=self.name
)

return result

def unstack(self, level=-1, fill_value=None):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/frame/methods/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,13 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index)
result = df.explode("col1")
expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
tm.assert_frame_equal(result, expected)


def test_ignore_index():
    # GH 34932
    # With ignore_index=True the exploded frame must carry a fresh
    # 0..n-1 index instead of the repeated labels of the source rows.
    expected = pd.DataFrame(
        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
    )

    frame = pd.DataFrame(
        {"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}
    )

    tm.assert_frame_equal(frame.explode("values", ignore_index=True), expected)
8 changes: 8 additions & 0 deletions pandas/tests/series/methods/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,11 @@ def test_duplicate_index():
result = s.explode()
expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
tm.assert_series_equal(result, expected)


def test_ignore_index():
    # GH 34932
    # With ignore_index=True the exploded series must carry a fresh
    # 0..n-1 index instead of the repeated labels of the source rows.
    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)

    nested = pd.Series([[1, 2], [3, 4]])

    tm.assert_series_equal(nested.explode(ignore_index=True), expected)