Skip to content

Commit

Permalink
Propagating NaN values when using str.split (#18450)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd committed Nov 24, 2017
1 parent e6eac0b commit d64995a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,10 @@ def cons_row(x):
return [x]

result = [cons_row(x) for x in result]
if result:
# propagate nan values to match longest sequence (GH 18450)
max_len = max(len(x) for x in result)
result = [x * max_len if x[0] is np.nan else x for x in result]

if not isinstance(expand, bool):
raise ValueError("expand must be True or False")
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2002,6 +2002,12 @@ def test_split_to_dataframe(self):
5: [NA, 'not']})
tm.assert_frame_equal(result, exp)

# make sure we propagate NaN values across all columns
s = Series(["foo,bar,baz", NA])
result = s.str.split(",", expand=True)
exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]])
tm.assert_frame_equal(result, exp)

s = Series(['some_splits', 'with_index'], index=['preserve', 'me'])
result = s.str.split('_', expand=True)
exp = DataFrame({0: ['some', 'with'], 1: ['splits', 'index']},
Expand Down

0 comments on commit d64995a

Please sign in to comment.