diff --git a/pandas/core/strings.py b/pandas/core/strings.py index abef6f6086dbd6..9614641aa1abf9 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1423,6 +1423,10 @@ def cons_row(x): return [x] result = [cons_row(x) for x in result] + if result: + # propagate nan values to match longest sequence (GH 18450) + max_len = max(len(x) for x in result) + result = [x * max_len if x[0] is np.nan else x for x in result] if not isinstance(expand, bool): raise ValueError("expand must be True or False") diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f1b97081b6d93a..9a5c0abf9e92fa 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2002,6 +2002,12 @@ def test_split_to_dataframe(self): 5: [NA, 'not']}) tm.assert_frame_equal(result, exp) + # make sure we propagate NaN values across all columns + s = Series(["foo,bar,baz", NA]) + result = s.str.split(",", expand=True) + exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]]) + tm.assert_frame_equal(result, exp) + s = Series(['some_splits', 'with_index'], index=['preserve', 'me']) result = s.str.split('_', expand=True) exp = DataFrame({0: ['some', 'with'], 1: ['splits', 'index']},