Skip to content

Commit

Permalink
[python-package] make a shallow copy when replacing categorical features with codes (fixes #4596) (#5225)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jmoralez authored May 22, 2022
1 parent b077415 commit c000b8c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
if list(data[col].cat.categories) != list(category):
data[col] = data[col].cat.set_categories(category)
if len(cat_cols): # cat_cols is list
data = data.copy() # not alter origin DataFrame
data = data.copy(deep=False) # not alter origin DataFrame
data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes).replace({-1: np.nan})
if categorical_feature is not None:
if feature_name is None:
Expand Down
11 changes: 11 additions & 0 deletions tests/python_package_test/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,17 @@ def test_no_copy_when_single_float_dtype_dataframe(dtype):
assert np.shares_memory(X, built_data)


def test_categorical_code_conversion_doesnt_modify_original_data():
    """Check that converting categoricals to codes leaves the input frame intact.

    Builds a single-column categorical DataFrame from random 'a'/'b' labels,
    passes it through ``lgb.basic._data_from_pandas`` and verifies that the
    caller's DataFrame still holds the original labels while the first
    element of the returned tuple holds the category codes.
    """
    pd = pytest.importorskip('pandas')
    labels = np.random.choice(['a', 'b'], 100).reshape(-1, 1)
    # Hand pandas a copy so ``labels`` stays an independent reference for comparison.
    frame = pd.DataFrame(labels.copy(), columns=['x1'], dtype='category')
    converted = lgb.basic._data_from_pandas(frame, ['x1'], None, None)
    built = converted[0]
    # The caller's frame must still contain the original string labels ...
    np.testing.assert_equal(frame['x1'], labels[:, 0])
    # ... while the built data carries the integer category codes.
    np.testing.assert_equal(frame['x1'].cat.codes, built[:, 0])


@pytest.mark.parametrize('min_data_in_bin', [2, 10])
def test_feature_num_bin(min_data_in_bin):
X = np.vstack([
Expand Down

0 comments on commit c000b8c

Please sign in to comment.