diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index e31396b0cb8ff..e323dd613b3c7 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -90,6 +90,7 @@ I/O - Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`). - Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`) - Bug in :func:`read_json` not decoding when reading line deliminted JSON from S3 (:issue:`17200`) +- Bug in :func:`pandas.io.json.json_normalize` where the list passed as ``meta`` was modified in place (:issue:`18610`) Plotting diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index e811dd1eab142..23d2f730d070c 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -181,7 +181,7 @@ def _pull_field(js, spec): return result - if isinstance(data, list) and len(data) is 0: + if isinstance(data, list) and not data: return DataFrame() # A bit of a hackjob @@ -207,9 +207,7 @@ def _pull_field(js, spec): elif not isinstance(meta, list): meta = [meta] - for i, x in enumerate(meta): - if not isinstance(x, list): - meta[i] = [x] + meta = [m if isinstance(m, list) else [m] for m in meta] # Disastrously inefficient for now records = [] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 49b765b18d623..1cceae32cd748 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -173,6 +173,21 @@ def test_meta_name_conflict(self): for val in ['metafoo', 'metabar', 'foo', 'bar']: assert val in result + def test_meta_parameter_not_modified(self): + # GH 18610 + data = [{'foo': 'hello', + 'bar': 'there', + 'data': [{'foo': 'something', 'bar': 'else'}, + {'foo': 'something2', 'bar': 'else2'}]}] + + COLUMNS = ['foo', 'bar'] + result = json_normalize(data, 'data', meta=COLUMNS, + meta_prefix='meta') + + assert COLUMNS == ['foo', 'bar'] + for val in ['metafoo', 
'metabar', 'foo', 'bar']: + assert val in result + def test_record_prefix(self, state_data): result = json_normalize(state_data[0], 'counties') expected = DataFrame(state_data[0]['counties'])