From 9f2af4248f7212f512da977da2cbacd98d30d931 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Thu, 5 Oct 2017 06:52:56 +0100
Subject: [PATCH 1/6] ENH: Add transparent compression to json reading/writing

This works in the same way as the ``compression`` argument to ``read_csv``
and ``to_csv``. I've added tests confirming that it works with file paths,
as well as with file URLs and S3 URLs.
---
 doc/source/whatsnew/v0.21.0.txt              |   2 +-
 pandas/core/generic.py                       |  10 +-
 pandas/io/json/json.py                       |  49 ++++--
 .../tests/io/json/data/tsframe_v012.json.zip | Bin 0 -> 436 bytes
 pandas/tests/io/json/test_compression.py     | 155 ++++++++++++++++++
 pandas/tests/io/json/test_readlines.py       |   2 +-
 6 files changed, 198 insertions(+), 20 deletions(-)
 create mode 100644 pandas/tests/io/json/data/tsframe_v012.json.zip
 create mode 100644 pandas/tests/io/json/test_compression.py

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 7fbf2533428dc..22c40c36853dd 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -195,7 +195,7 @@ Other Enhancements
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
 - Improved the import time of pandas by about 2.25x (:issue:`16764`)
-
+- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`XXXXXXX`)
 
 .. _whatsnew_0210.api_breaking:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 942a9ff279092..c7ae9bbee9013 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1258,7 +1258,7 @@ def _repr_latex_(self):
 
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False):
+                default_handler=None, lines=False, compression=None):
         """
         Convert the object to a JSON string.
 
@@ -1320,6 +1320,12 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
 
             .. versionadded:: 0.19.0
 
+        compression : {None, 'gzip', 'bz2', 'xz'}
+            A string representing the compression to use in the output file,
+            only used when the first argument is a filename
+
+            .. versionadded:: 0.21.0
+
         Returns
         -------
         same type as input object with filtered info axis
@@ -1372,7 +1378,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             double_precision=double_precision,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
-                            lines=lines)
+                            lines=lines, compression=compression)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """Write the contained data to an HDF5 file using HDFStore.
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index ab74b265b6a06..416059f3474f6 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -5,11 +5,12 @@
 
 import pandas._libs.json as json
 from pandas._libs.tslib import iNaT
-from pandas.compat import StringIO, long, u
+from pandas.compat import PY3, StringIO, long, u
 from pandas import compat, isna
 from pandas import Series, DataFrame, to_datetime, MultiIndex
 from pandas.io.common import (get_filepath_or_buffer, _get_handle,
-                              _stringify_path, BaseIterator)
+                              _infer_compression, _stringify_path,
+                              BaseIterator)
 from pandas.io.parsers import _validate_integer
 from pandas.core.common import AbstractMethodError
 from pandas.core.reshape.concat import concat
@@ -27,7 +28,7 @@
 # interface to/from
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
-            default_handler=None, lines=False):
+            default_handler=None, lines=False, compression=None):
 
     path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
@@ -54,8 +55,11 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
         s = _convert_to_line_delimits(s)
 
     if isinstance(path_or_buf, compat.string_types):
-        with open(path_or_buf, 'w') as fh:
+        fh, handles = _get_handle(path_or_buf, 'w', compression=compression)
+        try:
             fh.write(s)
+        finally:
+            fh.close()
     elif path_or_buf is None:
         return s
     else:
@@ -178,7 +182,7 @@ def write(self):
 
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
-              lines=False, chunksize=None):
+              lines=False, chunksize=None, compression='infer'):
     """
     Convert a JSON string to pandas object
 
@@ -277,6 +281,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
         .. versionadded:: 0.21.0
 
+    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer', then use
+        gzip, bz2, zip or xz if path_or_buf is a string ending in
+        '.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
+        otherwise. If using 'zip', the ZIP file must contain only one data
+        file to be read in. Set to None for no decompression.
+
+        .. versionadded:: 0.21.0
+
     Returns
     -------
     result : Series or DataFrame, depending on the value of `typ`.
@@ -334,15 +347,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
       {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
     """
 
-    filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
-                                                      encoding=encoding)
+    compression = _infer_compression(path_or_buf, compression)
+    filepath_or_buffer, _, compression = get_filepath_or_buffer(
+        path_or_buf, encoding=encoding, compression=compression,
+    )
 
     json_reader = JsonReader(
         filepath_or_buffer, orient=orient, typ=typ, dtype=dtype,
         convert_axes=convert_axes, convert_dates=convert_dates,
         keep_default_dates=keep_default_dates, numpy=numpy,
         precise_float=precise_float, date_unit=date_unit, encoding=encoding,
-        lines=lines, chunksize=chunksize
+        lines=lines, chunksize=chunksize, compression=compression,
     )
 
     if chunksize:
@@ -361,7 +376,7 @@ class JsonReader(BaseIterator):
     """
     def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes,
                  convert_dates, keep_default_dates, numpy, precise_float,
-                 date_unit, encoding, lines, chunksize):
+                 date_unit, encoding, lines, chunksize, compression):
 
         self.path_or_buf = filepath_or_buffer
         self.orient = orient
@@ -374,6 +389,7 @@ def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes,
         self.precise_float = precise_float
         self.date_unit = date_unit
         self.encoding = encoding
+        self.compression = compression
         self.lines = lines
         self.chunksize = chunksize
         self.nrows_seen = 0
@@ -415,20 +431,21 @@ def _get_data_from_filepath(self, filepath_or_buffer):
 
         data = filepath_or_buffer
 
+        exists = False
        if isinstance(data, compat.string_types):
             try:
                 exists = os.path.exists(filepath_or_buffer)
-            # gh-5874: if the filepath is too long will raise here
             except (TypeError, ValueError):
                 pass
-            else:
-                if exists:
-                    data, _ = _get_handle(filepath_or_buffer, 'r',
-                                          encoding=self.encoding)
-                    self.should_close = True
-                    self.open_stream = data
+
+        if exists or self.compression is not None:
+            data, _ = _get_handle(filepath_or_buffer, 'r',
+                                  encoding=self.encoding,
+                                  compression=self.compression)
+            # TODO (Simon): Determine if we need to worry about closing file pointers that are passed in from e.g. S3
+            self.should_close = True
+            self.open_stream = data
 
         return data

diff --git a/pandas/tests/io/json/data/tsframe_v012.json.zip b/pandas/tests/io/json/data/tsframe_v012.json.zip
new file mode 100644
index 0000000000000000000000000000000000000000..100ba0c87b2ba55c169081bb0ed60c5db7391bbb
GIT binary patch
literal 436
zcmWIWW@Zs#-~d8>PgidSBp}Ejz)(`0R+N~V8ee8$Xrz}_oSzpO!Nb60eJyg=i>r~}
z7)2P4PTcFqY$(uj|LLnEw<6!?Th+y}ylfKDbYKphQr@pG)b!*{7t{95#=p{PX2~tP
zo9VSN!2DO`Wj2tkn(477rQ0RX7Wsm1^R

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
new file mode 100644
index 0000000000000..72cd122777d67
--- /dev/null
+++ b/pandas/tests/io/json/test_compression.py
@@ -0,0 +1,155 @@
+import pytest
+import moto
+
+import pandas as pd
+from pandas import compat
+import pandas.util.testing as tm
+from pandas.util.testing import assert_frame_equal, assert_raises_regex
+
+
+COMPRESSION_TYPES = [None, 'bz2', 'gzip', 'xz']
+
+
+def test_compress_gzip():
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression='gzip')
+        assert_frame_equal(df, pd.read_json(path, compression='gzip'))
+
+        # explicitly make sure file is gzipped
+        import gzip
+        with gzip.GzipFile(path, 'rb') as f:
+            text = f.read().decode('utf8')
+            assert_frame_equal(df, pd.read_json(text))
+
+
+def test_compress_bz2():
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression='bz2')
+        assert_frame_equal(df, pd.read_json(path, compression='bz2'))
+
+        # explicitly make sure file is bz2ed
+        import bz2
+        with bz2.BZ2File(path, 'rb') as f:
+            text = f.read().decode('utf8')
+            assert_frame_equal(df, pd.read_json(text))
+
+
+def test_compress_xz():
+    tm._skip_if_no_lzma()
+
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression='xz')
+        assert_frame_equal(df, pd.read_json(path, compression='xz'))
+
+        # explicitly make sure file is xzipped
+        lzma = compat.import_lzma()
+        with lzma.open(path, 'rb') as f:
+            text = f.read().decode('utf8')
+            assert_frame_equal(df, pd.read_json(text))
+
+
+def test_compress_zip_value_error():
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        import zipfile
+        pytest.raises(zipfile.BadZipfile, df.to_json, path, compression="zip")
+
+
+def test_read_zipped_json():
+    uncompressed_path = tm.get_data_path("tsframe_v012.json")
+    uncompressed_df = pd.read_json(uncompressed_path)
+
+    compressed_path = tm.get_data_path("tsframe_v012.json.zip")
+    compressed_df = pd.read_json(compressed_path, compression='zip')
+
+    assert_frame_equal(uncompressed_df, compressed_df)
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_with_file_url(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    with tm.ensure_clean() as path:
+        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+        df.to_json(path, compression=compression)
+        file_url = 'file://localhost' + path
+        assert_frame_equal(df, pd.read_json(file_url, compression=compression))
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_with_s3_url(compression):
+    boto3 = pytest.importorskip('boto3')
+    pytest.importorskip('s3fs')
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+    with moto.mock_s3():
+        conn = boto3.resource("s3", region_name="us-east-1")
+        bucket = conn.create_bucket(Bucket="pandas-test")
+
+        with tm.ensure_clean() as path:
+            df.to_json(path, compression=compression)
+            with open(path, 'rb') as f:
+                bucket.put_object(Key='test-1', Body=f)
+
+        s3_df = pd.read_json('s3://pandas-test/test-1', compression=compression)
+        assert_frame_equal(df, s3_df)
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_lines_with_compression(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    with tm.ensure_clean() as path:
+        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+        df.to_json(path, orient='records', lines=True, compression=compression)
+        roundtripped_df = pd.read_json(path, lines=True,
+                                       compression=compression)
+        assert_frame_equal(df, roundtripped_df)
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_chunksize_with_compression(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    with tm.ensure_clean() as path:
+        df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
+        df.to_json(path, orient='records', lines=True, compression=compression)
+
+        roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1,
+                                                 compression=compression))
+        assert_frame_equal(df, roundtripped_df)
+
+
+def test_write_unsupported_compression_type():
+    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+    with tm.ensure_clean() as path:
+        msg = "Unrecognized compression type: unsupported"
+        assert_raises_regex(ValueError, msg, df.to_json,
+                            path, compression="unsupported")
+
+
+def test_read_unsupported_compression_type():
+    with tm.ensure_clean() as path:
+        msg = "Unrecognized compression type: unsupported"
+        assert_raises_regex(ValueError, msg, pd.read_json,
+                            path, compression="unsupported")
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index d14355b07cf20..95f23e82fced0 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -128,7 +128,7 @@ def test_readjson_chunks_closes(chunksize):
         path, orient=None, typ="frame", dtype=True, convert_axes=True,
         convert_dates=True, keep_default_dates=True, numpy=False,
         precise_float=False, date_unit=None, encoding=None,
-        lines=True, chunksize=chunksize)
+        lines=True, chunksize=chunksize, compression=None)
     reader.read()
     assert reader.open_stream.closed, "didn't close stream with \
         chunksize = %s" % chunksize

From 3ed830cae20ec5fa9fcf53dacfceb789b5a04064 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Thu, 5 Oct 2017 21:10:26 +0100
Subject: [PATCH 2/6] Fix PEP8 violations

---
 pandas/io/json/json.py                   | 3 +--
 pandas/tests/io/json/test_compression.py | 5 +++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index 416059f3474f6..be39f4baba0fb 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -5,7 +5,7 @@
 
 import pandas._libs.json as json
 from pandas._libs.tslib import iNaT
-from pandas.compat import PY3, StringIO, long, u
+from pandas.compat import StringIO, long, u
 from pandas import compat, isna
 from pandas import Series, DataFrame, to_datetime, MultiIndex
 from pandas.io.common import (get_filepath_or_buffer, _get_handle,
@@ -443,7 +443,6 @@ def _get_data_from_filepath(self, filepath_or_buffer):
             data, _ = _get_handle(filepath_or_buffer, 'r',
                                   encoding=self.encoding,
                                   compression=self.compression)
-            # TODO (Simon): Determine if we need to worry about closing file pointers that are passed in from e.g. S3
             self.should_close = True
             self.open_stream = data
 
diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 72cd122777d67..350b8f8be4403 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -109,8 +109,9 @@ def test_with_s3_url(compression):
             with open(path, 'rb') as f:
                 bucket.put_object(Key='test-1', Body=f)
 
-        s3_df = pd.read_json('s3://pandas-test/test-1', compression=compression)
-        assert_frame_equal(df, s3_df)
+        roundtripped_df = pd.read_json('s3://pandas-test/test-1',
+                                       compression=compression)
+        assert_frame_equal(df, roundtripped_df)
 
 
 @pytest.mark.parametrize('compression', COMPRESSION_TYPES)

From 2a7c3b27c81e05a9fed5a36ef59d11517f7279a2 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 07:30:38 +0100
Subject: [PATCH 3/6] Add PR number to whatsnew entry

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 22c40c36853dd..dce57e851e250 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -195,7 +195,7 @@ Other Enhancements
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
 - Improved the import time of pandas by about 2.25x (:issue:`16764`)
-- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`XXXXXXX`)
+- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`17798`)
 
 .. _whatsnew_0210.api_breaking:

From 8e9fd4aeb3b96aebe9408d277617862207a097b2 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 07:32:10 +0100
Subject: [PATCH 4/6] Remove problematic Windows test

(The S3 test hits the same edge case)
---
 pandas/tests/io/json/test_compression.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 350b8f8be4403..6867bb0a838ca 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -80,18 +80,6 @@ def test_read_zipped_json():
     assert_frame_equal(uncompressed_df, compressed_df)
 
 
-@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
-def test_with_file_url(compression):
-    if compression == 'xz':
-        tm._skip_if_no_lzma()
-
-    with tm.ensure_clean() as path:
-        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
-        df.to_json(path, compression=compression)
-        file_url = 'file://localhost' + path
-        assert_frame_equal(df, pd.read_json(file_url, compression=compression))
-
-
 @pytest.mark.parametrize('compression', COMPRESSION_TYPES)
 def test_with_s3_url(compression):
     boto3 = pytest.importorskip('boto3')

From ff98b60ef41035033d4d45937b198f4a10f16d60 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 08:06:39 +0100
Subject: [PATCH 5/6] Extract decompress_file function so that
 pytest.mark.parametrize can be used cleanly

---
 pandas/tests/io/json/test_compression.py | 63 ++++++++++--------------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 6867bb0a838ca..e9976da6f6774 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -10,54 +10,43 @@
 COMPRESSION_TYPES = [None, 'bz2', 'gzip', 'xz']
 
 
-def test_compress_gzip():
-    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
-                       [12.32112, 123123.2, 321321.2]],
-                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
-
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression='gzip')
-        assert_frame_equal(df, pd.read_json(path, compression='gzip'))
-
-        # explicitly make sure file is gzipped
+def decompress_file(path, compression):
+    if compression is None:
+        f = open(path, 'rb')
+    elif compression == 'gzip':
         import gzip
-        with gzip.GzipFile(path, 'rb') as f:
-            text = f.read().decode('utf8')
-            assert_frame_equal(df, pd.read_json(text))
-
-
-def test_compress_bz2():
-    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
-                       [12.32112, 123123.2, 321321.2]],
-                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
-
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression='bz2')
-        assert_frame_equal(df, pd.read_json(path, compression='bz2'))
-
-        # explicitly make sure file is bz2ed
+        f = gzip.GzipFile(path, 'rb')
+    elif compression == 'bz2':
         import bz2
-        with bz2.BZ2File(path, 'rb') as f:
-            text = f.read().decode('utf8')
-            assert_frame_equal(df, pd.read_json(text))
+        f = bz2.BZ2File(path, 'rb')
+    elif compression == 'xz':
+        lzma = compat.import_lzma()
+        f = lzma.open(path, 'rb')
+    else:
+        msg = 'Unrecognized compression type: {}'.format(compression)
+        raise ValueError(msg)
 
     result = f.read().decode('utf8')
     f.close()
     return result
 
-def test_compress_xz():
-    tm._skip_if_no_lzma()
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_compression_roundtrip(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
 
     df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
 
     with tm.ensure_clean() as path:
-        df.to_json(path, compression='xz')
-        assert_frame_equal(df, pd.read_json(path, compression='xz'))
+        df.to_json(path, compression=compression)
+        assert_frame_equal(df, pd.read_json(path, compression=compression))
 
-        # explicitly make sure file is xzipped
-        lzma = compat.import_lzma()
-        with lzma.open(path, 'rb') as f:
-            text = f.read().decode('utf8')
-            assert_frame_equal(df, pd.read_json(text))
+        # explicitly ensure file was compressed.
+        uncompressed_content = decompress_file(path, compression)
+        assert_frame_equal(df, pd.read_json(uncompressed_content))
 
 
 def test_compress_zip_value_error():

From 402fa110afd1fba177549a690aed89d150a8d2ef Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 09:17:43 +0100
Subject: [PATCH 6/6] Fix typo in whatsnew entry

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index dce57e851e250..8d6d7947b6892 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -195,7 +195,7 @@ Other Enhancements
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
 - Improved the import time of pandas by about 2.25x (:issue:`16764`)
-- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`17798`)
+- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`)
 
 .. _whatsnew_0210.api_breaking:
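
A minimal usage sketch of the feature this series adds (the file names are
hypothetical; assumes pandas 0.21.0 with these patches applied)::

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    # Write gzip-compressed JSON. ``compression`` only takes effect when
    # the first argument is a file path rather than an open buffer.
    df.to_json('frame.json.gz', compression='gzip')

    # Read it back. The default compression='infer' picks gzip from the
    # '.gz' suffix; passing compression='gzip' explicitly also works.
    roundtripped = pd.read_json('frame.json.gz')

    # Compression composes with line-delimited and chunked reading.
    df.to_json('frame.jsonl.gz', orient='records', lines=True,
               compression='gzip')
    chunked = pd.concat(pd.read_json('frame.jsonl.gz', lines=True,
                                     chunksize=1, compression='gzip'))

Note the asymmetry the tests encode: writing supports {None, 'gzip', 'bz2',
'xz'}, while reading additionally supports 'zip' (single-file archives only)
and 'infer'; calling ``to_json`` with ``compression='zip'`` raises an error.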