From 9f2af4248f7212f512da977da2cbacd98d30d931 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Thu, 5 Oct 2017 06:52:56 +0100
Subject: [PATCH 1/6] ENH: Add transparent compression to json reading/writing

This works in the same way as the ``compression`` argument to ``read_csv``
and ``to_csv``. I've added tests confirming that it works with file paths,
as well as with file URLs and S3 URLs.
---
 doc/source/whatsnew/v0.21.0.txt              |   2 +-
 pandas/core/generic.py                       |  10 +-
 pandas/io/json/json.py                       |  49 ++++--
 .../tests/io/json/data/tsframe_v012.json.zip | Bin 0 -> 436 bytes
 pandas/tests/io/json/test_compression.py     | 155 ++++++++++++++++++
 pandas/tests/io/json/test_readlines.py       |   2 +-
 6 files changed, 198 insertions(+), 20 deletions(-)
 create mode 100644 pandas/tests/io/json/data/tsframe_v012.json.zip
 create mode 100644 pandas/tests/io/json/test_compression.py

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 7fbf2533428dc..22c40c36853dd 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -195,7 +195,7 @@ Other Enhancements
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
 - Improved the import time of pandas by about 2.25x (:issue:`16764`)
-
+- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`XXXXXXX`)
 
 .. _whatsnew_0210.api_breaking:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 942a9ff279092..c7ae9bbee9013 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1258,7 +1258,7 @@ def _repr_latex_(self):
 
     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
-                default_handler=None, lines=False):
+                default_handler=None, lines=False, compression=None):
         """
         Convert the object to a JSON string.
 
@@ -1320,6 +1320,12 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
 
             .. versionadded:: 0.19.0
 
+        compression : {None, 'gzip', 'bz2', 'xz'}
+            A string representing the compression to use in the output file,
+            only used when the first argument is a filename
+
+            .. versionadded:: 0.21.0
+
         Returns
         -------
         same type as input object with filtered info axis
@@ -1372,7 +1378,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
                             double_precision=double_precision,
                             force_ascii=force_ascii, date_unit=date_unit,
                             default_handler=default_handler,
-                            lines=lines)
+                            lines=lines, compression=compression)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """Write the contained data to an HDF5 file using HDFStore.
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index ab74b265b6a06..416059f3474f6 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -5,11 +5,12 @@
 
 import pandas._libs.json as json
 from pandas._libs.tslib import iNaT
-from pandas.compat import StringIO, long, u
+from pandas.compat import PY3, StringIO, long, u
 from pandas import compat, isna
 from pandas import Series, DataFrame, to_datetime, MultiIndex
 from pandas.io.common import (get_filepath_or_buffer, _get_handle,
-                              _stringify_path, BaseIterator)
+                              _infer_compression, _stringify_path,
+                              BaseIterator)
 from pandas.io.parsers import _validate_integer
 from pandas.core.common import AbstractMethodError
 from pandas.core.reshape.concat import concat
@@ -27,7 +28,7 @@
 # interface to/from
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
-            default_handler=None, lines=False):
+            default_handler=None, lines=False, compression=None):
 
     path_or_buf = _stringify_path(path_or_buf)
     if lines and orient != 'records':
@@ -54,8 +55,11 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
         s = _convert_to_line_delimits(s)
 
     if isinstance(path_or_buf, compat.string_types):
-        with open(path_or_buf, 'w') as fh:
+        fh, handles = _get_handle(path_or_buf, 'w', compression=compression)
+        try:
             fh.write(s)
+        finally:
+            fh.close()
     elif path_or_buf is None:
         return s
     else:
@@ -178,7 +182,7 @@ def write(self):
 
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
-              lines=False, chunksize=None):
+              lines=False, chunksize=None, compression='infer'):
     """
     Convert a JSON string to pandas object
 
@@ -277,6 +281,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
         .. versionadded:: 0.21.0
 
+    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer', then use
+        gzip, bz2, zip or xz if path_or_buf is a string ending in
+        '.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
+        otherwise. If using 'zip', the ZIP file must contain only one data
+        file to be read in. Set to None for no decompression.
+
+        .. versionadded:: 0.21.0
+
     Returns
     -------
     result : Series or DataFrame, depending on the value of `typ`.
@@ -334,15 +347,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
       {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
     """
 
-    filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
-                                                      encoding=encoding)
+    compression = _infer_compression(path_or_buf, compression)
+    filepath_or_buffer, _, compression = get_filepath_or_buffer(
+        path_or_buf, encoding=encoding, compression=compression,
+    )
 
     json_reader = JsonReader(
         filepath_or_buffer, orient=orient, typ=typ, dtype=dtype,
         convert_axes=convert_axes, convert_dates=convert_dates,
         keep_default_dates=keep_default_dates, numpy=numpy,
         precise_float=precise_float, date_unit=date_unit, encoding=encoding,
-        lines=lines, chunksize=chunksize
+        lines=lines, chunksize=chunksize, compression=compression,
     )
 
     if chunksize:
@@ -361,7 +376,7 @@ class JsonReader(BaseIterator):
     """
     def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes,
                  convert_dates, keep_default_dates, numpy, precise_float,
-                 date_unit, encoding, lines, chunksize):
+                 date_unit, encoding, lines, chunksize, compression):
 
         self.path_or_buf = filepath_or_buffer
         self.orient = orient
@@ -374,6 +389,7 @@ def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes,
         self.precise_float = precise_float
         self.date_unit = date_unit
         self.encoding = encoding
+        self.compression = compression
         self.lines = lines
         self.chunksize = chunksize
         self.nrows_seen = 0
@@ -415,20 +431,21 @@ def _get_data_from_filepath(self, filepath_or_buffer):
 
         data = filepath_or_buffer
 
+        exists = False
        if isinstance(data, compat.string_types):
             try:
                 exists = os.path.exists(filepath_or_buffer)
-            # gh-5874: if the filepath is too long will raise here
             except (TypeError, ValueError):
                 pass
-            else:
-                if exists:
-                    data, _ = _get_handle(filepath_or_buffer, 'r',
-                                          encoding=self.encoding)
-                    self.should_close = True
-                    self.open_stream = data
+
+        if exists or self.compression is not None:
+            data, _ = _get_handle(filepath_or_buffer, 'r',
+                                  encoding=self.encoding,
+                                  compression=self.compression)
+            # TODO (Simon): Determine if we need to worry about closing file pointers that are passed in from e.g. S3
+            self.should_close = True
+            self.open_stream = data
 
         return data

diff --git a/pandas/tests/io/json/data/tsframe_v012.json.zip b/pandas/tests/io/json/data/tsframe_v012.json.zip
new file mode 100644
index 0000000000000000000000000000000000000000..100ba0c87b2ba55c169081bb0ed60c5db7391bbb
GIT binary patch
literal 436
zcmWIWW@Zs#-~d8>PgidSBp}Ejz)(`0R+N~V8ee8$Xrz}_oSzpO!Nb60eJyg=i>r~}
z7)2P4PTcFqY$(uj|LLnEw<6!?Th+y}ylfKDbYKphQr@pG)b!*{7t{95#=p{PX2~tP
zo9VSN!2DO`Wj2tkn(477rQ0RX7Wsm1^R

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
new file mode 100644
index 0000000000000..72cd122777d67
--- /dev/null
+++ b/pandas/tests/io/json/test_compression.py
@@ -0,0 +1,155 @@
+import pytest
+import moto
+
+import pandas as pd
+from pandas import compat
+import pandas.util.testing as tm
+from pandas.util.testing import assert_frame_equal, assert_raises_regex
+
+
+COMPRESSION_TYPES = [None, 'bz2', 'gzip', 'xz']
+
+
+def test_compress_gzip():
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression='gzip')
+        assert_frame_equal(df, pd.read_json(path, compression='gzip'))
+
+        # explicitly make sure file is gzipped
+        import gzip
+        with gzip.GzipFile(path, 'rb') as f:
+            text = f.read().decode('utf8')
+            assert_frame_equal(df, pd.read_json(text))
+
+
+def test_compress_bz2():
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression='bz2')
+        assert_frame_equal(df, pd.read_json(path, compression='bz2'))
+
+        # explicitly make sure file is bz2ed
+        import bz2
+        with bz2.BZ2File(path, 'rb') as f:
+            text = f.read().decode('utf8')
+            assert_frame_equal(df, pd.read_json(text))
+
+
+def test_compress_xz():
+    tm._skip_if_no_lzma()
+
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        df.to_json(path, compression='xz')
+        assert_frame_equal(df, pd.read_json(path, compression='xz'))
+
+        # explicitly make sure file is xzipped
+        lzma = compat.import_lzma()
+        with lzma.open(path, 'rb') as f:
+            text = f.read().decode('utf8')
+            assert_frame_equal(df, pd.read_json(text))
+
+
+def test_compress_zip_value_error():
+    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
+                       [12.32112, 123123.2, 321321.2]],
+                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
+
+    with tm.ensure_clean() as path:
+        import zipfile
+        pytest.raises(zipfile.BadZipfile, df.to_json, path, compression="zip")
+
+
+def test_read_zipped_json():
+    uncompressed_path = tm.get_data_path("tsframe_v012.json")
+    uncompressed_df = pd.read_json(uncompressed_path)
+
+    compressed_path = tm.get_data_path("tsframe_v012.json.zip")
+    compressed_df = pd.read_json(compressed_path, compression='zip')
+
+    assert_frame_equal(uncompressed_df, compressed_df)
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_with_file_url(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    with tm.ensure_clean() as path:
+        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+        df.to_json(path, compression=compression)
+        file_url = 'file://localhost' + path
+        assert_frame_equal(df, pd.read_json(file_url, compression=compression))
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_with_s3_url(compression):
+    boto3 = pytest.importorskip('boto3')
+    pytest.importorskip('s3fs')
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+    with moto.mock_s3():
+        conn = boto3.resource("s3", region_name="us-east-1")
+        bucket = conn.create_bucket(Bucket="pandas-test")
+
+        with tm.ensure_clean() as path:
+            df.to_json(path, compression=compression)
+            with open(path, 'rb') as f:
+                bucket.put_object(Key='test-1', Body=f)
+
+        s3_df = pd.read_json('s3://pandas-test/test-1', compression=compression)
+        assert_frame_equal(df, s3_df)
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_lines_with_compression(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    with tm.ensure_clean() as path:
+        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+        df.to_json(path, orient='records', lines=True, compression=compression)
+        roundtripped_df = pd.read_json(path, lines=True,
+                                       compression=compression)
+        assert_frame_equal(df, roundtripped_df)
+
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_chunksize_with_compression(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
+
+    with tm.ensure_clean() as path:
+        df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
+        df.to_json(path, orient='records', lines=True, compression=compression)
+
+        roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1,
+                                                 compression=compression))
+        assert_frame_equal(df, roundtripped_df)
+
+
+def test_write_unsupported_compression_type():
+    df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
+    with tm.ensure_clean() as path:
+        msg = "Unrecognized compression type: unsupported"
+        assert_raises_regex(ValueError, msg, df.to_json,
+                            path, compression="unsupported")
+
+
+def test_read_unsupported_compression_type():
+    with tm.ensure_clean() as path:
+        msg = "Unrecognized compression type: unsupported"
+        assert_raises_regex(ValueError, msg, pd.read_json,
+                            path, compression="unsupported")
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index d14355b07cf20..95f23e82fced0 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -128,7 +128,7 @@ def test_readjson_chunks_closes(chunksize):
         path, orient=None, typ="frame", dtype=True, convert_axes=True,
         convert_dates=True, keep_default_dates=True, numpy=False,
         precise_float=False, date_unit=None, encoding=None,
-        lines=True, chunksize=chunksize)
+        lines=True, chunksize=chunksize, compression=None)
     reader.read()
     assert reader.open_stream.closed, "didn't close stream with \
         chunksize = %s" % chunksize

From 3ed830cae20ec5fa9fcf53dacfceb789b5a04064 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Thu, 5 Oct 2017 21:10:26 +0100
Subject: [PATCH 2/6] Fix PEP8 violations

---
 pandas/io/json/json.py                   | 3 +--
 pandas/tests/io/json/test_compression.py | 5 +++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index 416059f3474f6..be39f4baba0fb 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -5,7 +5,7 @@
 
 import pandas._libs.json as json
 from pandas._libs.tslib import iNaT
-from pandas.compat import PY3, StringIO, long, u
+from pandas.compat import StringIO, long, u
 from pandas import compat, isna
 from pandas import Series, DataFrame, to_datetime, MultiIndex
 from pandas.io.common import (get_filepath_or_buffer, _get_handle,
@@ -443,7 +443,6 @@ def _get_data_from_filepath(self, filepath_or_buffer):
             data, _ = _get_handle(filepath_or_buffer, 'r',
                                   encoding=self.encoding,
                                   compression=self.compression)
-            # TODO (Simon): Determine if we need to worry about closing file pointers that are passed in from e.g. S3
             self.should_close = True
             self.open_stream = data
 
diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 72cd122777d67..350b8f8be4403 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -109,8 +109,9 @@ def test_with_s3_url(compression):
             with open(path, 'rb') as f:
                 bucket.put_object(Key='test-1', Body=f)
 
-        s3_df = pd.read_json('s3://pandas-test/test-1', compression=compression)
-        assert_frame_equal(df, s3_df)
+        roundtripped_df = pd.read_json('s3://pandas-test/test-1',
+                                       compression=compression)
+        assert_frame_equal(df, roundtripped_df)
 
 
 @pytest.mark.parametrize('compression', COMPRESSION_TYPES)

From 2a7c3b27c81e05a9fed5a36ef59d11517f7279a2 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 07:30:38 +0100
Subject: [PATCH 3/6] Add PR number to whatsnew entry

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 22c40c36853dd..dce57e851e250 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -195,7 +195,7 @@ Other Enhancements
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
 - Improved the import time of pandas by about 2.25x (:issue:`16764`)
-- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`XXXXXXX`)
+- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`17798`)
 
 .. _whatsnew_0210.api_breaking:

From 8e9fd4aeb3b96aebe9408d277617862207a097b2 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 07:32:10 +0100
Subject: [PATCH 4/6] Remove problematic Windows test

(The S3 test hits the same edge case)
---
 pandas/tests/io/json/test_compression.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 350b8f8be4403..6867bb0a838ca 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -80,18 +80,6 @@ def test_read_zipped_json():
     assert_frame_equal(uncompressed_df, compressed_df)
 
 
-@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
-def test_with_file_url(compression):
-    if compression == 'xz':
-        tm._skip_if_no_lzma()
-
-    with tm.ensure_clean() as path:
-        df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
-        df.to_json(path, compression=compression)
-        file_url = 'file://localhost' + path
-        assert_frame_equal(df, pd.read_json(file_url, compression=compression))
-
-
 @pytest.mark.parametrize('compression', COMPRESSION_TYPES)
 def test_with_s3_url(compression):
     boto3 = pytest.importorskip('boto3')

From ff98b60ef41035033d4d45937b198f4a10f16d60 Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 08:06:39 +0100
Subject: [PATCH 5/6] Extract decompress_file function so that
 pytest.mark.parametrize can be used cleanly

---
 pandas/tests/io/json/test_compression.py | 63 ++++++++++--------------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index 6867bb0a838ca..e9976da6f6774 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -10,54 +10,43 @@
 COMPRESSION_TYPES = [None, 'bz2', 'gzip', 'xz']
 
 
-def test_compress_gzip():
-    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
-                       [12.32112, 123123.2, 321321.2]],
-                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
-
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression='gzip')
-        assert_frame_equal(df, pd.read_json(path, compression='gzip'))
-
-        # explicitly make sure file is gzipped
+def decompress_file(path, compression):
+    if compression is None:
+        f = open(path, 'rb')
+    elif compression == 'gzip':
         import gzip
-        with gzip.GzipFile(path, 'rb') as f:
-            text = f.read().decode('utf8')
-            assert_frame_equal(df, pd.read_json(text))
-
-
-def test_compress_bz2():
-    df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
-                       [12.32112, 123123.2, 321321.2]],
-                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
-
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression='bz2')
-        assert_frame_equal(df, pd.read_json(path, compression='bz2'))
-
-        # explicitly make sure file is bz2ed
+        f = gzip.GzipFile(path, 'rb')
+    elif compression == 'bz2':
         import bz2
-        with bz2.BZ2File(path, 'rb') as f:
-            text = f.read().decode('utf8')
-            assert_frame_equal(df, pd.read_json(text))
+        f = bz2.BZ2File(path, 'rb')
+    elif compression == 'xz':
+        lzma = compat.import_lzma()
+        f = lzma.open(path, 'rb')
+    else:
+        msg = 'Unrecognized compression type: {}'.format(compression)
+        raise ValueError(msg)
 
     result = f.read().decode('utf8')
     f.close()
     return result
 
-def test_compress_xz():
-    tm._skip_if_no_lzma()
+
+@pytest.mark.parametrize('compression', COMPRESSION_TYPES)
+def test_compression_roundtrip(compression):
+    if compression == 'xz':
+        tm._skip_if_no_lzma()
 
     df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                      index=['A', 'B'], columns=['X', 'Y', 'Z'])
 
     with tm.ensure_clean() as path:
-        df.to_json(path, compression='xz')
-        assert_frame_equal(df, pd.read_json(path, compression='xz'))
+        df.to_json(path, compression=compression)
+        assert_frame_equal(df, pd.read_json(path, compression=compression))
 
-        # explicitly make sure file is xzipped
-        lzma = compat.import_lzma()
-        with lzma.open(path, 'rb') as f:
-            text = f.read().decode('utf8')
-            assert_frame_equal(df, pd.read_json(text))
+        # explicitly ensure file was compressed.
+        uncompressed_content = decompress_file(path, compression)
+        assert_frame_equal(df, pd.read_json(uncompressed_content))
 
 
 def test_compress_zip_value_error():

From 402fa110afd1fba177549a690aed89d150a8d2ef Mon Sep 17 00:00:00 2001
From: Simon Gibbons
Date: Fri, 6 Oct 2017 09:17:43 +0100
Subject: [PATCH 6/6] Fix typo in whatsnew entry

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index dce57e851e250..8d6d7947b6892 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -195,7 +195,7 @@ Other Enhancements
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
 - Improved the import time of pandas by about 2.25x (:issue:`16764`)
-- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handled compressed files. (:issue:`17798`)
+- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`)
 
 .. _whatsnew_0210.api_breaking:
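
A minimal usage sketch of the feature this series adds (the file names are
hypothetical; assumes pandas 0.21.0 with these patches applied)::

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    # Write gzip-compressed JSON. ``compression`` only takes effect when
    # the first argument is a file path rather than an open buffer.
    df.to_json('frame.json.gz', compression='gzip')

    # Read it back. The default compression='infer' picks gzip from the
    # '.gz' suffix; passing compression='gzip' explicitly also works.
    roundtripped = pd.read_json('frame.json.gz')

    # Compression composes with line-delimited and chunked reading.
    df.to_json('frame.jsonl.gz', orient='records', lines=True,
               compression='gzip')
    chunked = pd.concat(pd.read_json('frame.jsonl.gz', lines=True,
                                     chunksize=1, compression='gzip'))

Note the asymmetry the tests encode: writing supports {None, 'gzip', 'bz2',
'xz'}, while reading additionally supports 'zip' (single-file archives only)
and 'infer'; calling ``to_json`` with ``compression='zip'`` raises an error.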