DEPR: deprecate msgpack support (#27103)

* DEPR: deprecate msgpack support (closes #27084)
* warnings in docs
* review comments
pandas-dev · Jun 29, 2019 · 989f912 · 989f912
1 parent d050791
commit 989f912
Show file tree

Hide file tree

Showing 7 changed files with 80 additions and 25 deletions.
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -3393,15 +3393,15 @@ both on the writing (serialization), and reading (deserialization).
 
 .. warning::
 
-   This is a very new feature of pandas. We intend to provide certain
-   optimizations in the io of the ``msgpack`` data. Since this is marked
-   as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.
+   The msgpack format is deprecated as of 0.25 and will be removed in a future version.
+   It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
 
 .. warning::
 
    :func:`read_msgpack` is only guaranteed to be backwards compatible to pandas version 0.20.3.
 
 .. ipython:: python
+   :okwarning:
 
    df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))
    df.to_msgpack('foo.msg')
@@ -3411,20 +3411,23 @@ both on the writing (serialization), and reading (deserialization).
 You can pass a list of objects and you will receive them back on deserialization.
 
 .. ipython:: python
+   :okwarning:
 
    pd.to_msgpack('foo.msg', df, 'foo', np.array([1, 2, 3]), s)
    pd.read_msgpack('foo.msg')
 
 You can pass ``iterator=True`` to iterate over the unpacked results:
 
 .. ipython:: python
+   :okwarning:
 
    for o in pd.read_msgpack('foo.msg', iterator=True):
        print(o)
 
 You can pass ``append=True`` to the writer to append to an existing pack:
 
 .. ipython:: python
+   :okwarning:
 
    df.to_msgpack('foo.msg', append=True)
    pd.read_msgpack('foo.msg')
@@ -3435,6 +3438,7 @@ can pack arbitrary collections of Python lists, dicts, scalars, while intermixin
 pandas objects.
 
 .. ipython:: python
+   :okwarning:
 
    pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'},
                                        {'scalar': 1.}, {'s': s}]})
@@ -3453,14 +3457,16 @@ Read/write API
 Msgpacks can also be read from and written to strings.
 
 .. ipython:: python
+   :okwarning:
 
    df.to_msgpack()
 
 Furthermore you can concatenate the strings to produce a list of the original objects.
 
 .. ipython:: python
+   :okwarning:
 
-  pd.read_msgpack(df.to_msgpack() + s.to_msgpack())
+   pd.read_msgpack(df.to_msgpack() + s.to_msgpack())
 
 .. _io.hdf5:
 

diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst
@@ -829,6 +829,7 @@ Experimental
      Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.
 
   .. ipython:: python
+     :okwarning:
 
      df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))
      df.to_msgpack('foo.msg')
@@ -841,6 +842,7 @@ Experimental
   You can pass ``iterator=True`` to iterate over the unpacked results
 
   .. ipython:: python
+     :okwarning:
 
      for o in pd.read_msgpack('foo.msg', iterator=True):
          print(o)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -590,6 +590,12 @@ by a ``Series`` or ``DataFrame`` with sparse values.
 
 The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
 
+msgpack format
+^^^^^^^^^^^^^^
+
+The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. (:issue:`27084`)
+
+
 Other deprecations
 ^^^^^^^^^^^^^^^^^^
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2418,8 +2418,11 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
         """
         Serialize object to input file path using msgpack format.
 
-        THIS IS AN EXPERIMENTAL LIBRARY and the storage format
-        may not be stable until a future release.
+        .. deprecated:: 0.25.0
+
+        to_msgpack is deprecated and will be removed in a future version.
+        It is recommended to use pyarrow for on-the-wire transmission of
+        pandas objects.
 
         Parameters
         ----------

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
@@ -78,8 +78,11 @@ def to_msgpack(path_or_buf, *args, **kwargs):
     """
     msgpack (serialize) object to input file path
 
-    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
-    may not be stable until a future release.
+    .. deprecated:: 0.25.0
+
+    to_msgpack is deprecated and will be removed in a future version.
+    It is recommended to use pyarrow for on-the-wire transmission of
+    pandas objects.
 
     Parameters
     ----------
@@ -92,6 +95,12 @@ def to_msgpack(path_or_buf, *args, **kwargs):
     compress : type of compressor (zlib or blosc), default to None (no
                compression)
     """
+    warnings.warn("to_msgpack is deprecated and will be removed in a "
+                  "future version.\n"
+                  "It is recommended to use pyarrow for on-the-wire "
+                  "transmission of pandas objects.",
+                  FutureWarning, stacklevel=3)
+
     global compressor
     compressor = kwargs.pop('compress', None)
     append = kwargs.pop('append', None)
@@ -121,8 +130,11 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
     Load msgpack pandas object from the specified
     file path
 
-    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
-    may not be stable until a future release.
+    .. deprecated:: 0.25.0
+
+    read_msgpack is deprecated and will be removed in a future version.
+    It is recommended to use pyarrow for on-the-wire transmission of
+    pandas objects.
 
     Parameters
     ----------
@@ -140,6 +152,12 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
     read_msgpack is only guaranteed to be backwards compatible to pandas
     0.20.3.
     """
+    warnings.warn("The read_msgpack is deprecated and will be removed in a "
+                  "future version.\n"
+                  "It is recommended to use pyarrow for on-the-wire "
+                  "transmission of pandas objects.",
+                  FutureWarning, stacklevel=3)
+
     path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
     if iterator:
         return Iterator(path_or_buf)

diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -45,6 +45,7 @@ def __fspath__(self):
 
 # https://github.com/cython/cython/issues/1720
 @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestCommonIOCapabilities:
     data1 = """index,A,B,C,D
 foo,2,3,4,5

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
@@ -2,7 +2,7 @@
 import glob
 from io import BytesIO
 import os
-from warnings import catch_warnings
+from warnings import catch_warnings, filterwarnings
 
 import numpy as np
 import pytest
@@ -83,6 +83,7 @@ def check_arbitrary(a, b):
         assert(a == b)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestPackers:
 
     def setup_method(self, method):
@@ -97,6 +98,7 @@ def encode_decode(self, x, compress=None, **kwargs):
             return read_msgpack(p, **kwargs)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestAPI(TestPackers):
 
     def test_string_io(self):
@@ -159,6 +161,7 @@ def __init__(self):
             read_msgpack(path_or_buf=A())
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestNumpy(TestPackers):
 
     def test_numpy_scalar_float(self):
@@ -277,6 +280,7 @@ def test_list_mixed(self):
         tm.assert_almost_equal(tuple(x), x_rec)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestBasic(TestPackers):
 
     def test_timestamp(self):
@@ -322,6 +326,7 @@ def test_intervals(self):
             assert i == i_rec
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestIndex(TestPackers):
 
     def setup_method(self, method):
@@ -387,6 +392,7 @@ def categorical_index(self):
         tm.assert_frame_equal(result, df)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestSeries(TestPackers):
 
     def setup_method(self, method):
@@ -437,6 +443,7 @@ def test_basic(self):
                 assert_series_equal(i, i_rec)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestCategorical(TestPackers):
 
     def setup_method(self, method):
@@ -460,6 +467,7 @@ def test_basic(self):
                 assert_categorical_equal(i, i_rec)
 
 
+@pytest.mark.filterwarnings("ignore:msgpack:FutureWarning")
 class TestNDFrame(TestPackers):
 
     def setup_method(self, method):
@@ -549,6 +557,7 @@ def test_dataframe_duplicate_column_names(self):
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestSparse(TestPackers):
 
     def _check_roundtrip(self, obj, comparator, **kwargs):
@@ -595,6 +604,7 @@ def test_sparse_frame(self):
                               check_frame_type=True)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestCompression(TestPackers):
     """See https://github.com/pandas-dev/pandas/pull/9783
     """
@@ -676,18 +686,21 @@ def decompress(ob):
         with monkeypatch.context() as m, \
                 tm.assert_produces_warning(PerformanceWarning) as ws:
             m.setattr(compress_module, 'decompress', decompress)
-            i_rec = self.encode_decode(self.frame, compress=compress)
-            for k in self.frame.keys():
-
-                value = i_rec[k]
-                expected = self.frame[k]
-                assert_frame_equal(value, expected)
-                # make sure that we can write to the new frames even though
-                # we needed to copy the data
-                for block in value._data.blocks:
-                    assert block.values.flags.writeable
-                    # mutate the data in some way
-                    block.values[0] += rhs[block.dtype]
+
+            with catch_warnings():
+                filterwarnings('ignore', category=FutureWarning)
+                i_rec = self.encode_decode(self.frame, compress=compress)
+                for k in self.frame.keys():
+
+                    value = i_rec[k]
+                    expected = self.frame[k]
+                    assert_frame_equal(value, expected)
+                    # make sure that we can write to the new frames even though
+                    # we needed to copy the data
+                    for block in value._data.blocks:
+                        assert block.values.flags.writeable
+                        # mutate the data in some way
+                        block.values[0] += rhs[block.dtype]
 
         for w in ws:
             # check the messages from our warnings
@@ -715,14 +728,18 @@ def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch):
     def _test_small_strings_no_warn(self, compress):
         empty = np.array([], dtype='uint8')
         with tm.assert_produces_warning(None):
-            empty_unpacked = self.encode_decode(empty, compress=compress)
+            with catch_warnings():
+                filterwarnings('ignore', category=FutureWarning)
+                empty_unpacked = self.encode_decode(empty, compress=compress)
 
         tm.assert_numpy_array_equal(empty_unpacked, empty)
         assert empty_unpacked.flags.writeable
 
         char = np.array([ord(b'a')], dtype='uint8')
         with tm.assert_produces_warning(None):
-            char_unpacked = self.encode_decode(char, compress=compress)
+            with catch_warnings():
+                filterwarnings('ignore', category=FutureWarning)
+                char_unpacked = self.encode_decode(char, compress=compress)
 
         tm.assert_numpy_array_equal(char_unpacked, char)
         assert char_unpacked.flags.writeable
@@ -794,6 +811,7 @@ def test_readonly_axis_zlib_to_sql(self):
         assert_frame_equal(expected, result)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestEncoding(TestPackers):
 
     def setup_method(self, method):
@@ -839,6 +857,7 @@ def legacy_packer(request, datapath):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestMsgpack:
     """
     How to add msgpack tests: