From d0acb0c0fc012711e9b2e85f829aa849dce5e7c3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Apr 2017 16:11:55 +0200 Subject: [PATCH] CLN: move tools.hashing to util.hashing --- asv_bench/benchmarks/algorithms.py | 22 +++++++++++++--------- doc/source/whatsnew/v0.20.0.txt | 6 +++--- pandas/core/indexes/multi.py | 4 ++-- pandas/tests/reshape/test_hashing.py | 2 +- pandas/{tools => util}/hashing.py | 2 +- pandas/{tools => util}/hashing.pyx | 0 setup.py | 4 ++-- 7 files changed, 22 insertions(+), 18 deletions(-) rename pandas/{tools => util}/hashing.py (99%) rename pandas/{tools => util}/hashing.pyx (100%) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 0e2182c58d44c..d79051ed2d66c 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -1,12 +1,16 @@ +from importlib import import_module + import numpy as np + import pandas as pd from pandas.util import testing as tm -try: - from pandas.tools.hashing import hash_pandas_object -except ImportError: - pass - +for imp in ['pandas.util.hashing', 'pandas.tools.hashing']: + try: + hashing = import_module(imp) + break + except: + pass class Algorithms(object): goal_time = 0.2 @@ -108,13 +112,13 @@ def setup(self): self.df.iloc[10:20] = np.nan def time_frame(self): - hash_pandas_object(self.df) + hashing.hash_pandas_object(self.df) def time_series_int(self): - hash_pandas_object(self.df.E) + hashing.hash_pandas_object(self.df.E) def time_series_string(self): - hash_pandas_object(self.df.B) + hashing.hash_pandas_object(self.df.B) def time_series_categorical(self): - hash_pandas_object(self.df.C) + hashing.hash_pandas_object(self.df.C) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 02c54f28a1695..86d9bef636e17 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -533,7 +533,7 @@ Other Enhancements - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). -- ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) +- ``pandas.util.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`). @@ -1423,7 +1423,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.types", "pandas.core.dtypes", "" "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" "pandas._join", "pandas._libs.join", "" - "pandas._hash", "pandas.tools.libhash", "" + "pandas._hash", "pandas.util.libhashing", "" "pandas._period", "pandas._libs.period", "" "pandas._sparse", "pandas.core.sparse.libsparse", "" "pandas._testing", "pandas.util.libtesting", "" @@ -1619,7 +1619,7 @@ I/O - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) - Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) - Bug in ``pd.to_csv()`` in which there was numeric overflow when a timestamp index was being written (:issue:`15982`) -- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) +- Bug in ``pd.util.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) - Bug in ``.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 92baf9d289cd2..d46d2c78fbdb0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -718,7 +718,7 @@ def _inferred_type_levels(self): @cache_readonly def _hashed_values(self): """ return a uint64 ndarray of my hashed values """ - from pandas.tools.hashing import hash_tuples + from pandas.util.hashing import hash_tuples return hash_tuples(self) def _hashed_indexing_key(self, key): @@ -740,7 +740,7 @@ def _hashed_indexing_key(self, key): we need to stringify if we have mixed levels """ - from pandas.tools.hashing import hash_tuples + from pandas.util.hashing import hash_tuples if not isinstance(key, tuple): return hash_tuples(key) diff --git a/pandas/tests/reshape/test_hashing.py b/pandas/tests/reshape/test_hashing.py index 7ab9558d961aa..21e84fa6be369 100644 --- a/pandas/tests/reshape/test_hashing.py +++ b/pandas/tests/reshape/test_hashing.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import DataFrame, Series, Index, MultiIndex -from pandas.tools.hashing import hash_array, hash_tuples, hash_pandas_object +from pandas.util.hashing import hash_array, hash_tuples, hash_pandas_object import pandas.util.testing as tm diff --git a/pandas/tools/hashing.py b/pandas/util/hashing.py similarity index 99% rename from pandas/tools/hashing.py rename to pandas/util/hashing.py index 275c1c87ea57a..3046c62a03f48 100644 --- a/pandas/tools/hashing.py +++ b/pandas/util/hashing.py @@ -5,7 +5,7 @@ import numpy as np from pandas import Series, factorize, Categorical, Index, MultiIndex -from pandas.tools import libhashing as _hash +from pandas.util import libhashing as _hash from pandas._libs.lib import is_bool_array from pandas.core.dtypes.generic import ( ABCIndexClass, diff --git a/pandas/tools/hashing.pyx b/pandas/util/hashing.pyx similarity index 100% rename from pandas/tools/hashing.pyx rename to pandas/util/hashing.pyx diff --git a/setup.py b/setup.py index 830968768ceb2..5647e18aa227c 100755 --- a/setup.py +++ b/setup.py @@ -528,8 +528,8 @@ def pxd(name): _pxi_dep['sparse'])}, 'util.libtesting': {'pyxfile': 'util/testing', 'depends': ['pandas/util/testing.pyx']}, - 'tools.libhashing': {'pyxfile': 'tools/hashing', - 'depends': ['pandas/tools/hashing.pyx']}, + 'util.libhashing': {'pyxfile': 'util/hashing', + 'depends': ['pandas/util/hashing.pyx']}, 'io.sas.libsas': {'pyxfile': 'io/sas/sas'}, }