[CLN] De-privatize commonly-used functions (#21870)

pandas-dev · Jul 17, 2018
commit 537b65c (1 parent: a860028)
Show file tree

Hide file tree

Showing 52 changed files with 385 additions and 378 deletions.
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -485,7 +485,7 @@ cpdef ndarray[object] astype_str(ndarray arr):
 
 def clean_index_list(list obj):
     """
-    Utility used in pandas.core.index._ensure_index
+    Utility used in pandas.core.index.ensure_index
     """
     cdef:
         Py_ssize_t i, n = len(obj)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -27,9 +27,9 @@
     is_datetime64_any_dtype, is_datetime64tz_dtype,
     is_timedelta64_dtype, is_datetimelike,
     is_interval_dtype, is_scalar, is_list_like,
-    _ensure_platform_int, _ensure_object,
-    _ensure_float64, _ensure_uint64,
-    _ensure_int64)
+    ensure_platform_int, ensure_object,
+    ensure_float64, ensure_uint64,
+    ensure_int64)
 from pandas.compat.numpy import _np_version_under1p10
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
@@ -73,32 +73,32 @@ def _ensure_data(values, dtype=None):
     # we check some simple dtypes first
     try:
         if is_object_dtype(dtype):
-            return _ensure_object(np.asarray(values)), 'object', 'object'
+            return ensure_object(np.asarray(values)), 'object', 'object'
         if is_bool_dtype(values) or is_bool_dtype(dtype):
             # we are actually coercing to uint64
             # until our algos support uint8 directly (see TODO)
             return np.asarray(values).astype('uint64'), 'bool', 'uint64'
         elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype):
-            return _ensure_int64(values), 'int64', 'int64'
+            return ensure_int64(values), 'int64', 'int64'
         elif (is_unsigned_integer_dtype(values) or
               is_unsigned_integer_dtype(dtype)):
-            return _ensure_uint64(values), 'uint64', 'uint64'
+            return ensure_uint64(values), 'uint64', 'uint64'
         elif is_float_dtype(values) or is_float_dtype(dtype):
-            return _ensure_float64(values), 'float64', 'float64'
+            return ensure_float64(values), 'float64', 'float64'
         elif is_object_dtype(values) and dtype is None:
-            return _ensure_object(np.asarray(values)), 'object', 'object'
+            return ensure_object(np.asarray(values)), 'object', 'object'
         elif is_complex_dtype(values) or is_complex_dtype(dtype):
 
             # ignore the fact that we are casting to float
             # which discards complex parts
             with catch_warnings(record=True):
-                values = _ensure_float64(values)
+                values = ensure_float64(values)
             return values, 'float64', 'float64'
 
     except (TypeError, ValueError):
         # if we are trying to coerce to a dtype
         # and it is incompat this will fall thru to here
-        return _ensure_object(values), 'object', 'object'
+        return ensure_object(values), 'object', 'object'
 
     # datetimelike
     if (needs_i8_conversion(values) or
@@ -129,13 +129,13 @@ def _ensure_data(values, dtype=None):
 
         # we are actually coercing to int64
         # until our algos support int* directly (not all do)
-        values = _ensure_int64(values)
+        values = ensure_int64(values)
 
         return values, dtype, 'int64'
 
     # we have failed, return object
     values = np.asarray(values)
-    return _ensure_object(values), 'object', 'object'
+    return ensure_object(values), 'object', 'object'
 
 
 def _reconstruct_data(values, dtype, original):
@@ -475,7 +475,7 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None,
     labels = table.get_labels(values, uniques, 0, na_sentinel,
                               na_value=na_value)
 
-    labels = _ensure_platform_int(labels)
+    labels = ensure_platform_int(labels)
     uniques = uniques.to_array()
     return labels, uniques
 
@@ -1309,7 +1309,7 @@ def _take_nd_object(arr, indexer, out, axis, fill_value, mask_info):
     if arr.dtype != out.dtype:
         arr = arr.astype(out.dtype)
     if arr.shape[axis] > 0:
-        arr.take(_ensure_platform_int(indexer), axis=axis, out=out)
+        arr.take(ensure_platform_int(indexer), axis=axis, out=out)
     if needs_masking:
         outindexer = [slice(None)] * arr.ndim
         outindexer[axis] = mask
@@ -1450,7 +1450,7 @@ def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None):
             return func
 
     def func(arr, indexer, out, fill_value=np.nan):
-        indexer = _ensure_int64(indexer)
+        indexer = ensure_int64(indexer)
         _take_nd_object(arr, indexer, out, axis=axis, fill_value=fill_value,
                         mask_info=mask_info)
 
@@ -1609,7 +1609,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
         indexer = np.arange(arr.shape[axis], dtype=np.int64)
         dtype, fill_value = arr.dtype, arr.dtype.type()
     else:
-        indexer = _ensure_int64(indexer, copy=False)
+        indexer = ensure_int64(indexer, copy=False)
         if not allow_fill:
             dtype, fill_value = arr.dtype, arr.dtype.type()
             mask_info = None, False
@@ -1687,11 +1687,11 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None,
         if row_idx is None:
             row_idx = np.arange(arr.shape[0], dtype=np.int64)
         else:
-            row_idx = _ensure_int64(row_idx)
+            row_idx = ensure_int64(row_idx)
         if col_idx is None:
             col_idx = np.arange(arr.shape[1], dtype=np.int64)
         else:
-            col_idx = _ensure_int64(col_idx)
+            col_idx = ensure_int64(col_idx)
         indexer = row_idx, col_idx
         if not allow_fill:
             dtype, fill_value = arr.dtype, arr.dtype.type()

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -17,9 +17,9 @@
     coerce_indexer_dtype)
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.common import (
-    _ensure_int64,
-    _ensure_object,
-    _ensure_platform_int,
+    ensure_int64,
+    ensure_object,
+    ensure_platform_int,
     is_extension_array_dtype,
     is_dtype_equal,
     is_datetimelike,
@@ -1221,7 +1221,7 @@ def shift(self, periods):
         if codes.ndim > 1:
             raise NotImplementedError("Categorical with ndim > 1.")
         if np.prod(codes.shape) and (periods != 0):
-            codes = np.roll(codes, _ensure_platform_int(periods), axis=0)
+            codes = np.roll(codes, ensure_platform_int(periods), axis=0)
             if periods > 0:
                 codes[:periods] = -1
             else:
@@ -2137,7 +2137,7 @@ def mode(self, dropna=True):
         if dropna:
             good = self._codes != -1
             values = self._codes[good]
-        values = sorted(htable.mode_int64(_ensure_int64(values), dropna))
+        values = sorted(htable.mode_int64(ensure_int64(values), dropna))
         result = self._constructor(values=values, categories=self.categories,
                                    ordered=self.ordered, fastpath=True)
         return result
@@ -2431,8 +2431,8 @@ def _get_codes_for_values(values, categories):
 
     from pandas.core.algorithms import _get_data_algo, _hashtables
     if not is_dtype_equal(values.dtype, categories.dtype):
-        values = _ensure_object(values)
-        categories = _ensure_object(categories)
+        values = ensure_object(values)
+        categories = ensure_object(categories)
 
     (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
     (_, _), cats = _get_data_algo(categories, _hashtables)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -21,7 +21,7 @@
     is_datetime64tz_dtype,
     is_datetime64_dtype,
     is_timedelta64_dtype,
-    _ensure_int64)
+    ensure_int64)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
@@ -167,7 +167,7 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs):
             values = np.array(values, copy=False)
 
         if not is_datetime64_dtype(values):
-            values = _ensure_int64(values).view(_NS_DTYPE)
+            values = ensure_int64(values).view(_NS_DTYPE)
 
         result = object.__new__(cls)
         result._data = values

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -19,7 +19,7 @@
                                         ABCSeries, ABCIntervalIndex,
                                         ABCInterval)
 from pandas.core.dtypes.missing import isna, notna
-from pandas.core.indexes.base import Index, _ensure_index
+from pandas.core.indexes.base import Index, ensure_index
 from pandas.util._decorators import Appender
 from pandas.util._doctools import _WritableDoc
 
@@ -145,8 +145,8 @@ def _simple_new(cls, left, right, closed=None,
         result = IntervalMixin.__new__(cls)
 
         closed = closed or 'right'
-        left = _ensure_index(left, copy=copy)
-        right = _ensure_index(right, copy=copy)
+        left = ensure_index(left, copy=copy)
+        right = ensure_index(right, copy=copy)
 
         if dtype is not None:
             # GH 19262: dtype must be an IntervalDtype to override inferred

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -11,7 +11,7 @@
 from pandas import compat
 
 from pandas.core.dtypes.common import (
-    _TD_DTYPE, _ensure_int64, is_timedelta64_dtype, is_list_like)
+    _TD_DTYPE, ensure_int64, is_timedelta64_dtype, is_list_like)
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.missing import isna
 
@@ -117,7 +117,7 @@ def _simple_new(cls, values, freq=None, **kwargs):
                 # non-nano unit
                 values = values.astype(_TD_DTYPE)
             else:
-                values = _ensure_int64(values).view(_TD_DTYPE)
+                values = ensure_int64(values).view(_TD_DTYPE)
 
         result = object.__new__(cls)
         result._data = values

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -13,7 +13,7 @@
 from pandas import compat
 from pandas.compat import long, zip, iteritems, PY36, OrderedDict
 from pandas.core.config import get_option
-from pandas.core.dtypes.generic import ABCSeries, ABCIndex
+from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
 from pandas.core.dtypes.common import is_integer
 from pandas.core.dtypes.inference import _iterable_not_string
 from pandas.core.dtypes.missing import isna, isnull, notnull  # noqa
@@ -120,11 +120,6 @@ def is_bool_indexer(key):
     return False
 
 
-def _default_index(n):
-    from pandas.core.index import RangeIndex
-    return RangeIndex(0, n, name=None)
-
-
 def _mut_exclusive(**kwargs):
     item1, item2 = kwargs.items()
     label1, val1 = item1
@@ -299,11 +294,10 @@ def intersection(*seqs):
 
 
 def _asarray_tuplesafe(values, dtype=None):
-    from pandas.core.index import Index
 
     if not (isinstance(values, (list, tuple)) or hasattr(values, '__array__')):
         values = list(values)
-    elif isinstance(values, Index):
+    elif isinstance(values, ABCIndexClass):
         return values.values
 
     if isinstance(values, list) and dtype in [np.object_, object]:

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -8,7 +8,7 @@
 from pandas._libs import tslib, lib, tslibs
 from pandas._libs.tslibs import iNaT
 from pandas.compat import string_types, text_type, PY3
-from .common import (_ensure_object, is_bool, is_integer, is_float,
+from .common import (ensure_object, is_bool, is_integer, is_float,
                      is_complex, is_datetimetz, is_categorical_dtype,
                      is_datetimelike,
                      is_extension_type,
@@ -25,8 +25,8 @@
                      is_bool_dtype, is_scalar,
                      is_string_dtype, _string_dtypes,
                      pandas_dtype,
-                     _ensure_int8, _ensure_int16,
-                     _ensure_int32, _ensure_int64,
+                     ensure_int8, ensure_int16,
+                     ensure_int32, ensure_int64,
                      _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
                      _POSSIBLY_CAST_DTYPES)
 from .dtypes import (ExtensionDtype, PandasExtensionDtype, DatetimeTZDtype,
@@ -85,7 +85,7 @@ def trans(x):
 
     if isinstance(dtype, string_types):
         if dtype == 'infer':
-            inferred_type = lib.infer_dtype(_ensure_object(result.ravel()))
+            inferred_type = lib.infer_dtype(ensure_object(result.ravel()))
             if inferred_type == 'boolean':
                 dtype = 'bool'
             elif inferred_type == 'integer':
@@ -602,12 +602,12 @@ def coerce_indexer_dtype(indexer, categories):
     """ coerce the indexer input array to the smallest dtype possible """
     length = len(categories)
     if length < _int8_max:
-        return _ensure_int8(indexer)
+        return ensure_int8(indexer)
     elif length < _int16_max:
-        return _ensure_int16(indexer)
+        return ensure_int16(indexer)
     elif length < _int32_max:
-        return _ensure_int32(indexer)
-    return _ensure_int64(indexer)
+        return ensure_int32(indexer)
+    return ensure_int64(indexer)
 
 
 def coerce_to_dtypes(result, dtypes):
@@ -948,7 +948,7 @@ def try_timedelta(v):
         except Exception:
             return v.reshape(shape)
 
-    inferred_type = lib.infer_datetimelike_array(_ensure_object(v))
+    inferred_type = lib.infer_datetimelike_array(ensure_object(v))
 
     if inferred_type == 'date' and convert_dates:
         value = try_datetime(v)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -32,14 +32,14 @@
 # oh the troubles to reduce import time
 _is_scipy_sparse = None
 
-_ensure_float64 = algos.ensure_float64
-_ensure_float32 = algos.ensure_float32
+ensure_float64 = algos.ensure_float64
+ensure_float32 = algos.ensure_float32
 
 _ensure_datetime64ns = conversion.ensure_datetime64ns
 _ensure_timedelta64ns = conversion.ensure_timedelta64ns
 
 
-def _ensure_float(arr):
+def ensure_float(arr):
     """
     Ensure that an array object has a float dtype if possible.
 
@@ -59,16 +59,16 @@ def _ensure_float(arr):
     return arr
 
 
-_ensure_uint64 = algos.ensure_uint64
-_ensure_int64 = algos.ensure_int64
-_ensure_int32 = algos.ensure_int32
-_ensure_int16 = algos.ensure_int16
-_ensure_int8 = algos.ensure_int8
-_ensure_platform_int = algos.ensure_platform_int
-_ensure_object = algos.ensure_object
+ensure_uint64 = algos.ensure_uint64
+ensure_int64 = algos.ensure_int64
+ensure_int32 = algos.ensure_int32
+ensure_int16 = algos.ensure_int16
+ensure_int8 = algos.ensure_int8
+ensure_platform_int = algos.ensure_platform_int
+ensure_object = algos.ensure_object
 
 
-def _ensure_categorical(arr):
+def ensure_categorical(arr):
     """
     Ensure that an array-like object is a Categorical (if not already).
 

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -16,7 +16,7 @@
                      is_string_like_dtype, is_bool_dtype,
                      is_integer_dtype, is_dtype_equal,
                      is_extension_array_dtype,
-                     needs_i8_conversion, _ensure_object,
+                     needs_i8_conversion, ensure_object,
                      pandas_dtype,
                      is_scalar,
                      is_object_dtype,
@@ -413,7 +413,7 @@ def array_equivalent(left, right, strict_nan=False):
         if not strict_nan:
             # isna considers NaN and None to be equivalent.
             return lib.array_equivalent_object(
-                _ensure_object(left.ravel()), _ensure_object(right.ravel()))
+                ensure_object(left.ravel()), ensure_object(right.ravel()))
 
         for left_value, right_value in zip(left, right):
             if left_value is NaT and right_value is not NaT:
@@ -470,7 +470,7 @@ def _infer_fill_value(val):
     if is_datetimelike(val):
         return np.array('NaT', dtype=val.dtype)
     elif is_object_dtype(val.dtype):
-        dtype = lib.infer_dtype(_ensure_object(val))
+        dtype = lib.infer_dtype(ensure_object(val))
         if dtype in ['datetime', 'datetime64']:
             return np.array('NaT', dtype=_NS_DTYPE)
         elif dtype in ['timedelta', 'timedelta64']: