diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 172117f7d8059..4cc119a700ca0 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -485,7 +485,7 @@ cpdef ndarray[object] astype_str(ndarray arr):
 
 def clean_index_list(list obj):
     """
-    Utility used in pandas.core.index._ensure_index
+    Utility used in pandas.core.index.ensure_index
     """
     cdef:
         Py_ssize_t i, n = len(obj)
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 6e49e8044ff25..78c9113ce60de 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -27,9 +27,9 @@
     is_datetime64_any_dtype, is_datetime64tz_dtype,
     is_timedelta64_dtype, is_datetimelike,
     is_interval_dtype, is_scalar, is_list_like,
-    _ensure_platform_int, _ensure_object,
-    _ensure_float64, _ensure_uint64,
-    _ensure_int64)
+    ensure_platform_int, ensure_object,
+    ensure_float64, ensure_uint64,
+    ensure_int64)
 from pandas.compat.numpy import _np_version_under1p10
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
@@ -73,32 +73,32 @@ def _ensure_data(values, dtype=None):
     # we check some simple dtypes first
     try:
         if is_object_dtype(dtype):
-            return _ensure_object(np.asarray(values)), 'object', 'object'
+            return ensure_object(np.asarray(values)), 'object', 'object'
 
         if is_bool_dtype(values) or is_bool_dtype(dtype):
             # we are actually coercing to uint64
             # until our algos support uint8 directly (see TODO)
             return np.asarray(values).astype('uint64'), 'bool', 'uint64'
         elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype):
-            return _ensure_int64(values), 'int64', 'int64'
+            return ensure_int64(values), 'int64', 'int64'
         elif (is_unsigned_integer_dtype(values) or
               is_unsigned_integer_dtype(dtype)):
-            return _ensure_uint64(values), 'uint64', 'uint64'
+            return ensure_uint64(values), 'uint64', 'uint64'
         elif is_float_dtype(values) or is_float_dtype(dtype):
-            return _ensure_float64(values), 'float64', 'float64'
+            return ensure_float64(values), 'float64', 'float64'
         elif is_object_dtype(values) and dtype is None:
-            return _ensure_object(np.asarray(values)), 'object', 'object'
+            return ensure_object(np.asarray(values)), 'object', 'object'
         elif is_complex_dtype(values) or is_complex_dtype(dtype):
             # ignore the fact that we are casting to float
             # which discards complex parts
             with catch_warnings(record=True):
-                values = _ensure_float64(values)
+                values = ensure_float64(values)
             return values, 'float64', 'float64'
 
     except (TypeError, ValueError):
         # if we are trying to coerce to a dtype
         # and it is incompat this will fall thru to here
-        return _ensure_object(values), 'object', 'object'
+        return ensure_object(values), 'object', 'object'
 
     # datetimelike
     if (needs_i8_conversion(values) or
@@ -129,13 +129,13 @@ def _ensure_data(values, dtype=None):
 
         # we are actually coercing to int64
         # until our algos support int* directly (not all do)
-        values = _ensure_int64(values)
+        values = ensure_int64(values)
         return values, dtype, 'int64'
 
     # we have failed, return object
     values = np.asarray(values)
-    return _ensure_object(values), 'object', 'object'
+    return ensure_object(values), 'object', 'object'
 
 
 def _reconstruct_data(values, dtype, original):
@@ -475,7 +475,7 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None,
     labels = table.get_labels(values, uniques, 0,
                               na_sentinel, na_value=na_value)
 
-    labels = _ensure_platform_int(labels)
+    labels = ensure_platform_int(labels)
     uniques = uniques.to_array()
     return labels, uniques
 
@@ -1309,7 +1309,7 @@ def _take_nd_object(arr, indexer, out, axis, fill_value, mask_info):
     if arr.dtype != out.dtype:
         arr = arr.astype(out.dtype)
     if arr.shape[axis] > 0:
-        arr.take(_ensure_platform_int(indexer), axis=axis, out=out)
+        arr.take(ensure_platform_int(indexer), axis=axis, out=out)
     if needs_masking:
         outindexer = [slice(None)] * arr.ndim
         outindexer[axis] = mask
@@ -1450,7 +1450,7 @@ def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None):
             return func
 
     def func(arr, indexer, out, fill_value=np.nan):
-        indexer = _ensure_int64(indexer)
+        indexer = ensure_int64(indexer)
         _take_nd_object(arr, indexer, out, axis=axis,
                         fill_value=fill_value, mask_info=mask_info)
 
@@ -1609,7 +1609,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
         indexer = np.arange(arr.shape[axis], dtype=np.int64)
         dtype, fill_value = arr.dtype, arr.dtype.type()
     else:
-        indexer = _ensure_int64(indexer, copy=False)
+        indexer = ensure_int64(indexer, copy=False)
         if not allow_fill:
             dtype, fill_value = arr.dtype, arr.dtype.type()
             mask_info = None, False
@@ -1687,11 +1687,11 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None,
         if row_idx is None:
             row_idx = np.arange(arr.shape[0], dtype=np.int64)
         else:
-            row_idx = _ensure_int64(row_idx)
+            row_idx = ensure_int64(row_idx)
         if col_idx is None:
             col_idx = np.arange(arr.shape[1], dtype=np.int64)
         else:
-            col_idx = _ensure_int64(col_idx)
+            col_idx = ensure_int64(col_idx)
         indexer = row_idx, col_idx
         if not allow_fill:
             dtype, fill_value = arr.dtype, arr.dtype.type()
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 7a6253dffe235..973a8af76bb07 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -17,9 +17,9 @@
     coerce_indexer_dtype)
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.common import (
-    _ensure_int64,
-    _ensure_object,
-    _ensure_platform_int,
+    ensure_int64,
+    ensure_object,
+    ensure_platform_int,
     is_extension_array_dtype,
     is_dtype_equal,
     is_datetimelike,
@@ -1221,7 +1221,7 @@ def shift(self, periods):
         if codes.ndim > 1:
             raise NotImplementedError("Categorical with ndim > 1.")
         if np.prod(codes.shape) and (periods != 0):
-            codes = np.roll(codes, _ensure_platform_int(periods), axis=0)
+            codes = np.roll(codes, ensure_platform_int(periods), axis=0)
             if periods > 0:
                 codes[:periods] = -1
             else:
@@ -2137,7 +2137,7 @@ def mode(self, dropna=True):
         if dropna:
             good = self._codes != -1
             values = self._codes[good]
-        values = sorted(htable.mode_int64(_ensure_int64(values), dropna))
+        values = sorted(htable.mode_int64(ensure_int64(values), dropna))
         result = self._constructor(values=values, categories=self.categories,
                                    ordered=self.ordered, fastpath=True)
         return result
@@ -2431,8 +2431,8 @@ def _get_codes_for_values(values, categories):
 
     from pandas.core.algorithms import _get_data_algo, _hashtables
     if not is_dtype_equal(values.dtype, categories.dtype):
-        values = _ensure_object(values)
-        categories = _ensure_object(categories)
+        values = ensure_object(values)
+        categories = ensure_object(categories)
 
     (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
     (_, _), cats = _get_data_algo(categories, _hashtables)
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 5835090e25de1..c5e85cb5892f4 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -21,7 +21,7 @@
     is_datetime64tz_dtype,
     is_datetime64_dtype,
     is_timedelta64_dtype,
-    _ensure_int64)
+    ensure_int64)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
@@ -167,7 +167,7 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs):
             values = np.array(values, copy=False)
 
         if not is_datetime64_dtype(values):
-            values = _ensure_int64(values).view(_NS_DTYPE)
+            values = ensure_int64(values).view(_NS_DTYPE)
 
         result = object.__new__(cls)
         result._data = values
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 4ad53e16bc439..c915b272aee8b 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -19,7 +19,7 @@
     ABCSeries, ABCIntervalIndex,
     ABCInterval)
 from pandas.core.dtypes.missing import isna, notna
-from pandas.core.indexes.base import Index, _ensure_index
+from pandas.core.indexes.base import Index, ensure_index
 from pandas.util._decorators import Appender
 from pandas.util._doctools import _WritableDoc
 
@@ -145,8 +145,8 @@ def _simple_new(cls, left, right, closed=None,
         result = IntervalMixin.__new__(cls)
 
         closed = closed or 'right'
-        left = _ensure_index(left, copy=copy)
-        right = _ensure_index(right, copy=copy)
+        left = ensure_index(left, copy=copy)
+        right = ensure_index(right, copy=copy)
 
         if dtype is not None:
             # GH 19262: dtype must be an IntervalDtype to override inferred
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index f027b84506164..a28f7fc9c32fa 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -11,7 +11,7 @@
 from pandas import compat
 
 from pandas.core.dtypes.common import (
-    _TD_DTYPE, _ensure_int64, is_timedelta64_dtype, is_list_like)
+    _TD_DTYPE, ensure_int64, is_timedelta64_dtype, is_list_like)
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.missing import isna
 
@@ -117,7 +117,7 @@ def _simple_new(cls, values, freq=None, **kwargs):
                 # non-nano unit
                 values = values.astype(_TD_DTYPE)
             else:
-                values = _ensure_int64(values).view(_TD_DTYPE)
+                values = ensure_int64(values).view(_TD_DTYPE)
 
         result = object.__new__(cls)
         result._data = values
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 0a33873630d27..0ca776b6bfa77 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -13,7 +13,7 @@
 from pandas import compat
 from pandas.compat import long, zip, iteritems, PY36, OrderedDict
 from pandas.core.config import get_option
-from pandas.core.dtypes.generic import ABCSeries, ABCIndex
+from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
 from pandas.core.dtypes.common import is_integer
 from pandas.core.dtypes.inference import _iterable_not_string
 from pandas.core.dtypes.missing import isna, isnull, notnull  # noqa
@@ -120,11 +120,6 @@ def is_bool_indexer(key):
     return False
 
 
-def _default_index(n):
-    from pandas.core.index import RangeIndex
-    return RangeIndex(0, n, name=None)
-
-
 def _mut_exclusive(**kwargs):
     item1, item2 = kwargs.items()
     label1, val1 = item1
@@ -299,11 +294,10 @@ def intersection(*seqs):
 
 
 def _asarray_tuplesafe(values, dtype=None):
-    from pandas.core.index import Index
 
     if not (isinstance(values, (list, tuple)) or hasattr(values, '__array__')):
         values = list(values)
-    elif isinstance(values, Index):
+    elif isinstance(values, ABCIndexClass):
         return values.values
 
     if isinstance(values, list) and dtype in [np.object_, object]:
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 0bc6ad8499934..8675d3be06287 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -8,7 +8,7 @@
 from pandas._libs import tslib, lib, tslibs
 from pandas._libs.tslibs import iNaT
 from pandas.compat import string_types, text_type, PY3
-from .common import (_ensure_object, is_bool, is_integer, is_float,
+from .common import (ensure_object, is_bool, is_integer, is_float,
                      is_complex, is_datetimetz, is_categorical_dtype,
                      is_datetimelike,
                      is_extension_type,
@@ -25,8 +25,8 @@
                      is_bool_dtype, is_scalar,
                      is_string_dtype, _string_dtypes,
                      pandas_dtype,
-                     _ensure_int8, _ensure_int16,
-                     _ensure_int32, _ensure_int64,
+                     ensure_int8, ensure_int16,
+                     ensure_int32, ensure_int64,
                      _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
                      _POSSIBLY_CAST_DTYPES)
 from .dtypes import (ExtensionDtype, PandasExtensionDtype, DatetimeTZDtype,
@@ -85,7 +85,7 @@ def trans(x):
 
     if isinstance(dtype, string_types):
         if dtype == 'infer':
-            inferred_type = lib.infer_dtype(_ensure_object(result.ravel()))
+            inferred_type = lib.infer_dtype(ensure_object(result.ravel()))
             if inferred_type == 'boolean':
                 dtype = 'bool'
             elif inferred_type == 'integer':
@@ -602,12 +602,12 @@ def coerce_indexer_dtype(indexer, categories):
     """ coerce the indexer input array to the smallest dtype possible """
     length = len(categories)
     if length < _int8_max:
-        return _ensure_int8(indexer)
+        return ensure_int8(indexer)
     elif length < _int16_max:
-        return _ensure_int16(indexer)
+        return ensure_int16(indexer)
     elif length < _int32_max:
-        return _ensure_int32(indexer)
-    return _ensure_int64(indexer)
+        return ensure_int32(indexer)
+    return ensure_int64(indexer)
 
 
 def coerce_to_dtypes(result, dtypes):
@@ -948,7 +948,7 @@ def try_timedelta(v):
         except Exception:
             return v.reshape(shape)
 
-    inferred_type = lib.infer_datetimelike_array(_ensure_object(v))
+    inferred_type = lib.infer_datetimelike_array(ensure_object(v))
 
     if inferred_type == 'date' and convert_dates:
         value = try_datetime(v)
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index ef4f36dc6df33..5a2f91d775fb2 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -32,14 +32,14 @@
 # oh the troubles to reduce import time
 _is_scipy_sparse = None
 
-_ensure_float64 = algos.ensure_float64
-_ensure_float32 = algos.ensure_float32
+ensure_float64 = algos.ensure_float64
+ensure_float32 = algos.ensure_float32
 
 _ensure_datetime64ns = conversion.ensure_datetime64ns
 _ensure_timedelta64ns = conversion.ensure_timedelta64ns
 
 
-def _ensure_float(arr):
+def ensure_float(arr):
     """
     Ensure that an array object has a float dtype if possible.
 
@@ -59,16 +59,16 @@ def ensure_float(arr):
     return arr
 
 
-_ensure_uint64 = algos.ensure_uint64
-_ensure_int64 = algos.ensure_int64
-_ensure_int32 = algos.ensure_int32
-_ensure_int16 = algos.ensure_int16
-_ensure_int8 = algos.ensure_int8
-_ensure_platform_int = algos.ensure_platform_int
-_ensure_object = algos.ensure_object
+ensure_uint64 = algos.ensure_uint64
+ensure_int64 = algos.ensure_int64
+ensure_int32 = algos.ensure_int32
+ensure_int16 = algos.ensure_int16
+ensure_int8 = algos.ensure_int8
+ensure_platform_int = algos.ensure_platform_int
+ensure_object = algos.ensure_object
 
 
-def _ensure_categorical(arr):
+def ensure_categorical(arr):
     """
     Ensure that an array-like object is a Categorical (if not already).
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index 7ef4a7674753e..66998aa6866f6 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -16,7 +16,7 @@
     is_string_like_dtype, is_bool_dtype,
     is_integer_dtype, is_dtype_equal,
     is_extension_array_dtype,
-    needs_i8_conversion, _ensure_object,
+    needs_i8_conversion, ensure_object,
     pandas_dtype,
     is_scalar,
     is_object_dtype,
@@ -413,7 +413,7 @@ def array_equivalent(left, right, strict_nan=False):
         if not strict_nan:
             # isna considers NaN and None to be equivalent.
             return lib.array_equivalent_object(
-                _ensure_object(left.ravel()), _ensure_object(right.ravel()))
+                ensure_object(left.ravel()), ensure_object(right.ravel()))
 
         for left_value, right_value in zip(left, right):
             if left_value is NaT and right_value is not NaT:
@@ -470,7 +470,7 @@ def _infer_fill_value(val):
     if is_datetimelike(val):
         return np.array('NaT', dtype=val.dtype)
     elif is_object_dtype(val.dtype):
-        dtype = lib.infer_dtype(_ensure_object(val))
+        dtype = lib.infer_dtype(ensure_object(val))
         if dtype in ['datetime', 'datetime64']:
             return np.array('NaT', dtype=_NS_DTYPE)
         elif dtype in ['timedelta', 'timedelta64']:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6380944338010..4578d2ac08199 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -51,9 +51,9 @@
     is_dtype_equal,
     needs_i8_conversion,
     _get_dtype_from_object,
-    _ensure_float64,
-    _ensure_int64,
-    _ensure_platform_int,
+    ensure_float64,
+    ensure_int64,
+    ensure_platform_int,
     is_list_like,
     is_nested_list_like,
     is_iterator,
@@ -64,8 +64,8 @@
 
 from pandas.core.generic import NDFrame, _shared_docs
-from pandas.core.index import (Index, MultiIndex, _ensure_index,
-                               _ensure_index_from_sequences)
+from pandas.core.index import (Index, MultiIndex, ensure_index,
+                               ensure_index_from_sequences)
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
                                   check_bool_indexer)
 from pandas.core.internals import (BlockManager,
@@ -88,6 +88,7 @@
 from pandas.core.indexes.period import PeriodIndex
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.indexes.timedeltas import TimedeltaIndex
+import pandas.core.indexes.base as ibase
 
 import pandas.core.common as com
 import pandas.core.nanops as nanops
@@ -397,16 +398,16 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                     if is_named_tuple(data[0]) and columns is None:
                         columns = data[0]._fields
                     arrays, columns = _to_arrays(data, columns, dtype=dtype)
-                    columns = _ensure_index(columns)
+                    columns = ensure_index(columns)
 
                     # set the index
                     if index is None:
                         if isinstance(data[0], Series):
                             index = _get_names_from_index(data)
                         elif isinstance(data[0], Categorical):
-                            index = com._default_index(len(data[0]))
+                            index = ibase.default_index(len(data[0]))
                         else:
-                            index = com._default_index(len(data))
+                            index = ibase.default_index(len(data))
 
                     mgr = _arrays_to_mgr(arrays, columns, index, columns,
                                          dtype=dtype)
@@ -450,7 +451,7 @@ def _init_dict(self, data, index, columns, dtype=None):
                 # raise ValueError if only scalars in dict
                 index = extract_index(arrays[~missing])
             else:
-                index = _ensure_index(index)
+                index = ensure_index(index)
 
             # no obvious "empty" int column
             if missing.any() and not is_integer_dtype(dtype):
@@ -491,14 +492,14 @@ def _get_axes(N, K, index=index, columns=columns):
            # return axes or defaults

            if index is None:
-                index = com._default_index(N)
+                index = ibase.default_index(N)
            else:
-                index = _ensure_index(index)
+                index = ensure_index(index)

            if columns is None:
-                columns = com._default_index(K)
+                columns = ibase.default_index(K)
            else:
-                columns = _ensure_index(columns)
+                columns = ensure_index(columns)
            return index, columns

        # we could have a categorical type passed or coerced to 'category'
@@ -1236,7 +1237,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
 
         # Make a copy of the input columns so we can modify it
         if columns is not None:
-            columns = _ensure_index(columns)
+            columns = ensure_index(columns)
 
         if is_iterator(data):
             if nrows == 0:
@@ -1265,7 +1266,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
 
         if isinstance(data, dict):
             if columns is None:
-                columns = arr_columns = _ensure_index(sorted(data))
+                columns = arr_columns = ensure_index(sorted(data))
                 arrays = [data[k] for k in columns]
             else:
                 arrays = []
@@ -1281,15 +1282,15 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
         elif isinstance(data, (np.ndarray, DataFrame)):
             arrays, columns = _to_arrays(data, columns)
             if columns is not None:
-                columns = _ensure_index(columns)
+                columns = ensure_index(columns)
             arr_columns = columns
         else:
             arrays, arr_columns = _to_arrays(data, columns,
                                              coerce_float=coerce_float)
-            arr_columns = _ensure_index(arr_columns)
+            arr_columns = ensure_index(arr_columns)
             if columns is not None:
-                columns = _ensure_index(columns)
+                columns = ensure_index(columns)
             else:
                 columns = arr_columns
 
@@ -1312,8 +1313,8 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
             try:
                 to_remove = [arr_columns.get_loc(field) for field in index]
                 index_data = [arrays[i] for i in to_remove]
-                result_index = _ensure_index_from_sequences(index_data,
-                                                            names=index)
+                result_index = ensure_index_from_sequences(index_data,
+                                                           names=index)
 
                 exclude.update(index)
             except Exception:
@@ -1480,18 +1481,18 @@ def from_items(cls, items, columns=None, orient='columns'):
 
         if orient == 'columns':
             if columns is not None:
-                columns = _ensure_index(columns)
+                columns = ensure_index(columns)
 
                 idict = dict(items)
                 if len(idict) < len(items):
-                    if not columns.equals(_ensure_index(keys)):
+                    if not columns.equals(ensure_index(keys)):
                         raise ValueError('With non-unique item names, passed '
                                          'columns must be identical')
                     arrays = values
                 else:
                     arrays = [idict[k] for k in columns if k in idict]
             else:
-                columns = _ensure_index(keys)
+                columns = ensure_index(keys)
                 arrays = values
 
             # GH 17312
@@ -1508,7 +1509,7 @@ def from_items(cls, items, columns=None, orient='columns'):
             if columns is None:
                 raise TypeError("Must pass columns with orient='index'")
 
-            keys = _ensure_index(keys)
+            keys = ensure_index(keys)
 
             # GH 17312
             # Provide more informative error msg when scalar values passed
@@ -4006,7 +4007,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 to_remove.append(col)
             arrays.append(level)
 
-        index = _ensure_index_from_sequences(arrays, names)
+        index = ensure_index_from_sequences(arrays, names)
 
         if verify_integrity and not index.is_unique:
             duplicates = index[index.duplicated()].unique()
@@ -4188,7 +4189,7 @@ def _maybe_casted_values(index, labels=None):
                                          values, mask, np.nan)
             return values
 
-        new_index = com._default_index(len(new_obj))
+        new_index = ibase.default_index(len(new_obj))
         if level is not None:
             if not isinstance(level, (tuple, list)):
                 level = [level]
@@ -4509,7 +4510,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
                 keys.append(k)
             indexer = lexsort_indexer(keys, orders=ascending,
                                       na_position=na_position)
-            indexer = _ensure_platform_int(indexer)
+            indexer = ensure_platform_int(indexer)
         else:
             from pandas.core.sorting import nargsort
@@ -6749,14 +6750,14 @@ def corr(self, method='pearson', min_periods=1):
         mat = numeric_df.values
 
         if method == 'pearson':
-            correl = libalgos.nancorr(_ensure_float64(mat), minp=min_periods)
+            correl = libalgos.nancorr(ensure_float64(mat), minp=min_periods)
         elif method == 'spearman':
-            correl = libalgos.nancorr_spearman(_ensure_float64(mat),
+            correl = libalgos.nancorr_spearman(ensure_float64(mat),
                                                minp=min_periods)
         else:
             if min_periods is None:
                 min_periods = 1
-            mat = _ensure_float64(mat).T
+            mat = ensure_float64(mat).T
             corrf = nanops.get_corr_func(method)
             K = len(cols)
             correl = np.empty((K, K), dtype=float)
@@ -6886,7 +6887,7 @@ def cov(self, min_periods=None):
             baseCov = np.cov(mat.T)
             baseCov = baseCov.reshape((len(cols), len(cols)))
         else:
-            baseCov = libalgos.nancorr(_ensure_float64(mat), cov=True,
+            baseCov = libalgos.nancorr(ensure_float64(mat), cov=True,
                                        minp=min_periods)
 
         return self._constructor(baseCov, index=idx, columns=cols)
@@ -7076,7 +7077,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
             level = count_axis._get_level_number(level)
 
         level_index = count_axis.levels[level]
-        labels = _ensure_int64(count_axis.labels[level])
+        labels = ensure_int64(count_axis.labels[level])
         counts = lib.count_level_2d(mask, labels, len(level_index), axis=0)
 
         result = DataFrame(counts, index=level_index, columns=agg_axis)
@@ -7608,7 +7609,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
     arrays = _homogenize(arrays, index, dtype)
 
     # from BlockManager perspective
-    axes = [_ensure_index(columns), _ensure_index(index)]
+    axes = [ensure_index(columns), ensure_index(index)]
 
     return create_block_manager_from_arrays(arrays, arr_names, axes)
 
@@ -7660,9 +7661,9 @@ def extract_index(data):
                                (lengths[0], len(index)))
                 raise ValueError(msg)
         else:
-            index = com._default_index(lengths[0])
+            index = ibase.default_index(lengths[0])
 
-    return _ensure_index(index)
+    return ensure_index(index)
 
 
 def _prep_ndarray(values, copy=True):
@@ -7734,7 +7735,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None):
                                        dtype=dtype)
     elif isinstance(data[0], Categorical):
         if columns is None:
-            columns = com._default_index(len(data))
+            columns = ibase.default_index(len(data))
         return data, columns
     elif (isinstance(data, (np.ndarray, Series, Index)) and
           data.dtype.names is not None):
@@ -7758,11 +7759,11 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy):
     if index is None:
         index = _get_names_from_index(fdata)
         if index is None:
-            index = com._default_index(len(data))
-    index = _ensure_index(index)
+            index = ibase.default_index(len(data))
+    index = ensure_index(index)
 
     if columns is not None:
-        columns = _ensure_index(columns)
+        columns = ensure_index(columns)
     arrays, arr_columns = _to_arrays(fdata, columns)
 
     # fill if needed
@@ -7790,8 +7791,8 @@ def _reorder_arrays(arrays, arr_columns, columns):
     # reorder according to the columns
     if (columns is not None and len(columns) and arr_columns is not None and
            len(arr_columns)):
-        indexer = _ensure_index(arr_columns).get_indexer(columns)
-        arr_columns = _ensure_index([arr_columns[i] for i in indexer])
+        indexer = ensure_index(arr_columns).get_indexer(columns)
+        arr_columns = ensure_index([arr_columns[i] for i in indexer])
         arrays = [arrays[i] for i in indexer]
     return arrays, arr_columns
 
@@ -7818,7 +7819,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
     for s in data:
         index = getattr(s, 'index', None)
         if index is None:
-            index = com._default_index(len(s))
+            index = ibase.default_index(len(s))
 
         if id(index) in indexer_cache:
             indexer = indexer_cache[id(index)]
@@ -7855,7 +7856,7 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
 
 def _convert_object_array(content, columns, coerce_float=False, dtype=None):
     if columns is None:
-        columns = com._default_index(len(content))
+        columns = ibase.default_index(len(content))
     else:
         if len(columns) != len(content):  # pragma: no cover
             # caller's responsibility to check for this...
@@ -7878,7 +7879,7 @@ def convert(arr):
 def _get_names_from_index(data):
     has_some_name = any(getattr(s, 'name', None) is not None for s in data)
     if not has_some_name:
-        return com._default_index(len(data))
+        return ibase.default_index(len(data))
 
     index = lrange(len(data))
     count = 0
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8da678e0adec0..7305da4f56506 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -12,8 +12,8 @@
 
 from pandas._libs import tslib, properties
 from pandas.core.dtypes.common import (
-    _ensure_int64,
-    _ensure_object,
+    ensure_int64,
+    ensure_object,
     is_scalar,
     is_number,
     is_integer, is_bool,
@@ -35,7 +35,7 @@
 from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
 
 from pandas.core.base import PandasObject, SelectionMixin
-from pandas.core.index import (Index, MultiIndex, _ensure_index,
+from pandas.core.index import (Index, MultiIndex, ensure_index,
                                InvalidIndexError, RangeIndex)
 import pandas.core.indexing as indexing
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -3235,7 +3235,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
 
         # Case for non-unique axis
         else:
-            labels = _ensure_object(com._index_labels_to_array(labels))
+            labels = ensure_object(com._index_labels_to_array(labels))
             if level is not None:
                 if not isinstance(axis, MultiIndex):
                     raise AssertionError('axis must be a MultiIndex')
@@ -3889,9 +3889,9 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
             if index is None:
                 continue
 
-            index = _ensure_index(index)
+            index = ensure_index(index)
             if indexer is not None:
-                indexer = _ensure_int64(indexer)
+                indexer = ensure_int64(indexer)
 
             # TODO: speed up on homogeneous DataFrame objects
             new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 78631bfae9e01..169416d6f8211 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -18,6 +18,7 @@
 from pandas.util._decorators import Substitution, Appender
 from pandas import compat
 
+import pandas.core.indexes.base as ibase
 import pandas.core.common as com
 from pandas.core.panel import Panel
 from pandas.compat import lzip, map
@@ -35,8 +36,8 @@
     is_numeric_dtype,
     is_integer_dtype,
     is_interval_dtype,
-    _ensure_platform_int,
-    _ensure_int64)
+    ensure_platform_int,
+    ensure_int64)
 from pandas.core.dtypes.missing import isna, notna
 import pandas.core.algorithms as algorithms
 from pandas.core.frame import DataFrame
@@ -1165,7 +1166,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
                            verify_integrity=False)
 
             if is_integer_dtype(out):
-                out = _ensure_int64(out)
+                out = ensure_int64(out)
             return Series(out, index=mi, name=self._selection_name)
 
         # for compat. with libgroupby.value_counts need to ensure every
@@ -1196,7 +1197,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
                        verify_integrity=False)
 
         if is_integer_dtype(out):
-            out = _ensure_int64(out)
+            out = ensure_int64(out)
         return Series(out, index=mi, name=self._selection_name)
 
     def count(self):
@@ -1205,7 +1206,7 @@ def count(self):
         val = self.obj.get_values()
 
         mask = (ids != -1) & ~isna(val)
-        ids = _ensure_platform_int(ids)
+        ids = ensure_platform_int(ids)
         out = np.bincount(ids[mask], minlength=ngroups or 0)
 
         return Series(out,
@@ -1567,7 +1568,7 @@ def groupby_series(obj, col=None):
             results = concat(results, axis=1)
 
         if not self.as_index:
-            results.index = com._default_index(len(results))
+            results.index = ibase.default_index(len(results))
         return results
 
     boxplot = boxplot_frame_groupby
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ff2ed6970ee76..cb045b08f3629 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -28,7 +28,7 @@ class providing the base-class of operations.
 from pandas.core.dtypes.common import (
     is_numeric_dtype,
     is_scalar,
-    _ensure_float)
+    ensure_float)
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.missing import isna, notna
 
@@ -842,7 +842,7 @@ def _python_agg_general(self, func, *args, **kwargs):
             # since we are masking, make sure that we have a float object
             values = result
             if is_numeric_dtype(values.dtype):
-                values = _ensure_float(values)
+                values = ensure_float(values)
 
             output[name] = self._try_cast(values[mask], result)
 
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index d5c4c2946a632..a1511b726c705 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -16,7 +16,7 @@
 from pandas.core.index import (
     Index, MultiIndex, CategoricalIndex)
 from pandas.core.dtypes.common import (
-    _ensure_categorical,
+    ensure_categorical,
     is_hashable,
     is_list_like,
     is_timedelta64_dtype,
@@ -360,7 +360,7 @@ def indices(self):
         if isinstance(self.grouper, BaseGrouper):
             return self.grouper.indices
 
-        values = _ensure_categorical(self.grouper)
+        values = ensure_categorical(self.grouper)
         return values._reverse_indexer()
 
     @property
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 65b9144c0ddc9..f2c55a56b119d 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -18,12 +18,12 @@
 from pandas.core.base import SelectionMixin
 from pandas.core.dtypes.missing import isna, _maybe_fill
 from pandas.core.index import (
-    Index, MultiIndex, _ensure_index)
+    Index, MultiIndex, ensure_index)
 from pandas.core.dtypes.common import (
-    _ensure_float64,
-    _ensure_platform_int,
-    _ensure_int64,
-    _ensure_object,
+    ensure_float64,
+    ensure_platform_int,
+    ensure_int64,
+    ensure_object,
     needs_i8_conversion,
     is_integer_dtype,
     is_complex_dtype,
@@ -231,7 +231,7 @@ def size(self):
         """
         ids, _, ngroup = self.group_info
-        ids = _ensure_platform_int(ids)
+        ids = ensure_platform_int(ids)
         if ngroup:
             out = np.bincount(ids[ids != -1], minlength=ngroup)
         else:
@@ -260,7 +260,7 @@ def group_info(self):
         comp_ids, obs_group_ids = self._get_compressed_labels()
 
         ngroups = len(obs_group_ids)
-        comp_ids = _ensure_int64(comp_ids)
+        comp_ids = ensure_int64(comp_ids)
         return comp_ids, obs_group_ids, ngroups
 
     @cache_readonly
@@ -312,7 +312,7 @@ def get_group_levels(self):
         name_list = []
         for ping, labels in zip(self.groupings, self.recons_labels):
-            labels = _ensure_platform_int(labels)
+            labels = ensure_platform_int(labels)
             levels = ping.result_index.take(labels)
             name_list.append(levels)
 
@@ -464,16 +464,16 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
             values = values.view('int64')
             is_numeric = True
         elif is_bool_dtype(values.dtype):
-            values = _ensure_float64(values)
+            values = ensure_float64(values)
         elif is_integer_dtype(values):
             # we use iNaT for the missing value on ints
             # so pre-convert to guard this condition
             if (values == iNaT).any():
-                values = _ensure_float64(values)
+                values = ensure_float64(values)
             else:
                 values = values.astype('int64', copy=False)
         elif is_numeric and not is_complex_dtype(values):
-            values = _ensure_float64(values)
+            values = ensure_float64(values)
         else:
             values = values.astype(object)
 
@@ -482,7 +482,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
                 kind, how, values, is_numeric)
         except NotImplementedError:
             if is_numeric:
-                values = _ensure_float64(values)
+                values = ensure_float64(values)
                 func = self._get_cython_function(
                     kind, how, values, is_numeric)
             else:
@@ -528,7 +528,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
                         result, (counts > 0).view(np.uint8))
                 except ValueError:
                     result = lib.row_bool_subset_object(
-                        _ensure_object(result),
+                        ensure_object(result),
                         (counts > 0).view(np.uint8))
             else:
                 result = result[counts > 0]
@@ -671,8 +671,8 @@ class BinGrouper(BaseGrouper):
 
     def __init__(self, bins, binlabels, filter_empty=False, mutated=False,
                  indexer=None):
-        self.bins = _ensure_int64(bins)
-        self.binlabels = _ensure_index(binlabels)
+        self.bins = ensure_int64(bins)
+        self.binlabels = ensure_index(binlabels)
         self._filter_empty_groups = filter_empty
         self.mutated = mutated
         self.indexer = indexer
@@ -737,7 +737,7 @@ def group_info(self):
         obs_group_ids = np.arange(ngroups)
         rep = np.diff(np.r_[0, self.bins])
 
-        rep = _ensure_platform_int(rep)
+        rep = ensure_platform_int(rep)
         if ngroups == len(self.bins):
             comp_ids = np.repeat(np.arange(ngroups), rep)
         else:
@@ -808,7 +808,7 @@ class DataSplitter(object):
 
     def __init__(self, data, labels, ngroups, axis=0):
         self.data = data
-        self.labels = _ensure_int64(labels)
+        self.labels = ensure_int64(labels)
         self.ngroups = ngroups
 
         self.axis = axis
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 2286033e97d85..b409d695a73e8 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -3,8 +3,8 @@
 
 from pandas.core.indexes.base import (Index, _new_Index,
-                                      _ensure_index,
-                                      _ensure_index_from_sequences,
+                                      ensure_index,
+                                      ensure_index_from_sequences,
                                       InvalidIndexError)  # noqa
 from pandas.core.indexes.category import CategoricalIndex  # noqa
 from pandas.core.indexes.multi import MultiIndex  # noqa
@@ -36,7 +36,7 @@
            'InvalidIndexError', 'TimedeltaIndex',
            'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT',
-           '_ensure_index', '_ensure_index_from_sequences',
+           'ensure_index', 'ensure_index_from_sequences',
            '_get_combined_index', '_get_objs_combined_axis', '_union_indexes',
            '_get_consensus_names',
@@ -66,7 +66,7 @@ def _get_combined_index(indexes, intersect=False, sort=False):
             index = index.intersection(other)
     else:
         index = _union_indexes(indexes, sort=sort)
-        index = _ensure_index(index)
+        index = ensure_index(index)
 
     if sort:
         try:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index b9639fc804a36..83b70baf4065b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -23,10 +23,10 @@
 from pandas.core.dtypes.missing import isna, array_equivalent
 from pandas.core.dtypes.cast import maybe_cast_to_integer_array
 from pandas.core.dtypes.common import (
-    _ensure_int64,
-    _ensure_object,
-    _ensure_categorical,
-    _ensure_platform_int,
+    ensure_int64,
+    ensure_object,
+    ensure_categorical,
+    ensure_platform_int,
     is_integer,
     is_float,
     is_dtype_equal,
@@ -1867,7 +1867,7 @@ def is_type_compatible(self, kind):
     def is_all_dates(self):
         if self._data is None:
             return False
-        return is_datetime_array(_ensure_object(self.values))
+        return is_datetime_array(ensure_object(self.values))
 
     def __reduce__(self):
         d = dict(data=self._data)
@@ -2071,7 +2071,7 @@ def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
         if kwargs:
             nv.validate_take(tuple(), kwargs)
-        indices = _ensure_platform_int(indices)
+        indices = ensure_platform_int(indices)
         if self._can_hold_na:
             taken = self._assert_take_fillable(self.values, indices,
                                                allow_fill=allow_fill,
@@ -2087,7 +2087,7 @@ def take(self, indices, axis=0, allow_fill=True,
     def _assert_take_fillable(self, values, indices, allow_fill=True,
                               fill_value=None, na_value=np.nan):
         """ Internal method to handle NA filling of take """
-        indices = _ensure_platform_int(indices)
+        indices = ensure_platform_int(indices)
 
         # only fill if we are passing a non-None fill_value
         if allow_fill and fill_value is not None:
@@ -2679,7 +2679,7 @@ def union(self, other):
         """
         self._assert_can_do_setop(other)
-        other = _ensure_index(other)
+        other = ensure_index(other)
 
         if len(other) == 0 or self.equals(other):
             return self._get_consensus_name(other)
@@ -2779,7 +2779,7 @@ def intersection(self, other):
         """
         self._assert_can_do_setop(other)
-        other = _ensure_index(other)
+        other = ensure_index(other)
 
         if self.equals(other):
             return self._get_consensus_name(other)
@@ -3234,7 +3234,7 @@ def droplevel(self, level=0):
     @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
         method = missing.clean_reindex_fill_method(method)
-        target = _ensure_index(target)
+        target = ensure_index(target)
         if tolerance is not None:
             tolerance = self._convert_tolerance(tolerance, target)
 
@@ -3242,7 +3242,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
 
         # this fix False and True would be treated as 0 and 1 respectively.
         # (GH #16877)
         if target.is_boolean() and self.is_numeric():
-            return _ensure_platform_int(np.repeat(-1, target.size))
+            return ensure_platform_int(np.repeat(-1, target.size))
 
         pself, ptarget = self._maybe_promote(target)
         if pself is not self or ptarget is not target:
@@ -3273,7 +3273,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
 
             indexer = self._engine.get_indexer(target._ndarray_values)
 
-        return _ensure_platform_int(indexer)
+        return ensure_platform_int(indexer)
 
     def _convert_tolerance(self, tolerance, target):
         # override this method on subclasses
@@ -3375,7 +3375,7 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance):
 
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
-        target = _ensure_index(target)
+        target = ensure_index(target)
         if is_categorical(target):
             target = target.astype(target.dtype.categories.dtype)
         pself, ptarget = self._maybe_promote(target)
@@ -3389,7 +3389,7 @@ def get_indexer_non_unique(self, target):
             tgt_values = target._ndarray_values
 
         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
-        return _ensure_platform_int(indexer), missing
+        return ensure_platform_int(indexer), missing
 
     def get_indexer_for(self, target, **kwargs):
         """
@@ -3431,7 +3431,7 @@ def groupby(self, values):
         from .multi import MultiIndex
         if isinstance(values, MultiIndex):
             values = values.values
-        values = _ensure_categorical(values)
+        values = ensure_categorical(values)
         result = values._reverse_indexer()
 
         # map to the label
@@ -3619,7 +3619,7 @@ def reindex(self, target, method=None, level=None, limit=None,
             attrs.pop('freq', None)  # don't preserve freq
             target = self._simple_new(None, dtype=self.dtype, **attrs)
         else:
-            target = _ensure_index(target)
+            target = ensure_index(target)
 
         if level is not None:
             if method is not None:
@@ -3667,7 +3667,7 @@ def _reindex_non_unique(self, target):
 
         """
 
-        target = _ensure_index(target)
+        target = ensure_index(target)
         indexer, missing = self.get_indexer_non_unique(target)
         check = indexer != -1
         new_labels = self.take(indexer[check])
@@ -3676,11 +3676,11 @@ def _reindex_non_unique(self, target):
 
         if len(missing):
             length = np.arange(len(indexer))
-            missing = _ensure_platform_int(missing)
+            missing = ensure_platform_int(missing)
             missing_labels = target.take(missing)
-            missing_indexer = _ensure_int64(length[~check])
+            missing_indexer = ensure_int64(length[~check])
             cur_labels = self.take(indexer[check]).values
-            cur_indexer = _ensure_int64(length[check])
+            cur_indexer = ensure_int64(length[check])
 
             new_labels = np.empty(tuple([len(indexer)]), dtype=object)
             new_labels[cur_indexer] = cur_labels
@@ -3754,7 +3754,7 @@ def join(self, other, how='left', level=None, return_indexers=False,
             return self._join_level(other, level, how=how,
                                     return_indexers=return_indexers)
 
-        other = _ensure_index(other)
+        other = ensure_index(other)
 
         if len(other) == 0 and how in ('left', 'outer'):
             join_index = self._shallow_copy()
@@ -3881,8 +3881,8 @@ def _join_non_unique(self, other, how='left', return_indexers=False):
                                                  how=how,
                                                  sort=True)
 
-        left_idx = _ensure_platform_int(left_idx)
-        right_idx = _ensure_platform_int(right_idx)
+        left_idx = ensure_platform_int(left_idx)
+        right_idx = ensure_platform_int(right_idx)
 
         join_index = np.asarray(self._ndarray_values.take(left_idx))
         mask = left_idx == -1
@@ -3915,7 +3915,7 @@ def _get_leaf_sorter(labels):
                 return np.empty(0, dtype='int64')
 
             if len(labels) == 1:
-                lab = _ensure_int64(labels[0])
+                lab = ensure_int64(labels[0])
                 sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
                 return sorter
 
@@ -3926,8 +3926,8 @@ def _get_leaf_sorter(labels):
             tic |= lab[:-1] != lab[1:]
 
             starts = np.hstack(([True], tic, [True])).nonzero()[0]
-            lab = _ensure_int64(labels[-1])
-            return lib.get_level_sorter(lab, _ensure_int64(starts))
+            lab = ensure_int64(labels[-1])
+            return lib.get_level_sorter(lab, ensure_int64(starts))
 
         if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
             raise TypeError('Join on level between two MultiIndex objects '
@@ -3959,7 +3959,7 @@ def _get_leaf_sorter(labels):
                     join_index = left[left_indexer]
 
         else:
-            left_lev_indexer = _ensure_int64(left_lev_indexer)
+            left_lev_indexer = ensure_int64(left_lev_indexer)
             rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
                                                   len(old_level))
 
@@ -4018,9 +4018,9 @@ def _get_leaf_sorter(labels):
 
         if return_indexers:
             left_indexer = (None if left_indexer is None
-                            else _ensure_platform_int(left_indexer))
+                            else ensure_platform_int(left_indexer))
             right_indexer = (None if right_indexer is None
-                             else _ensure_platform_int(right_indexer))
+                             else ensure_platform_int(right_indexer))
             return join_index, left_indexer, right_indexer
         else:
             return join_index
@@ -4064,8 +4064,8 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
             join_index = self._wrap_joined_index(join_index, other)
 
         if return_indexers:
-            lidx = None if lidx is None else _ensure_platform_int(lidx)
-            ridx = None if ridx is None else _ensure_platform_int(ridx)
+            lidx = None if lidx is None else ensure_platform_int(lidx)
+            ridx = None if ridx is None else ensure_platform_int(ridx)
             return join_index, lidx, ridx
         else:
             return join_index
@@ -4883,7 +4883,7 @@ def _add_logical_methods_disabled(cls):
 Index._add_comparison_methods()
 
 
-def _ensure_index_from_sequences(sequences, names=None):
+def ensure_index_from_sequences(sequences, names=None):
     """Construct an index from sequences of data.
 
     A single sequence returns an Index. Many sequences returns a
@@ -4900,18 +4900,18 @@ def ensure_index_from_sequences(sequences, names=None):
 
     Examples
     --------
-    >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name'])
+    >>> ensure_index_from_sequences([[1, 2, 3]], names=['name'])
     Int64Index([1, 2, 3], dtype='int64', name='name')
-    >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
-                                     names=['L1', 'L2'])
+    >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
+                                    names=['L1', 'L2'])
     MultiIndex(levels=[['a'], ['a', 'b']],
                labels=[[0, 0], [0, 1]],
                names=['L1', 'L2'])
 
     See Also
     --------
-    _ensure_index
+    ensure_index
     """
     from .multi import MultiIndex
 
@@ -4923,7 +4923,7 @@ def ensure_index_from_sequences(sequences, names=None):
     return MultiIndex.from_arrays(sequences, names=names)
 
 
-def _ensure_index(index_like, copy=False):
+def ensure_index(index_like, copy=False):
     """
     Ensure that we have an index from some index-like object
 
@@ -4939,19 +4939,19 @@ def ensure_index(index_like, copy=False):
 
     Examples
     --------
-    >>> _ensure_index(['a', 'b'])
+    >>> ensure_index(['a', 'b'])
     Index(['a', 'b'], dtype='object')
 
-    >>> _ensure_index([('a', 'a'), ('b', 'c')])
+    >>> ensure_index([('a', 'a'), ('b', 'c')])
     Index([('a', 'a'), ('b', 'c')], dtype='object')
 
-    >>> _ensure_index([['a', 'a'], ['b', 'c']])
+    >>> ensure_index([['a', 'a'], ['b', 'c']])
     MultiIndex(levels=[['a'], ['b', 'c']],
                labels=[[0, 0], [0, 1]])
 
     See Also
     --------
-    _ensure_index_from_sequences
+    ensure_index_from_sequences
     """
     if isinstance(index_like, Index):
         if copy:
@@ -5009,3 +5009,8 @@ def _trim_front(strings):
 def _validate_join_method(method):
     if method not in ['left', 'right', 'inner', 'outer']:
         raise ValueError('do not recognize join method %s' % method)
+
+
+def default_index(n):
+    from pandas.core.index import RangeIndex
+    return RangeIndex(0, n, name=None)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 7c63b3c667c01..a03e478f81caf 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -9,7 +9,7 @@
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
-    _ensure_platform_int,
+    ensure_platform_int,
     is_list_like,
     is_interval_dtype,
     is_scalar)
@@ -489,7 +489,7 @@ def reindex(self, target, method=None, level=None, limit=None,
             raise NotImplementedError("argument limit is not implemented for "
                                       "CategoricalIndex.reindex")
 
-        target = ibase._ensure_index(target)
+        target = ibase.ensure_index(target)
 
         if not is_categorical_dtype(target) and not target.is_unique:
             raise ValueError("cannot reindex with a non-unique indexer")
@@ -554,7 +554,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         from pandas.core.arrays.categorical import _recode_for_categories
 
         method = missing.clean_reindex_fill_method(method)
-        target = ibase._ensure_index(target)
+        target = ibase.ensure_index(target)
 
         if self.is_unique and self.equals(target):
             return np.arange(len(self), dtype='intp')
@@ -583,23 +583,23 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
 
         codes = self.categories.get_indexer(target)
         indexer, _ = self._engine.get_indexer_non_unique(codes)
-        return _ensure_platform_int(indexer)
+        return ensure_platform_int(indexer)
 
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
-        target = ibase._ensure_index(target)
+        target = ibase.ensure_index(target)
 
         if isinstance(target, CategoricalIndex):
             # Indexing on codes is more efficient if categories are the same:
             if target.categories is self.categories:
                 target = target.codes
                 indexer, missing = self._engine.get_indexer_non_unique(target)
-                return _ensure_platform_int(indexer), missing
+                return ensure_platform_int(indexer), missing
             target = target.values
 
         codes = self.categories.get_indexer(target)
         indexer, missing = self._engine.get_indexer_non_unique(codes)
-        return _ensure_platform_int(indexer), missing
+        return ensure_platform_int(indexer), missing
 
     @Appender(_index_shared_docs['_convert_scalar_indexer'])
     def _convert_scalar_indexer(self, key, kind=None):
@@ -644,7 +644,7 @@ def _convert_index_indexer(self, keyarr):
     def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
         nv.validate_take(tuple(), kwargs)
-        indices = _ensure_platform_int(indices)
+        indices = ensure_platform_int(indices)
         taken = self._assert_take_fillable(self.codes, indices,
                                            allow_fill=allow_fill,
                                            fill_value=fill_value,
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 3f0bdf18f7230..3ae5eb3a8dbf5 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -17,7 +17,7 @@
 from pandas._libs.tslibs.timestamps import round_ns
 
 from pandas.core.dtypes.common import (
-    _ensure_int64,
+    ensure_int64,
     is_dtype_equal,
     is_float,
     is_integer,
@@ -391,7 +391,7 @@ def sort_values(self, return_indexer=False, ascending=True):
     def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
         nv.validate_take(tuple(), kwargs)
-        indices = _ensure_int64(indices)
+        indices = ensure_int64(indices)
 
         maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
         if isinstance(maybe_slice, slice):
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 4732178d552be..7257be421c3e1 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -25,7 +25,7 @@
     is_list_like,
     is_scalar,
     pandas_dtype,
-    _ensure_int64)
+    ensure_int64)
 from pandas.core.dtypes.generic import ABCSeries
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
@@ -504,7 +504,7 @@ def _generate_range(cls, start, end, periods, name, freq, tz=None,
                                              periods, freq)
 
             if tz is not None and getattr(index, 'tz', None) is None:
-                arr = conversion.tz_localize_to_utc(_ensure_int64(index),
+                arr = conversion.tz_localize_to_utc(ensure_int64(index),
                                                     tz,
                                                     ambiguous=ambiguous)
 
@@ -563,7 +563,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
             values = np.array(values, copy=False)
 
         if not is_datetime64_dtype(values):
-            values = _ensure_int64(values).view(_NS_DTYPE)
+            values = ensure_int64(values).view(_NS_DTYPE)
 
         values = getattr(values, 'values', values)
 
@@ -1607,7 +1607,7 @@ def delete(self, loc):
         else:
             if is_list_like(loc):
                 loc = lib.maybe_indices_to_slice(
-                    _ensure_int64(np.array(loc)), len(self))
+                    ensure_int64(np.array(loc)), len(self))
             if isinstance(loc, slice) and loc.step in (1, None):
                 if (loc.start in (0, None) or loc.stop in (len(self), None)):
                     freq = self.freq
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 9375a60d0964c..e92f980caf3dc 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -8,7 +8,7 @@
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.cast import find_common_type, maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
-    _ensure_platform_int,
+    ensure_platform_int,
     is_list_like,
     is_datetime_or_timedelta_dtype,
     is_datetime64tz_dtype,
@@ -21,7 +21,7 @@
     is_number,
     is_integer)
 from pandas.core.indexes.base import (
-    Index, _ensure_index,
+    Index, ensure_index,
     default_pprint, _index_shared_docs)
 
 from pandas._libs import Timestamp, Timedelta
@@ -700,7 +700,7 @@ def get_value(self, series, key):
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
         self._check_method(method)
 
-        target = _ensure_index(target)
+        target = ensure_index(target)
         target = self._maybe_cast_indexed(target)
 
         if self.equals(target):
@@ -724,7 +724,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         else:
             indexer = np.concatenate([self.get_loc(i) for i in target])
 
-        return _ensure_platform_int(indexer)
+        return ensure_platform_int(indexer)
 
     def _get_reindexer(self, target):
         """
@@ -799,7 +799,7 @@ def _get_reindexer(self, target):
 
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
-        target = self._maybe_cast_indexed(_ensure_index(target))
+        target = self._maybe_cast_indexed(ensure_index(target))
         return super(IntervalIndex, self).get_indexer_non_unique(target)
 
     @Appender(_index_shared_docs['where'])
@@ -855,7 +855,7 @@ def insert(self, loc, item):
 
     def _as_like_interval_index(self, other):
         self._assert_can_do_setop(other)
-        other = _ensure_index(other)
+        other = ensure_index(other)
         if not isinstance(other, IntervalIndex):
             msg = ('the other index needs to be an IntervalIndex too, but '
                    'was type {}').format(other.__class__.__name__)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index a791ce1d87264..0d4ceb2783bad 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -14,8 +14,8 @@
 from pandas.core.dtypes.dtypes import (
     ExtensionDtype, PandasExtensionDtype)
 from pandas.core.dtypes.common import (
-    _ensure_int64,
-    _ensure_platform_int,
+    ensure_int64,
+    ensure_platform_int,
     is_categorical_dtype,
     is_object_dtype,
     is_hashable,
@@ -36,7 +36,7 @@
 from pandas.core.config import get_option
 
 from pandas.core.indexes.base import (
-    Index, _ensure_index,
+    Index, ensure_index,
     InvalidIndexError,
     _index_shared_docs)
 from pandas.core.indexes.frozen import (
@@ -302,13 +302,13 @@ def _set_levels(self, levels, level=None, copy=False, validate=True,
 
         if level is None:
             new_levels = FrozenList(
-                _ensure_index(lev, copy=copy)._shallow_copy()
+                ensure_index(lev, copy=copy)._shallow_copy()
                 for lev in levels)
         else:
             level = [self._get_level_number(l) for l in level]
             new_levels = list(self._levels)
             for l, v in zip(level, levels):
-                new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy()
+                new_levels[l] = ensure_index(v, copy=copy)._shallow_copy()
             new_levels = FrozenList(new_levels)
 
         if verify_integrity:
@@ -1227,7 +1227,7 @@ def lexsort_depth(self):
             else:
                 return 0
 
-        int64_labels = [_ensure_int64(lab) for lab in self.labels]
+        int64_labels = [ensure_int64(lab) for lab in self.labels]
         for k in range(self.nlevels, 0, -1):
             if libalgos.is_lexsorted(int64_labels[:k]):
                 return k
@@ -1431,7 +1431,7 @@ def _sort_levels_monotonic(self):
                 lev = lev.take(indexer)
 
                 # indexer to reorder the labels
-                indexer = _ensure_int64(indexer)
+                indexer = ensure_int64(indexer)
                 ri = lib.get_reverse_indexer(indexer, len(indexer))
                 lab = algos.take_1d(ri, lab)
 
@@ -1594,7 +1594,7 @@ def __getitem__(self, key):
     def take(self, indices, axis=0, allow_fill=True,
              fill_value=None, **kwargs):
         nv.validate_take(tuple(), kwargs)
-        indices = _ensure_platform_int(indices)
+        indices = ensure_platform_int(indices)
         taken = self._assert_take_fillable(self.labels, indices,
                                            allow_fill=allow_fill,
                                           fill_value=fill_value,
@@ -1895,7 +1895,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True):
             if not ascending:
                 indexer = indexer[::-1]
 
-        indexer = _ensure_platform_int(indexer)
+        indexer = ensure_platform_int(indexer)
         new_labels = [lab.take(indexer) for lab in self.labels]
 
         new_index = MultiIndex(labels=new_labels, levels=self.levels,
@@ -1940,11 +1940,11 @@ def _convert_listlike_indexer(self, keyarr, kind=None):
     @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
         method = missing.clean_reindex_fill_method(method)
-        target = _ensure_index(target)
+        target = ensure_index(target)
 
         # empty indexer
         if is_list_like(target) and not len(target):
-            return _ensure_platform_int(np.array([]))
+            return ensure_platform_int(np.array([]))
 
         if not isinstance(target, MultiIndex):
             try:
@@ -1973,7 +1973,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         else:
             indexer = self._engine.get_indexer(target)
 
-        return _ensure_platform_int(indexer)
+        return ensure_platform_int(indexer)
 
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
@@ -2010,12 +2010,12 @@ def reindex(self, target, method=None, level=None, limit=None,
                 target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype),
                                                **attrs)
             else:
-                target = _ensure_index(target)
+                target = ensure_index(target)
             target, indexer, _ = self._join_level(target, level, how='right',
                                                   return_indexers=True,
                                                   keep_order=False)
         else:
-            target = _ensure_index(target)
+            target = ensure_index(target)
             if self.equals(target):
                 indexer = None
             else:
@@ -2399,7 +2399,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
                 # selected
                 from pandas import Series
                 mapper = Series(indexer)
-                indexer = labels.take(_ensure_platform_int(indexer))
+                indexer = labels.take(ensure_platform_int(indexer))
                 result = Series(Index(indexer).isin(r).nonzero()[0])
                 m = result.map(mapper)._ndarray_values
 
@@ -2628,7 +2628,7 @@ def equals(self, other):
             return False
 
         if not isinstance(other, MultiIndex):
-            other_vals = com._values_from_object(_ensure_index(other))
+            other_vals = com._values_from_object(ensure_index(other))
             return array_equivalent(self._ndarray_values, other_vals)
 
         if self.nlevels != other.nlevels:
@@ -2826,7 +2826,7 @@ def insert(self, loc, item):
             lev_loc = level.get_loc(k)
 
             new_levels.append(level)
-            new_labels.append(np.insert(_ensure_int64(labels), loc, lev_loc))
+            new_labels.append(np.insert(ensure_int64(labels), loc, lev_loc))
 
         return MultiIndex(levels=new_levels, labels=new_labels,
                           names=self.names, verify_integrity=False)
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index a8e0c7f1aaa6a..841d1e69485ca 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -15,7 +15,7 @@
     is_period_dtype,
     is_bool_dtype,
     pandas_dtype,
-    _ensure_object)
+    ensure_object)
 
 import pandas.tseries.frequencies as frequencies
 from pandas.tseries.frequencies import get_freq_code as _gfc
@@ -33,7 +33,7 @@
 from pandas.core.arrays.period import PeriodArrayMixin
 from pandas.core.base import _shared_docs
-from pandas.core.indexes.base import _index_shared_docs, _ensure_index
+from pandas.core.indexes.base import _index_shared_docs, ensure_index
 
 from pandas import compat
 from pandas.util._decorators import (Appender, Substitution, cache_readonly,
@@ -255,7 +255,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
                                      "floating point in construction")
 
             # anything else, likely an array of strings or periods
-            data = _ensure_object(data)
+            data = ensure_object(data)
             freq = freq or period.extract_freq(data)
             data = period.extract_ordinals(data, freq)
         return cls._from_ordinals(data, name=name, freq=freq)
@@ -567,7 +567,7 @@ def get_value(self, series, key):
 
     @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
-        target = _ensure_index(target)
+        target = ensure_index(target)
 
         if hasattr(target, 'freq') and target.freq != self.freq:
             msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, target.freqstr)
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 4e192548a1f2d..939ec0b79ac6b 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -81,7 +81,7 @@ def __new__(cls, start=None, stop=None, step=None,
                                    **dict(start._get_data_as_items()))
 
         # validate the arguments
-        def _ensure_int(value, field):
+        def ensure_int(value, field):
             msg = ("RangeIndex(...) must be called with integers,"
                    " {value} was passed for {field}")
             if not is_scalar(value):
@@ -102,18 +102,18 @@ def _ensure_int(value, field):
         elif start is None:
             start = 0
         else:
-            start = _ensure_int(start, 'start')
+            start = ensure_int(start, 'start')
 
         if stop is None:
             stop = start
             start = 0
         else:
-            stop = _ensure_int(stop, 'stop')
+            stop = ensure_int(stop, 'stop')
 
         if step is None:
             step = 1
         elif step == 0:
             raise ValueError("Step must not be zero")
         else:
-            step = _ensure_int(step, 'step')
+            step = ensure_int(step, 'step')
 
         return cls._simple_new(start, stop, step, name)
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index 1ed6145f01a44..dc26c9cc0c248 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -11,7 +11,7 @@
     is_timedelta64_dtype,
     is_timedelta64_ns_dtype,
     pandas_dtype,
-    _ensure_int64)
+    ensure_int64)
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.generic import ABCSeries
 
@@ -736,7 +736,7 @@ def delete(self, loc):
         else:
             if is_list_like(loc):
                 loc = lib.maybe_indices_to_slice(
-                    _ensure_int64(np.array(loc)), len(self))
+                    ensure_int64(np.array(loc)), len(self))
             if isinstance(loc, slice) and loc.step in (1, None):
                 if (loc.start in (0, None) or loc.stop in (len(self), None)):
                     freq = self.freq
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ec06099e3bbd2..8ffc7548059b7 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -13,7 +13,7 @@
     is_iterator,
     is_scalar,
     is_sparse,
-    _ensure_platform_int)
+    ensure_platform_int)
 from pandas.core.dtypes.missing import isna, _infer_fill_value
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import Appender
@@ -1483,7 +1483,7 @@ def _convert_for_reindex(self, key, axis=None):
             keyarr = labels._convert_arr_indexer(keyarr)
 
             if not labels.is_integer():
-                keyarr = _ensure_platform_int(keyarr)
+                keyarr = ensure_platform_int(keyarr)
                 return labels.take(keyarr)
 
         return keyarr
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 208d7b8bcf8a7..5a5418dcc1e7f 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -21,7 +21,7 @@
     CategoricalDtype)
 from pandas.core.dtypes.common import (
     _TD_DTYPE, _NS_DTYPE,
-    _ensure_int64, _ensure_platform_int,
+    ensure_int64, ensure_platform_int,
     is_integer,
     is_dtype_equal,
     is_timedelta64_dtype,
@@ -65,7 +65,7 @@
 import pandas.core.common as com
 import pandas.core.algorithms as algos
 
-from pandas.core.index import Index, MultiIndex, _ensure_index
+from pandas.core.index import Index, MultiIndex, ensure_index
 from pandas.core.indexing import maybe_convert_indices, check_setitem_lengths
 from pandas.core.arrays import Categorical
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -1297,7 +1297,7 @@ def shift(self, periods, axis=0, mgr=None):
             axis = new_values.ndim - axis - 1
 
         if np.prod(new_values.shape):
-            new_values = np.roll(new_values, _ensure_platform_int(periods),
+            new_values = np.roll(new_values, ensure_platform_int(periods),
                                  axis=axis)
 
         axis_indexer = [slice(None)] * self.ndim
@@ -3271,7 +3271,7 @@ class BlockManager(PandasObject):
                  '_is_consolidated', '_blknos', '_blklocs']
 
     def __init__(self, blocks, axes, do_integrity_check=True):
-        self.axes = [_ensure_index(ax) for ax in axes]
+        self.axes = [ensure_index(ax) for ax in axes]
         self.blocks = tuple(blocks)
 
         for block in blocks:
@@ -3296,8 +3296,8 @@ def __init__(self, blocks, axes, do_integrity_check=True):
     def make_empty(self, axes=None):
         """ return an empty BlockManager with the items axis of len 0 """
         if axes is None:
-            axes = [_ensure_index([])] + [_ensure_index(a)
-                                          for a in self.axes[1:]]
+            axes = [ensure_index([])] + [ensure_index(a)
+                                         for a in self.axes[1:]]
 
         # preserve dtype if possible
         if self.ndim == 1:
@@ -3321,7 +3321,7 @@ def ndim(self):
         return len(self.axes)
 
     def set_axis(self, axis, new_labels):
-        new_labels = _ensure_index(new_labels)
+        new_labels = ensure_index(new_labels)
         old_len = len(self.axes[axis])
         new_len = len(new_labels)
 
@@ -3444,7 +3444,7 @@ def unpickle_block(values, mgr_locs):
         if (isinstance(state, tuple) and len(state) >= 4 and
                 '0.14.1' in state[3]):
             state = state[3]['0.14.1']
-            self.axes = [_ensure_index(ax) for ax in state['axes']]
+            self.axes = [ensure_index(ax) for ax in state['axes']]
             self.blocks = tuple(unpickle_block(b['values'], b['mgr_locs'])
                                 for b in state['blocks'])
         else:
@@ -3452,7 +3452,7 @@ def unpickle_block(values, mgr_locs):
             # little while longer
             ax_arrays, bvalues, bitems = state[:3]
 
-            self.axes = [_ensure_index(ax) for ax in ax_arrays]
+            self.axes = [ensure_index(ax) for ax in ax_arrays]
 
             if len(bitems) == 1 and self.axes[0].equals(bitems[0]):
                 # This is a workaround for pre-0.14.1 pickles that didn't
@@ -4386,7 +4386,7 @@ def reindex_axis(self, new_index, axis, method=None, limit=None,
        """
        Conform block manager to new index.
""" - new_index = _ensure_index(new_index) + new_index = ensure_index(new_index) new_index, indexer = self.axes[axis].reindex(new_index, method=method, limit=limit) @@ -4665,7 +4665,7 @@ def __init__(self, block, axis, do_integrity_check=False, fastpath=False): 'more than 1 block') block = block[0] else: - self.axes = [_ensure_index(axis)] + self.axes = [ensure_index(axis)] # create the block here if isinstance(block, list): @@ -4891,7 +4891,7 @@ def form_blocks(arrays, names, axes): items_dict = defaultdict(list) extra_locs = [] - names_idx = _ensure_index(names) + names_idx = ensure_index(names) if names_idx.equals(axes[0]): names_indexer = np.arange(len(names_idx)) else: @@ -5209,7 +5209,7 @@ def _factor_indexer(shape, labels): expanded label indexer """ mult = np.array(shape)[::-1].cumprod()[::-1] - return _ensure_platform_int( + return ensure_platform_int( np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T) @@ -5229,7 +5229,7 @@ def _get_blkno_placements(blknos, blk_count, group=True): """ - blknos = _ensure_int64(blknos) + blknos = ensure_int64(blknos) # FIXME: blk_count is unused, but it may avoid the use of dicts in cython for blkno, indexer in libinternals.get_blkno_indexers(blknos, group): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e9b9a734ec5f5..16820dcbb55bc 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -18,7 +18,7 @@ is_scalar, is_integer, needs_i8_conversion, - _ensure_float64) + ensure_float64) from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.missing import isna @@ -480,7 +480,7 @@ def pad_1d(values, limit=None, mask=None, dtype=None): elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_1d_datetime elif is_integer_dtype(values): - values = _ensure_float64(values) + values = ensure_float64(values) _method = algos.pad_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_inplace_object @@ -506,7 +506,7 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_1d_datetime elif is_integer_dtype(values): - values = _ensure_float64(values) + values = ensure_float64(values) _method = algos.backfill_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_inplace_object @@ -533,7 +533,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None): elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_2d_datetime elif is_integer_dtype(values): - values = _ensure_float64(values) + values = ensure_float64(values) _method = algos.pad_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_2d_inplace_object @@ -564,7 +564,7 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_2d_datetime elif is_integer_dtype(values): - values = _ensure_float64(values) + values = ensure_float64(values) _method = algos.backfill_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_2d_inplace_object diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 1ddf77cf71a11..bccc5a587bd83 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -34,7 +34,7 @@ is_list_like, is_scalar, is_extension_array_dtype, - _ensure_object) + ensure_object) from pandas.core.dtypes.cast import ( maybe_upcast_putmask, find_common_type, construct_1d_object_array_from_listlike) @@ -1387,8 +1387,8 @@ def na_op(x, y): if 
(is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: - x = _ensure_object(x) - y = _ensure_object(y) + x = ensure_object(x) + y = ensure_object(y) result = libops.vec_binop(x, y, op) else: # let null fall thru diff --git a/pandas/core/panel.py b/pandas/core/panel.py index a1812cb5801b9..16ade3fae90a1 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -17,12 +17,13 @@ import pandas.core.ops as ops import pandas.core.common as com +import pandas.core.indexes.base as ibase from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict) from pandas.compat.numpy import function as nv from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.index import (Index, MultiIndex, _ensure_index, +from pandas.core.index import (Index, MultiIndex, ensure_index, _get_objs_combined_axis) from pandas.io.formats.printing import pprint_thing from pandas.core.indexing import maybe_droplevels @@ -198,7 +199,7 @@ def _init_dict(self, data, axes, dtype=None): # prefilter if haxis passed if haxis is not None: - haxis = _ensure_index(haxis) + haxis = ensure_index(haxis) data = OrderedDict((k, v) for k, v in compat.iteritems(data) if k in haxis) @@ -319,9 +320,9 @@ def _init_matrix(self, data, axes, dtype=None, copy=False): fixed_axes = [] for i, ax in enumerate(axes): if ax is None: - ax = com._default_index(shape[i]) + ax = ibase.default_index(shape[i]) else: - ax = _ensure_index(ax) + ax = ensure_index(ax) fixed_axes.append(ax) return create_block_manager_from_blocks([values], fixed_axes) @@ -1536,7 +1537,7 @@ def _extract_axis(self, data, axis=0, intersect=False): if index is None: index = Index([]) - return _ensure_index(index) + return ensure_index(index) Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index b36e9b8d900fd..1d6105cb68bf1 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -5,12 +5,13 @@ import numpy as np from pandas import compat, DataFrame, Series, Index, MultiIndex from pandas.core.index import (_get_objs_combined_axis, - _ensure_index, _get_consensus_names, + ensure_index, _get_consensus_names, _all_indexes_same) from pandas.core.arrays.categorical import (_factorize_from_iterable, _factorize_from_iterables) from pandas.core.internals import concatenate_block_managers from pandas.core import common as com +import pandas.core.indexes.base as ibase from pandas.core.generic import NDFrame import pandas.core.dtypes.concat as _concat @@ -477,7 +478,7 @@ def _get_concat_axis(self): if self.axis == 0: indexes = [x.index for x in self.objs] elif self.ignore_index: - idx = com._default_index(len(self.objs)) + idx = ibase.default_index(len(self.objs)) return idx elif self.keys is None: names = [None] * len(self.objs) @@ -497,14 +498,14 @@ def _get_concat_axis(self): if has_names: return Index(names) else: - return com._default_index(len(self.objs)) + return ibase.default_index(len(self.objs)) else: - return _ensure_index(self.keys) + return ensure_index(self.keys) else: indexes = [x._data.axes[self.axis] for x in self.objs] if self.ignore_index: - idx = com._default_index(sum(len(i) for i in indexes)) + idx = ibase.default_index(sum(len(i) for i in indexes)) return idx if self.keys is None: @@ -540,16 +541,16 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): if levels is None: _, levels = 
_factorize_from_iterables(zipped) else: - levels = [_ensure_index(x) for x in levels] + levels = [ensure_index(x) for x in levels] else: zipped = [keys] if names is None: names = [None] if levels is None: - levels = [_ensure_index(keys)] + levels = [ensure_index(keys)] else: - levels = [_ensure_index(x) for x in levels] + levels = [ensure_index(x) for x in levels] if not _all_indexes_same(indexes): label_list = [] @@ -608,7 +609,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): # do something a bit more speedy for hlevel, level in zip(zipped, levels): - hlevel = _ensure_index(hlevel) + hlevel = ensure_index(hlevel) mapped = level.get_indexer(hlevel) mask = mapped == -1 diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e38c069b3c3fb..25d8cb4e804a2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -31,9 +31,9 @@ is_bool_dtype, is_list_like, is_datetimelike, - _ensure_int64, - _ensure_float64, - _ensure_object, + ensure_int64, + ensure_float64, + ensure_object, _get_dtype) from pandas.core.dtypes.missing import na_value_for_dtype from pandas.core.internals import (items_overlap_with_suffix, @@ -1212,9 +1212,9 @@ def _asof_by_function(direction, on_type, by_type): _type_casters = { - 'int64_t': _ensure_int64, - 'double': _ensure_float64, - 'object': _ensure_object, + 'int64_t': ensure_int64, + 'double': ensure_float64, + 'object': ensure_object, } _cython_types = { @@ -1490,8 +1490,8 @@ def _get_single_indexer(join_key, index, sort=False): left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) left_indexer, right_indexer = libjoin.left_outer_join( - _ensure_int64(left_key), - _ensure_int64(right_key), + ensure_int64(left_key), + ensure_int64(right_key), count, sort=sort) return left_indexer, right_indexer @@ -1553,16 +1553,16 @@ def _factorize_keys(lk, rk, sort=True): # Same categories in different orders -> recode rk = _recode_for_categories(rk.codes, rk.categories, lk.categories) - lk = _ensure_int64(lk.codes) - rk = _ensure_int64(rk) + lk = ensure_int64(lk.codes) + rk = ensure_int64(rk) elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): klass = libhashtable.Int64Factorizer - lk = _ensure_int64(com._values_from_object(lk)) - rk = _ensure_int64(com._values_from_object(rk)) + lk = ensure_int64(com._values_from_object(lk)) + rk = ensure_int64(com._values_from_object(rk)) else: klass = libhashtable.Factorizer - lk = _ensure_object(lk) - rk = _ensure_object(rk) + lk = ensure_object(lk) + rk = ensure_object(rk) rizer = klass(max(len(lk), len(rk))) @@ -1600,7 +1600,7 @@ def _sort_labels(uniques, left, right): labels = np.concatenate([left, right]) _, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1) - new_labels = _ensure_int64(new_labels) + new_labels = ensure_int64(new_labels) new_left, new_right = new_labels[:llength], new_labels[llength:] return new_left, new_right diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d5d2e594b8d6b..2f2dc1264e996 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -8,7 +8,7 @@ import numpy as np from pandas.core.dtypes.common import ( - _ensure_platform_int, + ensure_platform_int, is_list_like, is_bool_dtype, needs_i8_conversion, is_sparse, is_object_dtype) from pandas.core.dtypes.cast import maybe_promote @@ -141,7 +141,7 @@ def _make_sorted_values_labels(self): ngroups = len(obs_ids) indexer = _algos.groupsort_indexer(comp_index, ngroups)[0] - indexer = 
_ensure_platform_int(indexer) + indexer = ensure_platform_int(indexer) self.sorted_values = algos.take_nd(self.values, indexer, axis=0) self.sorted_labels = [l.take(indexer) for l in to_sort] @@ -156,7 +156,7 @@ def _make_selectors(self): comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes) ngroups = len(obs_ids) - comp_index = _ensure_platform_int(comp_index) + comp_index = ensure_platform_int(comp_index) stride = self.index.levshape[self.level] + self.lift self.full_shape = ngroups, stride diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index bbdce762feee3..031c94c06d3c8 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -12,7 +12,7 @@ is_timedelta64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, - _ensure_int64) + ensure_int64) import pandas.core.algorithms as algos import pandas.core.nanops as nanops @@ -335,7 +335,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, bins = unique_bins side = 'left' if right else 'right' - ids = _ensure_int64(bins.searchsorted(x, side=side)) + ids = ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: # Numpy 1.9 support: ensure this mask is a Numpy array diff --git a/pandas/core/series.py b/pandas/core/series.py index 0bdb9d9cc23a6..77445159129f2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -32,7 +32,7 @@ is_dict_like, is_scalar, _is_unorderable_exception, - _ensure_platform_int, + ensure_platform_int, pandas_dtype) from pandas.core.dtypes.generic import ( ABCSparseArray, ABCDataFrame, ABCIndexClass) @@ -51,7 +51,7 @@ na_value_for_dtype) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, - Float64Index, _ensure_index) + Float64Index, ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices from pandas.core import generic, base from pandas.core.internals import SingleBlockManager @@ -71,6 +71,8 @@ import pandas.core.common as com import pandas.core.nanops as nanops +import pandas.core.indexes.base as ibase + import pandas.io.formats.format as fmt from pandas.util._decorators import ( Appender, deprecate, deprecate_kwarg, Substitution) @@ -187,7 +189,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: if index is not None: - index = _ensure_index(index) + index = ensure_index(index) if data is None: data = {} @@ -256,7 +258,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if index is None: if not is_list_like(data): data = [data] - index = com._default_index(len(data)) + index = ibase.default_index(len(data)) elif is_list_like(data): # a scalar numpy array is list-like but doesn't @@ -373,7 +375,7 @@ def _set_axis(self, axis, labels, fastpath=False): """ override generic, we want to set the _typ here """ if not fastpath: - labels = _ensure_index(labels) + labels = ensure_index(labels) is_all_dates = labels.is_all_dates if is_all_dates: @@ -1202,7 +1204,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): """ inplace = validate_bool_kwarg(inplace, 'inplace') if drop: - new_index = com._default_index(len(self)) + new_index = ibase.default_index(len(self)) if level is not None: if not isinstance(level, (tuple, list)): level = [level] @@ -2079,7 +2081,7 @@ def __rmatmul__(self, other): @deprecate_kwarg(old_arg_name='v', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): if sorter is not None: - sorter = _ensure_platform_int(sorter) + sorter = ensure_platform_int(sorter) return 
self._values.searchsorted(Series(value)._values, side=side, sorter=sorter) @@ -2500,7 +2502,7 @@ def _try_kind_sort(arr): bad = isna(arr) good = ~bad - idx = com._default_index(len(self)) + idx = ibase.default_index(len(self)) argsorted = _try_kind_sort(arr[good]) @@ -2676,7 +2678,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, indexer = nargsort(index, kind=kind, ascending=ascending, na_position=na_position) - indexer = _ensure_platform_int(indexer) + indexer = ensure_platform_int(indexer) new_index = index.take(indexer) new_index = new_index._sort_levels_monotonic() @@ -3537,7 +3539,7 @@ def memory_usage(self, index=True, deep=False): @Appender(generic._shared_docs['_take']) def _take(self, indices, axis=0, is_copy=False): - indices = _ensure_platform_int(indices) + indices = ensure_platform_int(indices) new_index = self.index.take(indices) if is_categorical_dtype(self): diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 212f44e55c489..5aa9ea658482b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -3,8 +3,8 @@ import numpy as np from pandas.compat import long, string_types, PY3 from pandas.core.dtypes.common import ( - _ensure_platform_int, - _ensure_int64, + ensure_platform_int, + ensure_int64, is_list_like, is_categorical_dtype) from pandas.core.dtypes.cast import infer_dtype_from_array @@ -57,7 +57,7 @@ def maybe_lift(lab, size): # so that all output values are non-negative return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) - labels = map(_ensure_int64, labels) + labels = map(ensure_int64, labels) if not xnull: labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) @@ -338,9 +338,9 @@ def get_group_index_sorter(group_index, ngroups): do_groupsort = (count > 0 and ((alpha + beta * ngroups) < (count * np.log(count)))) if do_groupsort: - sorter, _ = algos.groupsort_indexer(_ensure_int64(group_index), + sorter, _ = algos.groupsort_indexer(ensure_int64(group_index), ngroups) - return _ensure_platform_int(sorter) + return ensure_platform_int(sorter) else: return group_index.argsort(kind='mergesort') @@ -355,7 +355,7 @@ def compress_group_index(group_index, sort=True): size_hint = min(len(group_index), hashtable._SIZE_HINT_LIMIT) table = hashtable.Int64HashTable(size_hint) - group_index = _ensure_int64(group_index) + group_index = ensure_int64(group_index) # note, group labels come out ascending (ie, 1,2,3 etc) comp_ids, obs_group_ids = table.get_labels_groupby(group_index) @@ -462,7 +462,7 @@ def sort_mixed(values): if not is_list_like(labels): raise TypeError("Only list-like objects or None are allowed to be" "passed to safe_sort as labels") - labels = _ensure_platform_int(np.asarray(labels)) + labels = ensure_platform_int(np.asarray(labels)) from pandas import Index if not assume_unique and not Index(values).is_unique: @@ -474,7 +474,7 @@ def sort_mixed(values): values, algorithms._hashtables) t = hash_klass(len(values)) t.map_locations(values) - sorter = _ensure_platform_int(t.lookup(ordered)) + sorter = ensure_platform_int(t.lookup(ordered)) reverse_indexer = np.empty(len(sorter), dtype=np.int_) reverse_indexer.put(sorter, np.arange(len(sorter))) @@ -487,4 +487,4 @@ def sort_mixed(values): new_labels = reverse_indexer.take(labels, mode='wrap') np.putmask(new_labels, mask, na_sentinel) - return ordered, _ensure_platform_int(new_labels) + return ordered, ensure_platform_int(new_labels) diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index ff58f7d104ff9..6f0ffbff22028 100644 --- 
a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -16,7 +16,7 @@ from pandas.core.dtypes.generic import ABCSparseSeries from pandas.core.dtypes.common import ( - _ensure_platform_int, + ensure_platform_int, is_float, is_integer, is_object_dtype, is_integer_dtype, @@ -468,7 +468,7 @@ def take(self, indices, axis=0, allow_fill=True, # return scalar return self[indices] - indices = _ensure_platform_int(indices) + indices = ensure_platform_int(indices) n = len(self) if allow_fill and fill_value is not None: # allow -1 to indicate self.fill_value, diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 1feddf004058a..f7071061d07ab 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -12,10 +12,10 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import maybe_upcast, find_common_type -from pandas.core.dtypes.common import _ensure_platform_int, is_scipy_sparse +from pandas.core.dtypes.common import ensure_platform_int, is_scipy_sparse from pandas.compat.numpy import function as nv -from pandas.core.index import Index, MultiIndex, _ensure_index +from pandas.core.index import Index, MultiIndex, ensure_index from pandas.core.series import Series from pandas.core.frame import DataFrame, extract_index, _prep_ndarray import pandas.core.algorithms as algos @@ -27,6 +27,7 @@ from pandas.util._decorators import Appender import pandas.core.ops as ops import pandas.core.common as com +import pandas.core.indexes.base as ibase _shared_doc_kwargs = dict(klass='SparseDataFrame') @@ -111,7 +112,7 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, if index is None: index = Index([]) else: - index = _ensure_index(index) + index = ensure_index(index) if columns is None: columns = Index([]) @@ -139,7 +140,7 @@ def _constructor(self): def _init_dict(self, data, index, columns, dtype=None): # pre-filter out columns if we passed it if columns is not None: - columns = _ensure_index(columns) + columns = ensure_index(columns) data = {k: v for k, v in compat.iteritems(data) if k in columns} else: keys = com._dict_keys_to_ordered_list(data) @@ -219,9 +220,9 @@ def _init_spmatrix(self, data, index, columns, dtype=None, def _prep_index(self, data, index, columns): N, K = data.shape if index is None: - index = com._default_index(N) + index = ibase.default_index(N) if columns is None: - columns = com._default_index(K) + columns = ibase.default_index(K) if len(columns) != K: raise ValueError('Column length mismatch: {columns} vs. 
{K}' @@ -650,7 +651,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, index=index, columns=self.columns).__finalize__(self) indexer = self.index.get_indexer(index, method, limit=limit) - indexer = _ensure_platform_int(indexer) + indexer = ensure_platform_int(indexer) mask = indexer == -1 need_mask = mask.any() @@ -926,7 +927,7 @@ def to_manager(sdf, columns, index): """ # from BlockManager perspective - axes = [_ensure_index(columns), _ensure_index(index)] + axes = [ensure_index(columns), ensure_index(index)] return create_block_manager_from_arrays( [sdf[c] for c in columns], columns, axes) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index fb337d71fcf8d..96ee5b7954f45 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -11,11 +11,12 @@ from pandas.core.dtypes.missing import isna, notna from pandas.compat.numpy import function as nv -from pandas.core.index import Index, _ensure_index, InvalidIndexError +from pandas.core.index import Index, ensure_index, InvalidIndexError from pandas.core.series import Series from pandas.core.internals import SingleBlockManager from pandas.core import generic import pandas.core.common as com +import pandas.core.indexes.base as ibase import pandas.core.ops as ops import pandas._libs.index as libindex from pandas.util._decorators import Appender @@ -149,8 +150,8 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', data.fill(v) if index is None: - index = com._default_index(sparse_index.length) - index = _ensure_index(index) + index = ibase.default_index(sparse_index.length) + index = ensure_index(index) # create/copy the manager if isinstance(data, SingleBlockManager): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c8204faa55cf8..83de83ab76a2c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -14,7 +14,7 @@ _guess_datetime_format) from pandas.core.dtypes.common import ( - _ensure_object, + ensure_object, is_datetime64_ns_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -216,7 +216,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = _ensure_object(arg) + arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: @@ -787,7 +787,7 @@ def _convert_listlike(arg, format): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = _ensure_object(arg) + arg = ensure_object(arg) if infer_time_format and format is None: format = _guess_time_format_for_array(arg) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index f1d13ccf36cf6..4bb5c223d1bcc 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -6,7 +6,7 @@ is_decimal, is_datetime_or_timedelta_dtype, is_number, - _ensure_object) + ensure_object) from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas._libs import lib @@ -130,7 +130,7 @@ def to_numeric(arg, errors='raise', downcast=None): elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: - values = _ensure_object(values) + values = ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) diff --git 
a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index ed2659973cc6a..63ab120833ba1 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -9,7 +9,7 @@ array_to_timedelta64) from pandas.core.dtypes.common import ( - _ensure_object, + ensure_object, is_integer_dtype, is_timedelta64_dtype, is_list_like) @@ -171,7 +171,7 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): 'timedelta64[ns]', copy=False) else: try: - value = array_to_timedelta64(_ensure_object(arg), + value = array_to_timedelta64(ensure_object(arg), unit=unit, errors=errors) value = value.astype('timedelta64[ns]', copy=False) except ValueError: diff --git a/pandas/core/window.py b/pandas/core/window.py index e20db4df2cb2a..6b6f27bcb3863 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -27,7 +27,7 @@ needs_i8_conversion, is_timedelta64_dtype, is_list_like, - _ensure_float64, + ensure_float64, is_scalar) from pandas.core.base import PandasObject, SelectionMixin @@ -208,9 +208,9 @@ def _prep_values(self, values=None, kill_inf=True): # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 if is_float_dtype(values.dtype): - values = _ensure_float64(values) + values = ensure_float64(values) elif is_integer_dtype(values.dtype): - values = _ensure_float64(values) + values = ensure_float64(values) elif needs_i8_conversion(values.dtype): raise NotImplementedError("ops for {action} for this " "dtype {dtype} are not " @@ -219,7 +219,7 @@ def _prep_values(self, values=None, kill_inf=True): dtype=values.dtype)) else: try: - values = _ensure_float64(values) + values = ensure_float64(values) except (ValueError, TypeError): raise TypeError("cannot handle this type -> {0}" "".format(values.dtype)) @@ -265,7 +265,7 @@ def _wrap_results(self, results, blocks, obj): """ from pandas import Series, concat - from pandas.core.index import _ensure_index + from pandas.core.index import ensure_index final = [] for result, block in zip(results, blocks): @@ -286,7 +286,7 @@ def _wrap_results(self, results, blocks, obj): if self._selection is not None: - selection = _ensure_index(self._selection) + selection = ensure_index(self._selection) # need to reorder to include original location of # the on column (if its not already there) @@ -857,7 +857,7 @@ def _apply(self, func, name=None, window=None, center=None, def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) # ensure we are only rolling on floats - arg = _ensure_float64(arg) + arg = ensure_float64(arg) return cfunc(arg, window, minp, indexi, closed, **kwargs) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 5f97447d29cbc..f69e4a484d177 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -36,7 +36,7 @@ from pandas.core.dtypes.generic import ABCSparseArray, ABCMultiIndex from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core.index import Index, _ensure_index +from pandas.core.index import Index, ensure_index from pandas.core.config import get_option, set_option from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex @@ -426,7 +426,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.kwds = kwds if columns is not None: - self.columns = _ensure_index(columns) + self.columns = ensure_index(columns) self.frame = self.frame[self.columns] else: self.columns = frame.columns diff 
--git a/pandas/io/parsers.py b/pandas/io/parsers.py index 65df2bffb4abf..486040fa52f35 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -16,7 +16,7 @@ from pandas.compat import (range, lrange, PY3, StringIO, lzip, zip, string_types, map, u) from pandas.core.dtypes.common import ( - is_integer, _ensure_object, + is_integer, ensure_object, is_list_like, is_integer_dtype, is_float, is_dtype_equal, is_object_dtype, is_string_dtype, @@ -25,7 +25,7 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe from pandas.core.index import (Index, MultiIndex, RangeIndex, - _ensure_index_from_sequences) + ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.arrays import Categorical @@ -1521,7 +1521,7 @@ def _agg_index(self, index, try_parse_dates=True): arrays.append(arr) names = self.index_names - index = _ensure_index_from_sequences(arrays, names) + index = ensure_index_from_sequences(arrays, names) return index @@ -1889,7 +1889,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = _ensure_index_from_sequences(arrays) + index = ensure_index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -3005,7 +3005,7 @@ def converter(*date_cols): try: return tools.to_datetime( - _ensure_object(strs), + ensure_object(strs), utc=None, box=False, dayfirst=dayfirst, @@ -3222,7 +3222,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): index = Index([]) else: data = [Series([], dtype=dtype[name]) for name in index_names] - index = _ensure_index_from_sequences(data, names=index_names) + index = ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 35e244bf2f9eb..f2d6fe01e0573 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -28,9 +28,9 @@ is_timedelta64_dtype, is_datetime64tz_dtype, is_datetime64_dtype, - _ensure_object, - _ensure_int64, - _ensure_platform_int) + ensure_object, + ensure_int64, + ensure_platform_int) from pandas.core.dtypes.missing import array_equivalent from pandas.core import config @@ -44,7 +44,7 @@ from pandas.core.internals import (BlockManager, make_block, _block2d_to_blocknd, _factor_indexer, _block_shape) -from pandas.core.index import _ensure_index +from pandas.core.index import ensure_index from pandas.core.computation.pytables import Expr, maybe_expression from pandas.io.common import _stringify_path @@ -3725,8 +3725,8 @@ def process_filter(field, filt): elif field in axis_values: # we need to filter on this dimension - values = _ensure_index(getattr(obj, field).values) - filt = _ensure_index(filt) + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) # hack until we support reversed dim flags if isinstance(obj, DataFrame): @@ -3892,8 +3892,8 @@ def read(self, where=None, columns=None, **kwargs): if len(unique(key)) == len(key): sorter, _ = algos.groupsort_indexer( - _ensure_int64(key), np.prod(N)) - sorter = _ensure_platform_int(sorter) + ensure_int64(key), np.prod(N)) + sorter = ensure_platform_int(sorter) # create the objs for c in self.values_axes: @@ -3938,7 +3938,7 @@ def read(self, where=None, columns=None, **kwargs): unique_tuples = com._asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) - indexer = _ensure_platform_int(indexer) + indexer = ensure_platform_int(indexer) new_index = 
long_index.take(indexer) new_values = lp.values.take(indexer, axis=0) @@ -4236,7 +4236,7 @@ def read(self, where=None, columns=None, **kwargs): for a in self.values_axes: # we could have a multi-index constructor here - # _ensure_index doesn't recognized our list-of-tuples here + # ensure_index doesn't recognized our list-of-tuples here if info.get('type') == 'MultiIndex': cols = MultiIndex.from_tuples(a.values) else: @@ -4437,18 +4437,18 @@ def is_transposed(self): def _reindex_axis(obj, axis, labels, other=None): ax = obj._get_axis(axis) - labels = _ensure_index(labels) + labels = ensure_index(labels) # try not to reindex even if other is provided # if it equals our current index if other is not None: - other = _ensure_index(other) + other = ensure_index(other) if (other is None or labels.equals(other)) and labels.equals(ax): return obj - labels = _ensure_index(labels.unique()) + labels = ensure_index(labels.unique()) if other is not None: - labels = _ensure_index(other.unique()) & labels + labels = ensure_index(other.unique()) & labels if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels @@ -4656,7 +4656,7 @@ def _convert_string_array(data, encoding, errors, itemsize=None): # create the sized dtype if itemsize is None: - ensured = _ensure_object(data.ravel()) + ensured = ensure_object(data.ravel()) itemsize = libwriters.max_len_string_array(ensured) data = np.asarray(data, dtype="S%d" % itemsize) @@ -4688,7 +4688,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, encoding = _ensure_encoding(encoding) if encoding is not None and len(data): - itemsize = libwriters.max_len_string_array(_ensure_object(data)) + itemsize = libwriters.max_len_string_array(ensure_object(data)) if compat.PY3: dtype = "U{0}".format(itemsize) else: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 4ce2ed4e36139..efd5f337fdf69 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -28,7 +28,7 @@ zip, BytesIO) from pandas.core.arrays import Categorical from pandas.core.base import StringMixin -from pandas.core.dtypes.common import (is_categorical_dtype, _ensure_object, +from pandas.core.dtypes.common import (is_categorical_dtype, ensure_object, is_datetime64_dtype) from pandas.core.frame import DataFrame from pandas.core.series import Series @@ -1818,7 +1818,7 @@ def _dtype_to_stata_type(dtype, column): if dtype.type == np.object_: # try to coerce it to the biggest string # not memory efficient, what else could we # do? - itemsize = max_len_string_array(_ensure_object(column.values)) + itemsize = max_len_string_array(ensure_object(column.values)) return max(itemsize, 1) elif dtype == np.float64: return 255 @@ -1863,7 +1863,7 @@ def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, if not (inferred_dtype in ('string', 'unicode') or len(column) == 0): raise ValueError('Writing general object arrays is not supported') - itemsize = max_len_string_array(_ensure_object(column.values)) + itemsize = max_len_string_array(ensure_object(column.values)) if itemsize > max_str_len: if dta_version >= 117: return '%9s' @@ -2418,7 +2418,7 @@ def _dtype_to_stata_type_117(dtype, column, force_strl): if dtype.type == np.object_: # try to coerce it to the biggest string # not memory efficient, what else could we # do? 
-        itemsize = max_len_string_array(_ensure_object(column.values))
+        itemsize = max_len_string_array(ensure_object(column.values))
         itemsize = max(itemsize, 1)
         if itemsize <= 2045:
             return itemsize
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index f81767156b255..5f1f6dc5bca87 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -35,8 +35,8 @@
     is_bool,
     is_scalar,
     is_scipy_sparse,
-    _ensure_int32,
-    _ensure_categorical)
+    ensure_int32,
+    ensure_categorical)
 from pandas.util import testing as tm
 import pandas.util._test_decorators as td
@@ -1217,19 +1217,19 @@ def test_is_scipy_sparse(spmatrix):  # noqa: F811
 
 def test_ensure_int32():
     values = np.arange(10, dtype=np.int32)
-    result = _ensure_int32(values)
+    result = ensure_int32(values)
     assert (result.dtype == np.int32)
 
     values = np.arange(10, dtype=np.int64)
-    result = _ensure_int32(values)
+    result = ensure_int32(values)
     assert (result.dtype == np.int32)
 
 
 def test_ensure_categorical():
     values = np.arange(10, dtype=np.int32)
-    result = _ensure_categorical(values)
+    result = ensure_categorical(values)
     assert (result.dtype == 'category')
 
     values = Categorical(values)
-    result = _ensure_categorical(values)
+    result = ensure_categorical(values)
     tm.assert_categorical_equal(result, values)
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 9a838341c7d8c..9dcc13c15736f 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -5,7 +5,7 @@
 from numpy import nan
 import numpy as np
 
-from pandas.core.dtypes.common import _ensure_int64
+from pandas.core.dtypes.common import ensure_int64
 from pandas import Index, isna
 from pandas.core.groupby.ops import generate_bins_generic
 from pandas.util.testing import assert_almost_equal
@@ -90,8 +90,8 @@ def _check(dtype):
     bins = np.array([6, 12, 20])
     out = np.zeros((3, 4), dtype)
     counts = np.zeros(len(out), dtype=np.int64)
-    labels = _ensure_int64(np.repeat(np.arange(3),
-                                     np.diff(np.r_[0, bins])))
+    labels = ensure_int64(np.repeat(np.arange(3),
+                                    np.diff(np.r_[0, bins])))
 
     func = getattr(groupby, 'group_ohlc_%s' % dtype)
     func(out, counts, obj[:, None], labels)
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 7fccf1f57a886..57b04bfd82528 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -7,7 +7,7 @@
 from pandas.util import testing as tm
 from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range
 from pandas.core.dtypes.common import (
-    _ensure_platform_int, is_timedelta64_dtype)
+    ensure_platform_int, is_timedelta64_dtype)
 from pandas.compat import StringIO
 from pandas._libs import groupby
@@ -76,7 +76,7 @@ def test_transform_fast():
     grp = df.groupby('id')['val']
 
     values = np.repeat(grp.mean().values,
-                       _ensure_platform_int(grp.count().values))
+                       ensure_platform_int(grp.count().values))
     expected = pd.Series(values, index=df.index, name='val')
 
     result = grp.transform(np.mean)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 639e51e9361ab..7b105390db40b 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -21,7 +21,7 @@
     DataFrame, Float64Index, Int64Index, UInt64Index,
     CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex,
     RangeIndex, isna)
-from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
+from pandas.core.index import _get_combined_index, ensure_index_from_sequences
 from pandas.util.testing import assert_almost_equal
 from pandas.compat.numpy import np_datetime64_compat
@@ -2455,7 +2455,7 @@ class TestIndexUtils(object):
                           names=['L1', 'L2'])),
     ])
     def test_ensure_index_from_sequences(self, data, names, expected):
-        result = _ensure_index_from_sequences(data, names)
+        result = ensure_index_from_sequences(data, names)
        tm.assert_index_equal(result, expected)
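
Reviewer's sketch (not part of the patch): a minimal illustration of what the rename means for internal callers, assuming a pandas build with this change applied. The helpers lose their leading underscore but still live in private modules, so they remain internal API; the import paths and coercion behavior shown here follow the hunks above.

    import numpy as np
    from pandas.core.dtypes.common import ensure_int64, ensure_platform_int
    from pandas.core.indexes.base import ensure_index

    # Each ensure_* helper coerces its input to a canonical dtype or
    # container, and passes already-conforming input through unchanged.
    labels = ensure_int64(np.array([0, 1, 2], dtype=np.int32))  # int64 ndarray
    indexer = ensure_platform_int(labels)                       # np.intp ndarray
    idx = ensure_index(['a', 'b', 'c'])                         # Index(['a', 'b', 'c'])

    print(labels.dtype, indexer.dtype, type(idx).__name__)

Call sites therefore change only in the name they reference; no behavior is affected, which is why the touched tests differ from their originals solely in the renamed identifiers.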