Skip to content

Commit

Permalink
add tests for engines and add is_monotonic_uint(32|16)
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Oct 23, 2018
1 parent 5e2ccc8 commit a519cc4
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 6 deletions.
9 changes: 5 additions & 4 deletions asv_bench/benchmarks/indexing_engines.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np

from pandas._libs import index as li
from pandas._libs import index as libindex


def _get_numeric_engines():
Expand All @@ -11,8 +11,9 @@ def _get_numeric_engines():
('UInt16engine', np.uint16), ('UInt8Engine', np.uint8),
('Float64Engine', np.float64), ('Float32Engine', np.float32),
]
return [(getattr(li, engine_name), dtype)
for engine_name, dtype in engine_names if hasattr(li, engine_name)]
return [(getattr(libindex, engine_name), dtype)
for engine_name, dtype in engine_names
if hasattr(libindex, engine_name)]


class NumericEngineIndexing(object):
Expand Down Expand Up @@ -55,7 +56,7 @@ def setup(self, index_type):
'non_monotonic': np.array(list('abc') * N, dtype=object),
}[index_type]

self.data = li.ObjectEngine(lambda: arr, len(arr))
self.data = libindex.ObjectEngine(lambda: arr, len(arr))
# code belows avoids populating the mapping etc. while timing.
self.data.get_loc('b')

Expand Down
15 changes: 15 additions & 0 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,8 @@ ctypedef fused algos_t:
int16_t
int8_t
uint64_t
uint32_t
uint16_t
uint8_t


Expand Down Expand Up @@ -462,7 +464,12 @@ pad_float32 = pad["float32_t"]
pad_object = pad["object"]
pad_int64 = pad["int64_t"]
pad_int32 = pad["int32_t"]
pad_int16 = pad["int16_t"]
pad_int8 = pad["int8_t"]
pad_uint64 = pad["uint64_t"]
pad_uint32 = pad["uint32_t"]
pad_uint16 = pad["uint16_t"]
pad_uint8 = pad["uint8_t"]
pad_bool = pad["uint8_t"]


Expand Down Expand Up @@ -656,7 +663,12 @@ backfill_float32 = backfill["float32_t"]
backfill_object = backfill["object"]
backfill_int64 = backfill["int64_t"]
backfill_int32 = backfill["int32_t"]
backfill_int16 = backfill["int16_t"]
backfill_int8 = backfill["int8_t"]
backfill_uint64 = backfill["uint64_t"]
backfill_uint32 = backfill["uint32_t"]
backfill_uint16 = backfill["uint16_t"]
backfill_uint8 = backfill["uint8_t"]
backfill_bool = backfill["uint8_t"]


Expand Down Expand Up @@ -872,6 +884,9 @@ is_monotonic_int32 = is_monotonic["int32_t"]
is_monotonic_int16 = is_monotonic["int16_t"]
is_monotonic_int8 = is_monotonic["int8_t"]
is_monotonic_uint64 = is_monotonic["uint64_t"]
is_monotonic_uint32 = is_monotonic["uint32_t"]
is_monotonic_uint16 = is_monotonic["uint16_t"]
is_monotonic_uint8 = is_monotonic["uint8_t"]
is_monotonic_bool = is_monotonic["uint8_t"]


Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
('int16', 'INT16', 'int16'),
('int32', 'INT32', 'int32'),
('int64', 'INT64', 'int64'),
('uint8', 'UINT8', 'uint8'),
('uint16', 'UINT16', 'uint16'),
('uint32', 'UINT32', 'uint32'),
('uint64', 'UINT64', 'uint64'),
# ('platform_int', 'INT', 'int_'),
# ('object', 'OBJECT', 'object_'),
Expand Down
24 changes: 22 additions & 2 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-

import pytest
import numpy as np

import pandas.util.testing as tm
from pandas.core.indexes.api import Index, CategoricalIndex
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas._libs import index as libindex
from .common import Base

from pandas.compat import range, PY3

import numpy as np

from pandas import Categorical, IntervalIndex, compat
from pandas.util.testing import assert_almost_equal
import pandas.core.config as cf
Expand Down Expand Up @@ -1117,3 +1117,23 @@ def test_take_invalid_kwargs(self):
msg = "the 'mode' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, idx.take,
indices, mode='clip')

@pytest.mark.parametrize('dtype, engine_type', [
(np.int8, libindex.Int8Engine),
(np.int16, libindex.Int16Engine),
(np.int32, libindex.Int32Engine),
(np.int64, libindex.Int64Engine),
])
def test_engine_type(self, dtype, engine_type):
if dtype != np.int64:
# num. of uniques required to push CategoricalIndex.codes to a
# dtype (128 categories required for .codes dtype to be int16 etc.)
num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
ci = pd.CategoricalIndex(range(num_uniques))
else:
# having 2**32 - 2**31 categories would be very memory-intensive,
# so we cheat a bit with the dtype
ci = pd.CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1)
ci.values._codes = ci.values._codes.astype('int64')
assert np.issubdtype(ci.codes.dtype, dtype)
assert isinstance(ci._engine, engine_type)
20 changes: 20 additions & 0 deletions pandas/tests/indexing/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import numpy as np
import pytest

from pandas._libs import index as libindex


@pytest.fixture(params=[
(libindex.Int64Engine, np.int64),
(libindex.Int32Engine, np.int32),
(libindex.Int16Engine, np.int16),
(libindex.Int8Engine, np.int8),
(libindex.UInt64Engine, np.uint64),
(libindex.UInt32Engine, np.uint32),
(libindex.UInt16Engine, np.uint16),
(libindex.UInt8Engine, np.uint8),
(libindex.Float64Engine, np.float64),
(libindex.Float32Engine, np.float32),
], ids=lambda x: x[0].__name__)
def numeric_indexing_engine_type_and_dtype(request):
return request.param
168 changes: 168 additions & 0 deletions pandas/tests/indexing/test_indexing_engines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import numpy as np

import pandas.util.testing as tm
from pandas import compat
from pandas._libs import algos as libalgos, index as libindex


class TestNumericEngine(object):
def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)

# monotonic increasing
engine = engine_type(lambda: arr, len(arr))
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False

# monotonic decreasing
engine = engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True

# neither monotonic increasing or decreasing
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
engine = engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False

def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

# unique
arr = np.array([1, 3, 2], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.is_unique is True

# not unique
arr = np.array([1, 2, 1], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.is_unique is False

def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

# unique
arr = np.array([1, 2, 3], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.get_loc(2) == 1

# monotonic
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.get_loc(2) == slice(1000, 2000)

# not monotonic
arr = np.array([1, 2, 3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc(2)
assert (result == expected).all()

def test_get_backfill_indexer(
self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))

new = np.array(compat.range(12), dtype=dtype)
result = engine.get_backfill_indexer(new)

expected = libalgos.backfill(arr, new)
tm.assert_numpy_array_equal(result, expected)

def test_get_pad_indexer(
self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype

arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))

new = np.array(compat.range(12), dtype=dtype)
result = engine.get_pad_indexer(new)

expected = libalgos.pad(arr, new)
tm.assert_numpy_array_equal(result, expected)


class TestObjectEngine(object):
engine_type = libindex.ObjectEngine
dtype = np.object_
values = list('abc')

def test_is_monotonic(self):

num = 1000
arr = np.array(['a'] * num + ['a'] * num + ['c'] * num,
dtype=self.dtype)

# monotonic increasing
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False

# monotonic decreasing
engine = self.engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True

# neither monotonic increasing or decreasing
arr = np.array(['a'] * num + ['b'] * num + ['a'] * num,
dtype=self.dtype)
engine = self.engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False

def test_is_unique(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_unique is True

# not unique
arr = np.array(['a', 'b', 'a'], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_unique is False

def test_get_loc(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.get_loc('b') == 1

# monotonic
num = 1000
arr = np.array(['a'] * num + ['b'] * num + ['c'] * num,
dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.get_loc('b') == slice(1000, 2000)

# not monotonic
arr = np.array(self.values * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc('b')
assert (result == expected).all()

def test_get_backfill_indexer(self):
arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))

new = np.array(list('abcdefghij'), dtype=self.dtype)
result = engine.get_backfill_indexer(new)

expected = libalgos.backfill_object(arr, new)
tm.assert_numpy_array_equal(result, expected)

def test_get_pad_indexer(self):
arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))

new = np.array(list('abcdefghij'), dtype=self.dtype)
result = engine.get_pad_indexer(new)

expected = libalgos.pad_object(arr, new)
tm.assert_numpy_array_equal(result, expected)

0 comments on commit a519cc4

Please sign in to comment.