diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py
index 1e9283c7fb92b..f3d063ee31bc8 100644
--- a/asv_bench/benchmarks/indexing_engines.py
+++ b/asv_bench/benchmarks/indexing_engines.py
@@ -1,18 +1,30 @@
 import numpy as np
 
-from pandas._libs.index import (Int64Engine, UInt64Engine, Float64Engine,
-                                ObjectEngine)
+from pandas._libs import index as libindex
+
+
+def _get_numeric_engines():
+    engine_names = [
+        ('Int64Engine', np.int64), ('Int32Engine', np.int32),
+        ('Int16Engine', np.int16), ('Int8Engine', np.int8),
+        ('UInt64Engine', np.uint64), ('UInt32Engine', np.uint32),
+        ('UInt16engine', np.uint16), ('UInt8Engine', np.uint8),
+        ('Float64Engine', np.float64), ('Float32Engine', np.float32),
+    ]
+    return [(getattr(libindex, engine_name), dtype)
+            for engine_name, dtype in engine_names
+            if hasattr(libindex, engine_name)]
 
 
 class NumericEngineIndexing(object):
 
-    params = [[Int64Engine, UInt64Engine, Float64Engine],
-              [np.int64, np.uint64, np.float64],
+    params = [_get_numeric_engines(),
               ['monotonic_incr', 'monotonic_decr', 'non_monotonic'],
               ]
-    param_names = ['engine', 'dtype', 'index_type']
+    param_names = ['engine_and_dtype', 'index_type']
 
-    def setup(self, engine, dtype, index_type):
+    def setup(self, engine_and_dtype, index_type):
+        engine, dtype = engine_and_dtype
         N = 10**5
         values = list([1] * N + [2] * N + [3] * N)
         arr = {
@@ -26,7 +38,7 @@ def setup(self, engine, dtype, index_type):
         # code belows avoids populating the mapping etc. while timing.
         self.data.get_loc(2)
 
-    def time_get_loc(self, engine, dtype, index_type):
+    def time_get_loc(self, engine_and_dtype, index_type):
         self.data.get_loc(2)
 
 
@@ -44,7 +56,7 @@ def setup(self, index_type):
             'non_monotonic': np.array(list('abc') * N, dtype=object),
         }[index_type]
 
-        self.data = ObjectEngine(lambda: arr, len(arr))
+        self.data = libindex.ObjectEngine(lambda: arr, len(arr))
         # code belows avoids populating the mapping etc. while timing.
         self.data.get_loc('b')
 
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 5fa391c3433ea..a2c89e16d0ecd 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -948,9 +948,11 @@ Removal of prior version deprecations/changes
 Performance Improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-- Very large improvement in performance of slicing when the index is a :class:`CategoricalIndex`,
-  both when indexing by label (using .loc) and position(.iloc).
-  Likewise, slicing a ``CategoricalIndex`` itself (i.e. ``ci[100:200]``) shows similar speed improvements (:issue:`21659`)
+- Slicing Series and Dataframes with an monotonically increasing :class:`CategoricalIndex`
+  is now very fast and has speed comparable to slicing with an ``Int64Index``.
+  The speed increase is both when indexing by label (using .loc) and position(.iloc) (:issue:`20395`)
+  Slicing a monotonically increasing :class:`CategoricalIndex` itself (i.e. ``ci[1000:2000]``)
+  shows similar speed improvements as above (:issue:`21659`)
 - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
 - Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`)
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index d2914dc8ac751..3ba4c2375b4e8 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -10,7 +10,8 @@ from libc.math cimport fabs, sqrt
 import numpy as np
 cimport numpy as cnp
 from numpy cimport (ndarray,
-                    NPY_INT64, NPY_UINT64, NPY_INT32, NPY_INT16, NPY_INT8,
+                    NPY_INT64, NPY_INT32, NPY_INT16, NPY_INT8,
+                    NPY_UINT64, NPY_UINT32, NPY_UINT16, NPY_UINT8,
                     NPY_FLOAT32, NPY_FLOAT64,
                     NPY_OBJECT,
                     int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
@@ -359,9 +360,13 @@ ctypedef fused algos_t:
     float64_t
     float32_t
     object
-    int32_t
     int64_t
+    int32_t
+    int16_t
+    int8_t
     uint64_t
+    uint32_t
+    uint16_t
     uint8_t
 
 
@@ -459,7 +464,12 @@ pad_float32 = pad["float32_t"]
 pad_object = pad["object"]
 pad_int64 = pad["int64_t"]
 pad_int32 = pad["int32_t"]
+pad_int16 = pad["int16_t"]
+pad_int8 = pad["int8_t"]
 pad_uint64 = pad["uint64_t"]
+pad_uint32 = pad["uint32_t"]
+pad_uint16 = pad["uint16_t"]
+pad_uint8 = pad["uint8_t"]
 pad_bool = pad["uint8_t"]
 
 
@@ -653,7 +663,12 @@ backfill_float32 = backfill["float32_t"]
 backfill_object = backfill["object"]
 backfill_int64 = backfill["int64_t"]
 backfill_int32 = backfill["int32_t"]
+backfill_int16 = backfill["int16_t"]
+backfill_int8 = backfill["int8_t"]
 backfill_uint64 = backfill["uint64_t"]
+backfill_uint32 = backfill["uint32_t"]
+backfill_uint16 = backfill["uint16_t"]
+backfill_uint8 = backfill["uint8_t"]
 backfill_bool = backfill["uint8_t"]
 
 
@@ -866,7 +881,12 @@ is_monotonic_float32 = is_monotonic["float32_t"]
 is_monotonic_object = is_monotonic["object"]
 is_monotonic_int64 = is_monotonic["int64_t"]
 is_monotonic_int32 = is_monotonic["int32_t"]
+is_monotonic_int16 = is_monotonic["int16_t"]
+is_monotonic_int8 = is_monotonic["int8_t"]
 is_monotonic_uint64 = is_monotonic["uint64_t"]
+is_monotonic_uint32 = is_monotonic["uint32_t"]
+is_monotonic_uint16 = is_monotonic["uint16_t"]
+is_monotonic_uint8 = is_monotonic["uint8_t"]
 is_monotonic_bool = is_monotonic["uint8_t"]
 
 
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index b39b5eaced8fd..518664d70cf06 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -133,6 +133,9 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
           ('int16', 'INT16', 'int16'),
           ('int32', 'INT32', 'int32'),
           ('int64', 'INT64', 'int64'),
+          ('uint8', 'UINT8', 'uint8'),
+          ('uint16', 'UINT16', 'uint16'),
+          ('uint32', 'UINT32', 'uint32'),
           ('uint64', 'UINT64', 'uint64'),
           # ('platform_int', 'INT', 'int_'),
           # ('object', 'OBJECT', 'object_'),
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 3f76915655f58..d418ac63a4ac8 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -5,8 +5,10 @@ import cython
 
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray, float64_t, int32_t,
-                    int64_t, uint8_t, uint64_t, intp_t,
+from numpy cimport (ndarray, intp_t,
+                    float64_t, float32_t,
+                    int64_t, int32_t, int16_t, int8_t,
+                    uint64_t, uint32_t, uint16_t, uint8_t,
                     # Note: NPY_DATETIME, NPY_TIMEDELTA are only available
                     # for cimport in cython>=0.27.3
                     NPY_DATETIME, NPY_TIMEDELTA)
diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in
index 4ea35da0626f3..c19812efaaa35 100644
--- a/pandas/_libs/index_class_helper.pxi.in
+++ b/pandas/_libs/index_class_helper.pxi.in
@@ -10,14 +10,22 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# name, dtype, ctype
-dtypes = [('Float64', 'float64', 'float64_t'),
-          ('UInt64', 'uint64', 'uint64_t'),
-          ('Int64', 'int64', 'int64_t'),
-          ('Object', 'object', 'object')]
+# name, dtype, ctype, hashtable_name, hashtable_dtype
+dtypes = [('Float64', 'float64', 'float64_t', 'Float64', 'float64'),
+          ('Float32', 'float32', 'float32_t', 'Float64', 'float64'),
+          ('Int64', 'int64', 'int64_t', 'Int64', 'int64'),
+          ('Int32', 'int32', 'int32_t', 'Int64', 'int64'),
+          ('Int16', 'int16', 'int16_t', 'Int64', 'int64'),
+          ('Int8', 'int8', 'int8_t', 'Int64', 'int64'),
+          ('UInt64', 'uint64', 'uint64_t', 'UInt64', 'uint64'),
+          ('UInt32', 'uint32', 'uint32_t', 'UInt64', 'uint64'),
+          ('UInt16', 'uint16', 'uint16_t', 'UInt64', 'uint64'),
+          ('UInt8', 'uint8', 'uint8_t', 'UInt64', 'uint64'),
+          ('Object', 'object', 'object', 'PyObject', 'object'),
+          ]
 }}
 
-{{for name, dtype, ctype in dtypes}}
+{{for name, dtype, ctype, hashtable_name, hashtable_dtype in dtypes}}
 
 
 cdef class {{name}}Engine(IndexEngine):
@@ -34,13 +42,9 @@ cdef class {{name}}Engine(IndexEngine):
                                    other, limit=limit)
 
     cdef _make_hash_table(self, n):
-        {{if name == 'Object'}}
-        return _hash.PyObjectHashTable(n)
-        {{else}}
-        return _hash.{{name}}HashTable(n)
-        {{endif}}
+        return _hash.{{hashtable_name}}HashTable(n)
 
-    {{if name != 'Float64' and name != 'Object'}}
+    {{if name not in {'Float64', 'Float32', 'Object'} }}
     cdef _check_type(self, object val):
         hash(val)
         if util.is_bool_object(val):
@@ -50,6 +54,11 @@ cdef class {{name}}Engine(IndexEngine):
     {{endif}}
 
     {{if name != 'Object'}}
+    cpdef _call_map_locations(self, values):
+        # self.mapping is of type {{hashtable_name}}HashTable,
+        # so convert dtype of values
+        self.mapping.map_locations(algos.ensure_{{hashtable_dtype}}(values))
+
     cdef _get_index_values(self):
         return algos.ensure_{{dtype}}(self.vgetter())
 
@@ -60,7 +69,7 @@ cdef class {{name}}Engine(IndexEngine):
             ndarray[{{ctype}}] values
             int count = 0
 
-        {{if name != 'Float64'}}
+        {{if name not in {'Float64', 'Float32'} }}
         if not util.is_integer_object(val):
             raise KeyError(val)
         {{endif}}
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index e4250ae790553..278e395d65014 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -84,7 +84,17 @@ class CategoricalIndex(Index, accessor.PandasDelegate):
     """
 
     _typ = 'categoricalindex'
-    _engine_type = libindex.Int64Engine
+
+    @property
+    def _engine_type(self):
+        # self.codes can have dtype int8, int16, int32 or int64, so we need
+        # to return the corresponding engine type (libindex.Int8Engine, etc.).
+        return {np.int8: libindex.Int8Engine,
+                np.int16: libindex.Int16Engine,
+                np.int32: libindex.Int32Engine,
+                np.int64: libindex.Int64Engine,
+                }[self.codes.dtype.type]
+
     _attributes = ['name']
 
     def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
@@ -382,7 +392,7 @@ def argsort(self, *args, **kwargs):
     def _engine(self):
 
         # we are going to look things up with the codes themselves
-        return self._engine_type(lambda: self.codes.astype('i8'), len(self))
+        return self._engine_type(lambda: self.codes, len(self))
 
     # introspection
     @cache_readonly
@@ -450,6 +460,7 @@ def get_loc(self, key, method=None):
         array([False,  True, False,  True], dtype=bool)
         """
         code = self.categories.get_loc(key)
+        code = self.codes.dtype.type(code)
         try:
             return self._engine.get_loc(code)
         except KeyError:
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 99058f883a392..d89baa41d33fe 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -1,16 +1,16 @@
 # -*- coding: utf-8 -*-
 
 import pytest
+import numpy as np
 
 import pandas.util.testing as tm
 from pandas.core.indexes.api import Index, CategoricalIndex
 from pandas.core.dtypes.dtypes import CategoricalDtype
+from pandas._libs import index as libindex
 from .common import Base
 
 from pandas.compat import range, PY3
 
-import numpy as np
-
 from pandas import Categorical, IntervalIndex, compat
 from pandas.util.testing import assert_almost_equal
 import pandas.core.config as cf
@@ -1117,3 +1117,23 @@ def test_take_invalid_kwargs(self):
         msg = "the 'mode' parameter is not supported"
         tm.assert_raises_regex(ValueError, msg, idx.take,
                                indices, mode='clip')
+
+    @pytest.mark.parametrize('dtype, engine_type', [
+        (np.int8, libindex.Int8Engine),
+        (np.int16, libindex.Int16Engine),
+        (np.int32, libindex.Int32Engine),
+        (np.int64, libindex.Int64Engine),
+    ])
+    def test_engine_type(self, dtype, engine_type):
+        if dtype != np.int64:
+            # num. of uniques required to push CategoricalIndex.codes to a
+            # dtype (128 categories required for .codes dtype to be int16 etc.)
+            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
+            ci = pd.CategoricalIndex(range(num_uniques))
+        else:
+            # having 2**32 - 2**31 categories would be very memory-intensive,
+            # so we cheat a bit with the dtype
+            ci = pd.CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
+            ci.values._codes = ci.values._codes.astype('int64')
+        assert np.issubdtype(ci.codes.dtype, dtype)
+        assert isinstance(ci._engine, engine_type)
diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py
new file mode 100644
index 0000000000000..be1cf4800a2ef
--- /dev/null
+++ b/pandas/tests/indexing/conftest.py
@@ -0,0 +1,20 @@
+import numpy as np
+import pytest
+
+from pandas._libs import index as libindex
+
+
+@pytest.fixture(params=[
+    (libindex.Int64Engine, np.int64),
+    (libindex.Int32Engine, np.int32),
+    (libindex.Int16Engine, np.int16),
+    (libindex.Int8Engine, np.int8),
+    (libindex.UInt64Engine, np.uint64),
+    (libindex.UInt32Engine, np.uint32),
+    (libindex.UInt16Engine, np.uint16),
+    (libindex.UInt8Engine, np.uint8),
+    (libindex.Float64Engine, np.float64),
+    (libindex.Float32Engine, np.float32),
+], ids=lambda x: x[0].__name__)
+def numeric_indexing_engine_type_and_dtype(request):
+    return request.param
diff --git a/pandas/tests/indexing/test_indexing_engines.py b/pandas/tests/indexing/test_indexing_engines.py
new file mode 100644
index 0000000000000..410eba99948ce
--- /dev/null
+++ b/pandas/tests/indexing/test_indexing_engines.py
@@ -0,0 +1,168 @@
+import numpy as np
+
+import pandas.util.testing as tm
+from pandas import compat
+from pandas._libs import algos as libalgos, index as libindex
+
+
+class TestNumericEngine(object):
+    def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
+        engine_type, dtype = numeric_indexing_engine_type_and_dtype
+        num = 1000
+        arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
+
+        # monotonic increasing
+        engine = engine_type(lambda: arr, len(arr))
+        assert engine.is_monotonic_increasing is True
+        assert engine.is_monotonic_decreasing is False
+
+        # monotonic decreasing
+        engine = engine_type(lambda: arr[::-1], len(arr))
+        assert engine.is_monotonic_increasing is False
+        assert engine.is_monotonic_decreasing is True
+
+        # neither monotonic increasing or decreasing
+        arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
+        engine = engine_type(lambda: arr[::-1], len(arr))
+        assert engine.is_monotonic_increasing is False
+        assert engine.is_monotonic_decreasing is False
+
+    def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
+        engine_type, dtype = numeric_indexing_engine_type_and_dtype
+
+        # unique
+        arr = np.array([1, 3, 2], dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+        assert engine.is_unique is True
+
+        # not unique
+        arr = np.array([1, 2, 1], dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+        assert engine.is_unique is False
+
+    def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
+        engine_type, dtype = numeric_indexing_engine_type_and_dtype
+
+        # unique
+        arr = np.array([1, 2, 3], dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+        assert engine.get_loc(2) == 1
+
+        # monotonic
+        num = 1000
+        arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+        assert engine.get_loc(2) == slice(1000, 2000)
+
+        # not monotonic
+        arr = np.array([1, 2, 3] * num, dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+        expected = np.array([False, True, False] * num, dtype=bool)
+        result = engine.get_loc(2)
+        assert (result == expected).all()
+
+    def test_get_backfill_indexer(
+            self, numeric_indexing_engine_type_and_dtype):
+        engine_type, dtype = numeric_indexing_engine_type_and_dtype
+
+        arr = np.array([1, 5, 10], dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+
+        new = np.array(compat.range(12), dtype=dtype)
+        result = engine.get_backfill_indexer(new)
+
+        expected = libalgos.backfill(arr, new)
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_get_pad_indexer(
+            self, numeric_indexing_engine_type_and_dtype):
+        engine_type, dtype = numeric_indexing_engine_type_and_dtype
+
+        arr = np.array([1, 5, 10], dtype=dtype)
+        engine = engine_type(lambda: arr, len(arr))
+
+        new = np.array(compat.range(12), dtype=dtype)
+        result = engine.get_pad_indexer(new)
+
+        expected = libalgos.pad(arr, new)
+        tm.assert_numpy_array_equal(result, expected)
+
+
+class TestObjectEngine(object):
+    engine_type = libindex.ObjectEngine
+    dtype = np.object_
+    values = list('abc')
+
+    def test_is_monotonic(self):
+
+        num = 1000
+        arr = np.array(['a'] * num + ['a'] * num + ['c'] * num,
+                       dtype=self.dtype)
+
+        # monotonic increasing
+        engine = self.engine_type(lambda: arr, len(arr))
+        assert engine.is_monotonic_increasing is True
+        assert engine.is_monotonic_decreasing is False
+
+        # monotonic decreasing
+        engine = self.engine_type(lambda: arr[::-1], len(arr))
+        assert engine.is_monotonic_increasing is False
+        assert engine.is_monotonic_decreasing is True
+
+        # neither monotonic increasing or decreasing
+        arr = np.array(['a'] * num + ['b'] * num + ['a'] * num,
+                       dtype=self.dtype)
+        engine = self.engine_type(lambda: arr[::-1], len(arr))
+        assert engine.is_monotonic_increasing is False
+        assert engine.is_monotonic_decreasing is False
+
+    def test_is_unique(self):
+        # unique
+        arr = np.array(self.values, dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+        assert engine.is_unique is True
+
+        # not unique
+        arr = np.array(['a', 'b', 'a'], dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+        assert engine.is_unique is False
+
+    def test_get_loc(self):
+        # unique
+        arr = np.array(self.values, dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+        assert engine.get_loc('b') == 1
+
+        # monotonic
+        num = 1000
+        arr = np.array(['a'] * num + ['b'] * num + ['c'] * num,
+                       dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+        assert engine.get_loc('b') == slice(1000, 2000)
+
+        # not monotonic
+        arr = np.array(self.values * num, dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+        expected = np.array([False, True, False] * num, dtype=bool)
+        result = engine.get_loc('b')
+        assert (result == expected).all()
+
+    def test_get_backfill_indexer(self):
+        arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+
+        new = np.array(list('abcdefghij'), dtype=self.dtype)
+        result = engine.get_backfill_indexer(new)
+
+        expected = libalgos.backfill_object(arr, new)
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_get_pad_indexer(self):
+        arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
+        engine = self.engine_type(lambda: arr, len(arr))
+
+        new = np.array(list('abcdefghij'), dtype=self.dtype)
+        result = engine.get_pad_indexer(new)
+
+        expected = libalgos.pad_object(arr, new)
+        tm.assert_numpy_array_equal(result, expected)