@staticmethod
def build_ak_df(self, index_dtype="int64", index_name=None) -> DataFrame:
    """
    Build the six-row DataFrame fixture shared by the frame tests.

    The helper is a staticmethod that nevertheless takes ``self`` as an
    explicit first argument; the tests in this class call it as
    ``self.build_ak_df(self, ...)``.

    Parameters
    ----------
    self
        The calling test instance (unused).
    index_dtype : str, default "int64"
        dtype passed to ``ak.arange`` when building the row index.
    index_name : str, optional
        Name given to the row index.

    Returns
    -------
    DataFrame
        Columns ``userName``, ``userID``, ``item``, ``day``, ``amount``
        and ``bi`` (in that order), indexed by ``arange(6)``.
    """
    columns = {
        "userName": ak.array(["Alice", "Alice", "Alice", "Bob", "Bob", "Carol"]),
        "userID": ak.array([111, 222, 111, 333, 222, 111]),
        "item": ak.array([0, 0, 1, 1, 2, 0]),
        "day": ak.array([5, 5, 6, 5, 6, 6]),
        "amount": ak.array([0.5, 0.6, 1.1, 1.2, 4.3, 0.6]),
        # Values start at 2**200, far beyond the 64-bit integer range.
        "bi": ak.arange(2**200, 2**200 + 6),
    }
    row_labels = Index(ak.arange(6, dtype=index_dtype), name=index_name)
    return ak.DataFrame(columns, index=row_labels)
def test_assert_index_equal(self):
    """Exercise the ``exact`` and ``check_names`` switches of assert_index_equal."""
    n = 10

    # exact: a float64 index and an int64 index over the same range are
    # accepted only while exact checking is switched off.
    float_idx = Index(ak.arange(n, dtype="float64"))
    int_idx = Index(ak.arange(n, dtype="int64"))
    assert_index_equal(float_idx, int_idx, exact=False)
    with pytest.raises(AssertionError):
        assert_index_equal(float_idx, int_idx, exact=True)

    # check_names: identical values, differing only in the index name.
    named = Index(ak.arange(n), name="name1")
    same_name = Index(ak.arange(n), name="name1")
    other_name = Index(ak.arange(n), name="name2")

    assert_index_equal(named, same_name, check_names=True)
    assert_index_equal(named, other_name, check_names=False)
    with pytest.raises(AssertionError):
        assert_index_equal(named, other_name, check_names=True)
def test_assert_index_equal_check_exact(self):
    """Exercise ``check_exact`` together with the ``rtol`` / ``atol`` tolerances."""
    n = 10

    # check_exact: an offset of 1e-9 is tolerated only when exact
    # comparison is switched off.
    exact_idx = Index(ak.arange(n, dtype="float64"))
    nudged_idx = Index(ak.arange(n) + 1e-9)
    assert_index_equal(exact_idx, nudged_idx, check_exact=False)
    with pytest.raises(AssertionError):
        assert_index_equal(exact_idx, nudged_idx, check_exact=True)

    # rtol / atol
    reference = Index(ak.arange(n, dtype="float64"))

    rng = ak.random.default_rng()
    atol = 0.001
    rtol = 0.001

    # Offsets scaled by a random draw stay within the stated tolerance.
    within_atol = Index(ak.arange(n) + atol * rng.random())
    assert_index_equal(reference, within_atol, check_exact=False, atol=atol)

    within_both = Index(ak.arange(n) + rtol * ak.arange(n) + atol * rng.random())
    assert_index_equal(reference, within_both, check_exact=False, atol=atol, rtol=rtol)

    # Twice the tolerance must be rejected.
    beyond_rtol = Index(ak.arange(n) + ak.arange(n) * 2 * rtol)
    with pytest.raises(AssertionError):
        assert_index_equal(reference, beyond_rtol, check_exact=False, rtol=rtol)

    beyond_atol = Index(ak.arange(n) + 2 * atol)
    with pytest.raises(AssertionError):
        assert_index_equal(reference, beyond_atol, check_exact=False, atol=atol)
def test_assert_class_equal(self):
    """assert_class_equal accepts same-class pairs and rejects mixed-class pairs."""
    index = self.build_index(self)
    multi = self.build_multi_index(self)
    multi_again = self.build_multi_index(self)
    frame = self.build_ak_df(self)
    series = ak.Series(-1 * ak.arange(5), index=ak.arange(5))

    same_class_pairs = [
        (index, index),
        (multi, multi_again),
        (series, series),
        (frame, frame),
    ]
    for lhs, rhs in same_class_pairs:
        assert_class_equal(lhs, rhs)

    mixed_class_pairs = [
        (multi, index),
        (series, index),
        (frame, series),
    ]
    for lhs, rhs in mixed_class_pairs:
        with pytest.raises(AssertionError):
            assert_class_equal(lhs, rhs)
def test_assert_is_sorted(self):
    """assert_is_sorted passes only for ascending data, bare or wrapped in Index/Series."""
    n = 10
    ascending = ak.arange(n)
    descending = -1 * ascending
    shuffled = ak.array([1, 2, 5, 4, 3])

    # Run the same three cases on the raw array, then Index, then Series.
    for wrap in (lambda arr: arr, Index, Series):
        assert_is_sorted(wrap(ascending))
        for unsorted in (descending, shuffled):
            with pytest.raises(AssertionError):
                assert_is_sorted(wrap(unsorted))
def test_assert_series_equal(self):
    """Exercise dtype, index and tolerance handling of assert_series_equal."""

    def vals():
        # Fresh copy of the integer payload used throughout this test.
        return ak.array([1, 0, 2])

    def pos():
        # Fresh copy of the default 0..2 index values.
        return ak.arange(3)

    int_series = Series(vals(), index=Index(pos()))
    float_series = Series(ak.array([1.0, 0.0, 2.0]), index=Index(pos() * 1.0))

    assert_series_equal(int_series, int_series)
    assert_series_equal(float_series, float_series)

    # check_dtype / check_index_type: the int/float pair matches only
    # when both type checks are switched off.
    assert_series_equal(int_series, float_series, check_dtype=False, check_index_type=False)
    with pytest.raises(AssertionError):
        assert_series_equal(int_series, float_series, check_dtype=False, check_index_type=True)
    with pytest.raises(AssertionError):
        assert_series_equal(int_series, float_series, check_dtype=True, check_index_type=False)

    # check_index
    other_index = Series(vals(), index=Index(pos() * 2.0))
    assert_series_equal(int_series, other_index, check_index=False)
    with pytest.raises(AssertionError):
        assert_series_equal(int_series, other_index, check_index=True)

    # rtol / atol
    rng = ak.random.default_rng()
    atol = 0.001
    rtol = 0.001

    within_atol = Series(
        vals() + rng.random() * atol,
        index=Index(pos() + rng.random() * atol),
    )
    assert_series_equal(float_series, within_atol, check_exact=False, atol=atol)

    within_both = Series(
        vals() + (rtol * vals() + rng.random() * atol),
        index=Index(pos() + (rtol * pos() + rng.random() * atol)),
    )
    assert_series_equal(float_series, within_both, check_exact=False, atol=atol, rtol=rtol)

    # Twice the relative tolerance must be rejected.
    beyond_rtol = Series(
        vals() + vals() * 2 * rtol,
        index=Index(pos() + vals() * 2 * rtol),
    )
    with pytest.raises(AssertionError):
        assert_series_equal(float_series, beyond_rtol, check_exact=False, rtol=rtol)

    # Twice the absolute tolerance must be rejected.
    beyond_atol = Series(
        vals() + 2 * atol,
        index=Index(pos() + 2 * atol),
    )
    with pytest.raises(AssertionError):
        assert_series_equal(float_series, beyond_atol, check_exact=False, atol=atol)
def test_assert_frame_equal_check_dtype(self):
    """``check_dtype`` decides whether an int64-vs-float64 column is a mismatch."""
    original = self.build_ak_df(self)

    # A deep copy matches the original even with dtype checking on.
    converted = original.copy(deep=True)
    assert_frame_equal(original, converted, check_dtype=True)

    # Recast one column to float64 in the copy.
    converted["day"] = cast(converted["day"], dt="float64")

    assert_frame_equal(converted, converted, check_dtype=True)
    assert_frame_equal(original, converted, check_dtype=False)
    with pytest.raises(AssertionError):
        assert_frame_equal(original, converted, check_dtype=True)
def test_assert_frame_equal_check_exact(self):
    """
    Exercise ``check_exact`` with the ``rtol`` / ``atol`` tolerances on a frame.

    The ``amount`` column of the second frame is shifted by
    ``rtol * amount + 0.5 * atol``. That shift is inside the combined
    tolerance but outside either tolerance used on its own.
    """
    atol = 0.001
    rtol = 0.001

    df = self.build_ak_df(self)
    df_rtol_atol = self.build_ak_df(self)
    # The absolute part of the shift is a fixed atol / 2 rather than a
    # random fraction of atol. A draw close to zero would leave the shift
    # inside the relative tolerance alone, so the "rtol only" case below
    # would intermittently fail to raise (assuming assert_frame_equal uses
    # the same allclose-style tolerance as assert_almost_equal -- TODO confirm).
    df_rtol_atol["amount"] = (
        df_rtol_atol["amount"] + rtol * df_rtol_atol["amount"] + 0.5 * atol
    )

    assert_frame_equal(df, df_rtol_atol, check_exact=False, atol=atol, rtol=rtol)

    with pytest.raises(AssertionError):
        assert_frame_equal(df, df_rtol_atol, check_exact=True)
    with pytest.raises(AssertionError):
        assert_frame_equal(df, df_rtol_atol, check_exact=False, rtol=rtol)
    with pytest.raises(AssertionError):
        assert_frame_equal(df, df_rtol_atol, check_exact=False, atol=atol)
def test_assert_arkouda_segarray_equal(self):
    """Exercise ``check_same`` and ``check_dtype`` of assert_arkouda_segarray_equal."""

    def make_seg(**arange_kwargs):
        # Three segments starting at 0, 3 and 9 over ten values.
        return ak.SegArray(ak.array([0, 3, 9]), ak.arange(10, **arange_kwargs))

    seg = make_seg()
    seg_twin = make_seg()
    seg_as_float = make_seg(dtype="float64")

    # The very same object: "same" passes, "copy" fails.
    assert_arkouda_segarray_equal(seg, seg)
    assert_arkouda_segarray_equal(seg, seg, check_same="same")
    with pytest.raises(AssertionError):
        assert_arkouda_segarray_equal(seg, seg, check_same="copy")

    # An equal but separately built object: "copy" passes, "same" fails.
    assert_arkouda_segarray_equal(seg, seg_twin)
    assert_arkouda_segarray_equal(seg, seg_twin, check_same="copy")
    with pytest.raises(AssertionError):
        assert_arkouda_segarray_equal(seg, seg_twin, check_same="same")

    # Same numbers stored as float64: accepted only without dtype checking.
    assert_arkouda_segarray_equal(seg, seg_as_float, check_dtype=False)
    with pytest.raises(AssertionError):
        assert_arkouda_segarray_equal(seg, seg_as_float, check_dtype=True)
obj.permutation is not None else {}), - **({"segments": obj.segments.name} if obj.segments is not None else {}), - } + ( + obj.name + if not isinstance(obj, (Categorical_, SegArray)) + else ( + json.dumps( + { + "codes": obj.codes.name, + "categories": obj.categories.name, + "NA_codes": obj._akNAcode.name, + **( + {"permutation": obj.permutation.name} + if obj.permutation is not None + else {} + ), + **({"segments": obj.segments.name} if obj.segments is not None else {}), + } + ) + if isinstance(obj, Categorical_) + else json.dumps({"segments": obj.segments.name, "values": obj.values.name}) + ) ) - if isinstance(obj, Categorical_) - else json.dumps({"segments": obj.segments.name, "values": obj.values.name}) for k, obj in self.items() ] dtypes = [ @@ -3900,7 +3908,7 @@ def copy(self, deep=True): """ - if deep: + if deep is True: res = DataFrame() res._size = self._nrows res._bytes = self._bytes @@ -5015,26 +5023,36 @@ def register(self, user_defined_name: str) -> DataFrame: if self.registered_name is not None and self.is_registered(): raise RegistrationError(f"This object is already registered as {self.registered_name}") column_data = [ - obj.name - if not isinstance(obj, (Categorical_, SegArray, BitVector)) - else json.dumps( - { - "codes": obj.codes.name, - "categories": obj.categories.name, - "NA_codes": obj._akNAcode.name, - **({"permutation": obj.permutation.name} if obj.permutation is not None else {}), - **({"segments": obj.segments.name} if obj.segments is not None else {}), - } - ) - if isinstance(obj, Categorical_) - else json.dumps({"segments": obj.segments.name, "values": obj.values.name}) - if isinstance(obj, SegArray) - else json.dumps( - { - "name": obj.name, - "width": obj.width, - "reverse": obj.reverse, - } # BitVector Case + ( + obj.name + if not isinstance(obj, (Categorical_, SegArray, BitVector)) + else ( + json.dumps( + { + "codes": obj.codes.name, + "categories": obj.categories.name, + "NA_codes": obj._akNAcode.name, + **( + {"permutation": 
obj.permutation.name} + if obj.permutation is not None + else {} + ), + **({"segments": obj.segments.name} if obj.segments is not None else {}), + } + ) + if isinstance(obj, Categorical_) + else ( + json.dumps({"segments": obj.segments.name, "values": obj.values.name}) + if isinstance(obj, SegArray) + else json.dumps( + { + "name": obj.name, + "width": obj.width, + "reverse": obj.reverse, + } # BitVector Case + ) + ) + ) ) for obj in self.values() ] diff --git a/arkouda/index.py b/arkouda/index.py index 26d50c1db5..028ba64d1f 100644 --- a/arkouda/index.py +++ b/arkouda/index.py @@ -1110,6 +1110,7 @@ def index(self): def nlevels(self) -> int: """ Integer number of levels in this MultiIndex. + See Also -------- Index.nlevels @@ -1131,6 +1132,13 @@ def ndim(self): def inferred_type(self) -> str: return "mixed" + @property + def dtype(self) -> npdtype: + """ + Return the dtype object of the underlying data. + """ + return npdtype("O") + def get_level_values(self, level: Union[str, int]): if isinstance(level, str): if self.names is None: @@ -1153,13 +1161,6 @@ def get_level_values(self, level: Union[str, int]): "an integer with absolute value less than the number of levels." ) - @property - def dtype(self) -> npdtype: - """ - Return the dtype object of the underlying data. 
- """ - return npdtype("O") - def equal_levels(self, other: MultiIndex) -> builtins.bool: """ Return True if the levels of both MultiIndex objects are the same diff --git a/arkouda/testing/__init__.py b/arkouda/testing/__init__.py new file mode 100644 index 0000000000..af956e108f --- /dev/null +++ b/arkouda/testing/__init__.py @@ -0,0 +1,37 @@ +from ._asserters import ( + assert_almost_equal, + assert_arkouda_array_equal, + assert_arkouda_pdarray_equal, + assert_arkouda_segarray_equal, + assert_arkouda_strings_equal, + assert_attr_equal, + assert_categorical_equal, + assert_class_equal, + assert_contains_all, + assert_copy, + assert_dict_equal, + assert_equal, + assert_frame_equal, + assert_index_equal, + assert_is_sorted, + assert_series_equal, +) + +__all__ = [ + "assert_almost_equal", + "assert_arkouda_array_equal", + "assert_arkouda_pdarray_equal", + "assert_arkouda_segarray_equal", + "assert_arkouda_strings_equal", + "assert_attr_equal", + "assert_categorical_equal", + "assert_class_equal", + "assert_contains_all", + "assert_copy", + "assert_dict_equal", + "assert_equal", + "assert_frame_equal", + "assert_index_equal", + "assert_is_sorted", + "assert_series_equal", +] diff --git a/arkouda/testing/_asserters.py b/arkouda/testing/_asserters.py new file mode 100644 index 0000000000..64ae92f166 --- /dev/null +++ b/arkouda/testing/_asserters.py @@ -0,0 +1,1142 @@ +from __future__ import annotations + +from typing import NoReturn, cast + +import numpy as np +from pandas.api.types import is_bool, is_number +from pandas.io.formats.printing import pprint_thing # type: ignore + +from arkouda import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + Strings, + argsort, + array, + pdarray, + sort, +) +from arkouda import SegArray +from arkouda import sum as aksum +from arkouda.util import is_numeric + +DEBUG = True + +__all__ = [ + "assert_almost_equal", + "assert_arkouda_array_equal", + "assert_arkouda_pdarray_equal", + "assert_arkouda_segarray_equal", + 
def assert_almost_equal(
    left,
    right,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    **kwargs,
) -> None:
    """
    Check that the left and right objects are approximately equal.

    "Approximately equal" applies to numbers, or to objects containing
    numbers, that agree to within the given tolerances.

    Parameters
    ----------
    left : object
    right : object
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.
    **kwargs
        Forwarded to assert_index_equal, assert_series_equal or
        assert_frame_equal when ``left`` is an Index, Series or DataFrame.
        Ignored for every other input.

    Raises
    ------
    AssertionError
        If the objects are not equal within tolerance.

    Warning
    -------
    This function cannot be used on pdarray of size > ak.client.maxTransferBytes
    because it converts pdarrays to numpy arrays and calls np.allclose.
    """
    # Container types delegate to their own asserter with exact comparison off.
    if isinstance(left, Index):
        assert_index_equal(left, right, check_exact=False, rtol=rtol, atol=atol, **kwargs)
        return

    if isinstance(left, Series):
        assert_series_equal(left, right, check_exact=False, rtol=rtol, atol=atol, **kwargs)
        return

    if isinstance(left, DataFrame):
        assert_frame_equal(left, right, check_exact=False, rtol=rtol, atol=atol, **kwargs)
        return

    # Everything else. A pair of numbers (e.g. np.float64 and float) or a
    # pair of bools (e.g. np.bool_ and bool) is not required to share a class.
    both_numbers = is_number(left) and is_number(right)
    both_bools = is_bool(left) and is_bool(right)
    if not (both_numbers or both_bools):
        involves_pdarray = isinstance(left, pdarray) or isinstance(right, pdarray)
        assert_class_equal(left, right, obj="pdarray" if involves_pdarray else "Input")

    # Two pdarrays are converted to numpy arrays before the numeric comparison.
    if isinstance(left, pdarray) and isinstance(right, pdarray):
        left, right = left.to_ndarray(), right.to_ndarray()

    assert np.allclose(left, right, rtol=rtol, atol=atol, equal_nan=True)
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool = True,
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Check that left and right Index are equal.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool, default True
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare numbers exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Raises
    ------
    AssertionError
        If either argument is not an Index, or if the indexes differ in
        any of the checked respects.

    Examples
    --------
    >>> from arkouda import testing as tm
    >>> a = ak.Index([1, 2, 3])
    >>> b = ak.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = not DEBUG

    def _check_types(left, right, obj: str = "Index") -> None:
        # Class / inferred_type / dtype comparison; a no-op unless `exact`.
        if not exact:
            return

        assert_class_equal(left, right, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", left, right, obj=obj)

        # Skip exact dtype checking when `check_categorical` is False.
        # NOTE(review): this tests whether the *dtype* is a Categorical
        # instance; confirm that can ever be true, otherwise this branch
        # never runs and the plain dtype comparison below always applies.
        if isinstance(left.dtype, Categorical) and isinstance(right.dtype, Categorical):
            if check_categorical:
                assert_attr_equal("dtype", left, right, obj=obj)
                assert_index_equal(left.categories, right.categories, exact=exact)
            return

        assert_attr_equal("dtype", left, right, obj=obj)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _check_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        msg1 = f"{obj} levels are different"
        msg2 = f"{left.nlevels}, {left}"
        msg3 = f"{right.nlevels}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # length comparison
    if len(left) != len(right):
        msg1 = f"{obj} length are different"
        msg2 = f"{len(left)}, {left}"
        msg3 = f"{len(right)}, {right}"
        raise_assert_detail(obj, msg1, msg2, msg3)

    # If order doesn't matter then sort the index entries
    if not check_order:
        left = left[left.argsort()]
        right = right[right.argsort()]

    # MultiIndex gets a level-by-level comparison so that the error
    # message names the level that differs.
    if isinstance(left, MultiIndex):
        right = cast(MultiIndex, right)

        for level in range(left.nlevels):
            lobj = f"MultiIndex level [{level}]"
            try:
                # try comparison on levels/codes to avoid densifying MultiIndex
                assert_index_equal(
                    left.levels[level],
                    right.levels[level],
                    exact=exact,
                    check_names=check_names,
                    check_exact=check_exact,
                    check_categorical=check_categorical,
                    rtol=rtol,
                    atol=atol,
                    obj=lobj,
                )
            except AssertionError:
                # Fall back to comparing the per-level values.
                llevel = left.get_level_values(level)
                rlevel = right.get_level_values(level)

                assert_index_equal(
                    llevel,
                    rlevel,
                    exact=exact,
                    check_names=check_names,
                    check_exact=check_exact,
                    check_categorical=check_categorical,
                    rtol=rtol,
                    atol=atol,
                    obj=lobj,
                )
            # get_level_values may change dtype
            _check_types(left.levels[level], right.levels[level], obj=obj)

    # Exact comparison: used when requested, and also whenever either side
    # is not numeric according to is_numeric (tolerances do not apply then).
    # Differs from pandas due to unintuitive pandas behavior.
    elif check_exact is True or not is_numeric(left) or not is_numeric(right):
        if not left.equals(right):
            # Both operands passed the Index isinstance check above, so the
            # element-wise comparison is done on the Index objects directly.
            mismatch = left != right

            diff = aksum(mismatch.astype(int)) * 100.0 / len(left)
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
            raise_assert_detail(obj, msg, left, right)
    else:
        # @TODO Use new ak.allclose function
        assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=exact,
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)

    if check_categorical:
        # NOTE(review): `left` and `right` are Index objects here, so this
        # condition looks like it can never be true; the intent may have been
        # to test `left.values` / `right.values`. Left unchanged because
        # enabling it would alter which inputs are accepted -- confirm intent.
        if isinstance(left, Categorical) or isinstance(right, Categorical):
            assert_categorical_equal(left.values, right.values, obj=f"{obj} category")
def assert_categorical_equal(
    left,
    right,
    check_dtype: bool = True,
    check_category_order: bool = True,
    obj: str = "Categorical",
) -> None:
    """
    Assert that two Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Forwarded to the comparison of the integer codes; only consulted when
        ``check_category_order`` is True.
    check_category_order : bool, default True
        If True, the categories must match in order and the integer codes
        must be identical. If False, the categories are compared after
        sorting and only the resulting values are compared.
    obj : str, default 'Categorical'
        Name of the object being compared, used in the assertion message.
    """
    _check_isinstance(left, right, Categorical)

    categories_label = f"{obj}.categories"

    if not check_category_order:
        try:
            # @TODO replace with Index.sort_values
            left_sorted = Index(left.categories[argsort(left.categories)])
            right_sorted = Index(right.categories[argsort(right.categories)])
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str':
            # fall back to comparing the categories in their stored order.
            left_sorted = Index(left.categories)
            right_sorted = Index(right.categories)
        assert_index_equal(left_sorted, right_sorted, obj=categories_label, exact=True)

        # @TODO Replace with Index.take
        left_values = Index(left.categories[left.codes])
        right_values = Index(right.categories[right.codes])
        assert_index_equal(left_values, right_values, obj=f"{obj}.values", exact=True)
        return

    # Order-sensitive comparison: same categories in the same order, and
    # therefore the same integer codes.
    assert_index_equal(
        Index(left.categories), Index(right.categories), obj=categories_label, exact=True
    )
    assert_arkouda_array_equal(left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes")

    # @TODO compare the "ordered" attribute once Categorical.ordered is added
def assert_arkouda_pdarray_equal(
    left,
    right,
    check_dtype: bool = True,
    err_msg=None,
    check_same=None,
    obj: str = "pdarray",
    index_values=None,
) -> None:
    """
    Check that the two 'ak.pdarray's are equivalent.

    Parameters
    ----------
    left, right : arkouda.pdarray
        The two arrays to be compared.
    check_dtype : bool, default True
        Check dtype if both left and right are ak.pdarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'pdarray'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : Index | arkouda.pdarray, default None
        optional index (shared by both left and right), used in output.

    Raises
    ------
    AssertionError
        If the classes, shapes, values or (when ``check_dtype``) dtypes differ,
        or if the ``check_same`` identity requirement is not met.
    """
    __tracebackhide__ = not DEBUG

    # instance validation
    # Show a detailed error message when classes are different
    assert_class_equal(left, right, obj=obj)
    # both classes must be an ak.pdarray
    _check_isinstance(left, right, pdarray)

    def _get_base(arr):
        return arr.base if getattr(arr, "base", None) is not None else arr

    left_base = _get_base(left)
    right_base = _get_base(right)

    if check_same == "same":
        if left_base is not right_base:
            raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    elif check_same == "copy":
        if left_base is right_base:
            raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    def _raise(left: pdarray, right: pdarray, err_msg):
        if err_msg is None:
            if left.shape != right.shape:
                raise_assert_detail(obj, f"{obj} shapes are different", left.shape, right.shape)

            diff = aksum(left != right)

            diff = diff * 100.0 / left.size
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
            raise_assert_detail(obj, msg, left, right, index_values=index_values)

        raise AssertionError(err_msg)

    from arkouda import all as akall
    from arkouda.dtypes import bigint, dtype

    # Compare shapes before values. np.allclose broadcasts its operands, so
    # without this guard a length-1 array would compare equal to a longer
    # array, and other size mismatches would surface as a broadcasting
    # ValueError instead of an AssertionError.
    if left.shape != right.shape:
        _raise(left, right, err_msg)

    # compare values
    # @TODO use ak.allclose
    if isinstance(left, pdarray) and isinstance(right, pdarray) and left.dtype == dtype(bigint):
        if not akall(left == right):
            _raise(left, right, err_msg)
    elif not np.allclose(left.to_ndarray(), right.to_ndarray(), atol=0, rtol=0, equal_nan=True):
        _raise(left, right, err_msg)

    if check_dtype:
        if isinstance(left, pdarray) and isinstance(right, pdarray):
            assert_attr_equal("dtype", left, right, obj=obj)
def assert_arkouda_strings_equal(
    left,
    right,
    err_msg=None,
    check_same=None,
    obj: str = "Strings",
    index_values=None,
) -> None:
    """
    Assert that two 'ak.Strings' objects hold the same values.

    Parameters
    ----------
    left, right : arkouda.Strings
        The two Strings to be compared.
    err_msg : str, default None
        If provided, used as the assertion message when values differ.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'Strings'
        Name of the object being compared, used in the assertion message.
    index_values : Index | arkouda.pdarray, default None
        Optional index (shared by both left and right), used in output.
    """
    __tracebackhide__ = not DEBUG

    # Report differing classes with a detailed message first, then require
    # that both operands are ak.Strings.
    assert_class_equal(left, right, obj=obj)
    _check_isinstance(left, right, Strings)

    # Identity checks are made on the ``base`` attribute when one is present.
    left_base = left.base if getattr(left, "base", None) is not None else left
    right_base = right.base if getattr(right, "base", None) is not None else right

    if check_same == "same" and left_base is not right_base:
        raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    if check_same == "copy" and left_base is right_base:
        raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    if left.shape != right.shape:
        raise_assert_detail(obj, f"{obj} shapes are different", left.shape, right.shape)

    if aksum(left != right) == 0:
        return

    # Values differ: a caller-supplied message takes precedence over the
    # generated percentage report.
    if err_msg is not None:
        raise AssertionError(err_msg)

    percent = aksum(left != right)
    percent = percent * 100.0 / left.size
    detail = f"{obj} values are different ({np.round(percent, 5)} %)"
    raise_assert_detail(obj, detail, left, right, index_values=index_values)
# This could be refactored to use the NDFrame.equals method
def assert_series_equal(
    left,
    right,
    check_dtype: bool = True,
    check_index_type: bool = True,
    check_series_type: bool = True,
    check_names: bool = True,
    check_exact: bool = False,
    check_categorical: bool = True,
    check_category_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Series",
    *,
    check_index: bool = True,
    check_like: bool = False,
) -> None:
    """
    Check that left and right Series are equal.

    Parameters
    ----------
    left : Series
    right : Series
    check_dtype : bool, default True
        Whether to check the Series dtype is identical.
    check_index_type : bool, default True
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_series_type : bool, default True
        Whether to check the Series class is identical.
    check_names : bool, default True
        Whether to check the Series and Index names attribute.
    check_exact : bool, default False
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_category_order : bool, default True
        Whether to compare category order of internal Categoricals.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Series'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    check_index : bool, default True
        Whether to check index equivalence. If False, then compare only values.
    check_like : bool, default False
        If True, ignore the order of the index. Must be False if check_index is False.
        Note: same labels must be with the same data.

    Raises
    ------
    ValueError
        If ``check_like`` is True while ``check_index`` is False.
    AssertionError
        If any of the enabled comparisons fails.

    Examples
    --------
    >>> from arkouda import testing as tm
    >>> a = ak.Series([1, 2, 3, 4])
    >>> b = ak.Series([1, 2, 3, 4])
    >>> tm.assert_series_equal(a, b)
    """
    __tracebackhide__ = not DEBUG

    if not check_index and check_like:
        raise ValueError("check_like must be False if check_index is False")

    # instance validation
    _check_isinstance(left, right, Series)

    if check_series_type:
        assert_class_equal(left, right, obj=obj)

    # length comparison
    if len(left) != len(right):
        msg1 = f"{len(left)}, {left.index}"
        msg2 = f"{len(right)}, {right.index}"
        raise_assert_detail(obj, "Series length are different", msg1, msg2)

    if check_index:
        assert_index_equal(
            left.index,
            right.index,
            exact=check_index_type,
            check_names=check_names,
            check_exact=check_exact,
            check_categorical=check_categorical,
            check_order=not check_like,
            rtol=rtol,
            atol=atol,
            obj=f"{obj}.index",
        )

    if check_like:
        # @TODO use Series.reindex_like
        left = left[right.index.values]

    if check_dtype:
        # We want to skip exact dtype checking when `check_categorical`
        # is False. We'll still raise if only one is a `Categorical`,
        # regardless of `check_categorical`.
        # The test is made on the underlying values: `left` and `right` are
        # Series at this point, so testing the Series objects themselves
        # against Categorical could never succeed.
        both_categorical = isinstance(left.values, Categorical) and isinstance(
            right.values, Categorical
        )
        if not (both_categorical and not check_categorical):
            assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")

    if check_exact or not is_numeric(left.values) or not is_numeric(right.values):
        assert_arkouda_array_equal(
            left.values,
            right.values,
            check_dtype=check_dtype,
            index_values=left.index,
            obj=str(obj),
        )
    else:
        assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(check_dtype),
            obj=str(obj),
            index_values=left.index,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("name", left, right, obj=obj)

    if check_categorical is True:
        if isinstance(left.values, Categorical) or isinstance(right.values, Categorical):
            assert_categorical_equal(
                left.values,
                right.values,
                obj=f"{obj} category",
                check_category_order=check_category_order,
                check_dtype=check_dtype,
            )
+ check_index_type : bool, default = True + Whether to check the Index class, dtype and inferred_type + are identical. + check_column_type : bool or {'equiv'}, default 'equiv' + Whether to check the columns class, dtype and inferred_type + are identical. Is passed as the ``exact`` argument of + :func:`assert_index_equal`. + check_frame_type : bool, default True + Whether to check the DataFrame class is identical. + check_names : bool, default True + Whether to check that the `names` attribute for both the `index` + and `column` attributes of the DataFrame is identical. + check_exact : bool, default False + Whether to compare number exactly. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_like : bool, default False + If True, ignore the order of index & columns. + Note: index labels must match their respective rows + (same as in columns) - same labels must be with the same data. + rtol : float, default 1e-5 + Relative tolerance. Only used when check_exact is False. + atol : float, default 1e-8 + Absolute tolerance. Only used when check_exact is False. + obj : str, default 'DataFrame' + Specify object name being compared, internally used to show appropriate + assertion message. + + See Also + -------- + assert_series_equal : Equivalent method for asserting Series equality. + + Examples + -------- + This example shows comparing two DataFrames that are equal + but with columns of differing dtypes. + + >>> from arkouda.testing import assert_frame_equal + >>> df1 = ak.DataFrame({'a': [1, 2], 'b': [3, 4]}) + >>> df2 = ak.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + + df1 equals itself. + + >>> assert_frame_equal(df1, df1) + + df1 differs from df2 as column 'b' is of a different type. + + >>> assert_frame_equal(df1, df2) + Traceback (most recent call last): + ... 
+ AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different + + Attribute "dtype" are different + [left]: int64 + [right]: float64 + + Ignore differing dtypes in columns with check_dtype. + + >>> assert_frame_equal(df1, df2, check_dtype=False) + """ + __tracebackhide__ = not DEBUG + + # instance validation + _check_isinstance(left, right, DataFrame) + + if check_frame_type: + assert isinstance(left, type(right)) + assert_class_equal(left, right, obj=obj) + + # shape comparison + if left.shape != right.shape: + raise_assert_detail(obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}") + + # index comparison + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.index", + ) + + # column comparison + assert_index_equal( + left.columns, + right.columns, + exact=check_column_type, + check_names=check_names, + check_exact=check_exact, + check_categorical=check_categorical, + check_order=not check_like, + rtol=rtol, + atol=atol, + obj=f"{obj}.columns", + ) + + if check_like: + # @TODO use left.reindex_like(right) + left = left[right.index.values] + + for col in left.columns.values: + # We have already checked that columns match, so we can do + # fast location-based lookups + lcol = left[col] + rcol = right[col] + if not isinstance(lcol, Series): + lcol = Series(lcol) + if not isinstance(rcol, Series): + rcol = Series(rcol) + + # use check_index=False, because we do not want to run + # assert_index_equal for each column, + # as we already checked it for the whole dataframe before. 
def assert_equal(left, right, **kwargs) -> None:
    """
    Wrapper for tm.assert_*_equal to dispatch to the appropriate test function.

    The dispatch is decided by the type of ``left`` only.

    Parameters
    ----------
    left, right : Index, Series, DataFrame, ak.pdarray, ak.Strings,
        ak.Categorical, ak.SegArray, str, or scalar
        The two items to be compared.
    **kwargs
        All keyword arguments are passed through to the underlying assert method.
        No keyword arguments are accepted when ``left`` is a ``str`` or falls
        through to the generic ``assert_almost_equal`` comparison.

    Raises
    ------
    AssertionError
        If the two items are not equal, or if keyword arguments are supplied
        for a comparison that does not accept any.
    """
    __tracebackhide__ = not DEBUG

    if isinstance(left, Index):
        assert_index_equal(left, right, **kwargs)
    elif isinstance(left, Series):
        assert_series_equal(left, right, **kwargs)
    elif isinstance(left, DataFrame):
        assert_frame_equal(left, right, **kwargs)
    elif isinstance(left, (pdarray, Strings, Categorical, SegArray)):
        # assert_arkouda_array_equal itself dispatches on all four array
        # types, so route every one of them there rather than only pdarray.
        assert_arkouda_array_equal(left, right, **kwargs)
    elif isinstance(left, str):
        assert kwargs == {}
        assert left == right
    else:
        assert kwargs == {}
        assert_almost_equal(left, right)
+ ) + assert elem1 is not elem2, msg diff --git a/pytest_PROTO.ini b/pytest_PROTO.ini index e86793eada..e96984cc33 100644 --- a/pytest_PROTO.ini +++ b/pytest_PROTO.ini @@ -38,6 +38,7 @@ testpaths = PROTO_tests/tests/stats_test.py PROTO_tests/tests/string_test.py PROTO_tests/tests/symbol_table_test.py + PROTO_tests/tests/testing/asserters_test.py PROTO_tests/tests/util_test.py PROTO_tests/tests/where_test.py norecursedirs = diff --git a/tests/deprecated/index_test.py b/tests/deprecated/index_test.py index 75fa978e78..a138a4a589 100644 --- a/tests/deprecated/index_test.py +++ b/tests/deprecated/index_test.py @@ -2,7 +2,6 @@ import os import tempfile - import pandas as pd from base_test import ArkoudaTest from context import arkouda as ak @@ -12,7 +11,6 @@ from arkouda.dtypes import dtype from arkouda.index import Index from arkouda.pdarrayclass import pdarray -from arkouda.index import Index class IndexTest(ArkoudaTest):