Skip to content

Commit

Permalink
add gokart.testing.assert_frame_equal (#256)
Browse files Browse the repository at this point in the history
* add gokart.testing.assert_frame_equal

* fix " -> '

* fix isort

* fix review

* change assert_frame_equal -> assert_frame_contents_equal

* bug fix
  • Loading branch information
mski-iksm authored Nov 16, 2021
1 parent 49f3961 commit 00475f3
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
1 change: 1 addition & 0 deletions gokart/testing/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from gokart.testing.check_if_run_with_empty_data_frame import test_run, try_to_run_test_for_empty_data_frame
from gokart.testing.pandas_assert import assert_frame_contents_equal
30 changes: 30 additions & 0 deletions gokart/testing/pandas_assert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd


def assert_frame_contents_equal(actual: pd.DataFrame, expected: pd.DataFrame, **kwargs) -> None:
    """
    Assert that two DataFrames hold the same contents, ignoring label order.

    This function is mostly the same as pandas.testing.assert_frame_equal, however
    - this function ignores the order of index and columns.
    - this function fails when duplicated index or columns are found.

    Parameters
    ----------
    actual, expected : pd.DataFrame
        DataFrames to be compared.
    **kwargs
        Parameters passed to pandas.testing.assert_frame_equal.

    Raises
    ------
    AssertionError
        If either argument is not a DataFrame, if any index or columns contain
        duplicated labels, if the two frames do not share the same set of index
        or column labels, or if pandas.testing.assert_frame_equal reports a
        difference after alignment.
    """
    # The checks raise AssertionError explicitly instead of using `assert`
    # statements, so they are not stripped when Python runs with -O.
    frames = (('actual', actual), ('expected', expected))

    for name, frame in frames:
        if not isinstance(frame, pd.DataFrame):
            raise AssertionError(f'{name} is not a DataFrame')

    # Duplicated labels would make the order-insensitive alignment below ambiguous.
    for axis in ('index', 'columns'):
        for name, frame in frames:
            if not getattr(frame, axis).is_unique:
                raise AssertionError(f'{name} {axis} is not unique')

    if set(actual.columns) != set(expected.columns):
        raise AssertionError('columns are not equal')
    if set(actual.index) != set(expected.index):
        raise AssertionError('indexes are not equal')

    # Reorder `expected` to follow the row and column order of `actual`
    # before delegating the element-wise comparison to pandas.
    expected_aligned = expected.reindex(actual.index)[actual.columns]
    pd.testing.assert_frame_equal(actual, expected_aligned, **kwargs)
37 changes: 37 additions & 0 deletions test/testing/test_pandas_assert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import unittest

import pandas as pd

import gokart


class TestPandasAssert(unittest.TestCase):
    """Tests for gokart.testing.assert_frame_contents_equal."""

    def test_assert_frame_contents_equal(self):
        """Frames with the same values but shuffled rows and columns compare equal."""
        expected = pd.DataFrame(data=dict(f1=[1, 2, 3], f3=[111, 222, 333], f2=[4, 5, 6]), index=[0, 1, 2])
        resulted = pd.DataFrame(data=dict(f2=[5, 4, 6], f1=[2, 1, 3], f3=[222, 111, 333]), index=[1, 0, 2])

        gokart.testing.assert_frame_contents_equal(resulted, expected)

    def test_assert_frame_contents_equal_with_small_error(self):
        """Keyword arguments such as atol are forwarded, so small float differences pass."""
        expected = pd.DataFrame(data=dict(f1=[1.0001, 2.0001, 3.0001], f3=[111, 222, 333], f2=[4, 5, 6]), index=[0, 1, 2])
        resulted = pd.DataFrame(data=dict(f2=[5, 4, 6], f1=[2.0002, 1.0002, 3.0002], f3=[222, 111, 333]), index=[1, 0, 2])

        gokart.testing.assert_frame_contents_equal(resulted, expected, atol=1e-1)

    def test_assert_frame_contents_equal_with_duplicated_columns(self):
        """Duplicated column labels make the assertion fail."""
        expected = pd.DataFrame(data=dict(f1=[1, 2, 3], f3=[111, 222, 333], f2=[4, 5, 6]), index=[0, 1, 2])
        expected.columns = ['f1', 'f1', 'f2']
        resulted = pd.DataFrame(data=dict(f2=[5, 4, 6], f1=[2, 1, 3], f3=[222, 111, 333]), index=[1, 0, 2])
        resulted.columns = ['f2', 'f1', 'f1']

        with self.assertRaises(AssertionError):
            gokart.testing.assert_frame_contents_equal(resulted, expected)

    def test_assert_frame_contents_equal_with_duplicated_indexes(self):
        """Duplicated index labels make the assertion fail."""
        expected = pd.DataFrame(data=dict(f1=[1, 2, 3], f3=[111, 222, 333], f2=[4, 5, 6]), index=[0, 1, 2])
        expected.index = [0, 1, 1]
        resulted = pd.DataFrame(data=dict(f2=[5, 4, 6], f1=[2, 1, 3], f3=[222, 111, 333]), index=[1, 0, 2])
        # Give `resulted` the duplicated index too, mirroring the duplicated-columns
        # test; the original reassigned `expected.index` here a second time.
        resulted.index = [1, 0, 1]

        with self.assertRaises(AssertionError):
            gokart.testing.assert_frame_contents_equal(resulted, expected)

0 comments on commit 00475f3

Please sign in to comment.