Skip to content

Commit

Permalink
Bugfix 1507+1509: dynamic schema different index append fixes (#1529)
Browse files Browse the repository at this point in the history
Closes #1507 
Closes #1509

While fixing, noticed that the added test
`test_append_range_index_from_zero` would also not have passed, so fixed
this too.
  • Loading branch information
alexowens90 authored Apr 26, 2024
1 parent 86350d6 commit 3bd39c5
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 37 deletions.
20 changes: 9 additions & 11 deletions cpp/arcticdb/python/normalization_checks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,15 @@ void update_rowcount_normalization_data(
);

size_t new_start = new_index->start();
if (new_start != 0) {
auto stop = old_index->start() + old_length * old_index->step();
normalization::check<ErrorCode::E_INCOMPATIBLE_INDEX>(
new_start == stop,
"The appending data has a RangeIndex.start={} that is not contiguous with the {}"
"stop ({}) of",
error_suffix,
new_start,
stop
);
}
auto stop = old_index->start() + old_length * old_index->step();
normalization::check<ErrorCode::E_INCOMPATIBLE_INDEX>(
new_start == stop || (new_start == 0 && new_index->step() == 1),
"The appending data has a RangeIndex.start={} that is not contiguous with the "
"stop ({}) of {}",
new_start,
stop,
error_suffix
);

new_pandas->get().mutable_index()->set_start(old_index->start());
}
Expand Down
8 changes: 3 additions & 5 deletions cpp/arcticdb/version/schema_checks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@ inline void check_normalization_index_match(
// Having this will not allow appending RowCont indexed pd.DataFrames to DateTime indexed pd.DataFrames because they would
// have different field size (the rowcount index is not stored as a field). This logic is bug prone and will become better
// after we enable the empty index.
const bool input_frame_is_series = frame.norm_meta.has_series();
normalization::check<ErrorCode::E_INCOMPATIBLE_INDEX>(
common_index_type != IndexDescriptor::UNKNOWN ||
(old_idx_kind == IndexDescriptor::TIMESTAMP && new_idx_kind == IndexDescriptor::ROWCOUNT),
(input_frame_is_series && old_idx_kind == IndexDescriptor::TIMESTAMP && new_idx_kind == IndexDescriptor::ROWCOUNT),
"Cannot append {} index to {} index",
index_type_to_str(new_idx_kind),
index_type_to_str(old_idx_kind)
Expand Down Expand Up @@ -121,12 +122,9 @@ inline void fix_descriptor_mismatch_or_throw(
const auto &old_sd = existing_isr.tsd().as_stream_descriptor();
check_normalization_index_match(operation, old_sd, new_frame, empty_types);

if (dynamic_schema)
return; // TODO: dynamic schema may need some of the checks as below

fix_normalization_or_throw(operation == APPEND, existing_isr, new_frame);

if (!columns_match(old_sd, new_frame.desc)) {
if (!dynamic_schema && !columns_match(old_sd, new_frame.desc)) {
throw StreamDescriptorMismatch(
"The columns (names and types) in the argument are not identical to that of the existing version",
StreamDescriptor{old_sd},
Expand Down
33 changes: 33 additions & 0 deletions python/tests/unit/arcticdb/version_store/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,39 @@ def test_append_simple(lmdb_version_store):
assert_frame_equal(vit.data, expected)


@pytest.mark.parametrize("empty_types", (True, False))
@pytest.mark.parametrize("dynamic_schema", (True, False))
def test_append_range_index(version_store_factory, empty_types, dynamic_schema):
lib = version_store_factory(empty_types=empty_types, dynamic_schema=dynamic_schema)
sym = "test_append_range_index"
df_0 = pd.DataFrame({"col": [0, 1]}, index=pd.RangeIndex(0, 4, 2))
lib.write(sym, df_0)

# Appending another range index following on from what is there should work
df_1 = pd.DataFrame({"col": [2, 3]}, index=pd.RangeIndex(4, 8, 2))
lib.append(sym, df_1)
expected = pd.concat([df_0, df_1])
received = lib.read(sym).data
assert_frame_equal(expected, received)

# Appending a range starting earlier or later, or with a different step size, should fail
for idx in [pd.RangeIndex(6, 10, 2), pd.RangeIndex(10, 14, 2), pd.RangeIndex(8, 14, 3)]:
with pytest.raises(NormalizationException):
lib.append(sym, pd.DataFrame({"col": [4, 5]}, index=idx))


@pytest.mark.parametrize("empty_types", (True, False))
@pytest.mark.parametrize("dynamic_schema", (True, False))
def test_append_range_index_from_zero(version_store_factory, empty_types, dynamic_schema):
lib = version_store_factory(empty_types=empty_types, dynamic_schema=dynamic_schema)
sym = "test_append_range_index_from_zero"
df_0 = pd.DataFrame({"col": [0, 1]}, index=pd.RangeIndex(-6, -2, 2))
lib.write(sym, df_0)

with pytest.raises(NormalizationException):
lib.append(sym, pd.DataFrame({"col": [2, 3]}, index=pd.RangeIndex(0, 4, 2)))


def test_append_indexed(s3_version_store):
symbol = "test_append_simple"
idx1 = np.arange(0, 10)
Expand Down
27 changes: 6 additions & 21 deletions python/tests/unit/arcticdb/version_store/test_empty_column_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import pytest
from packaging.version import Version
from arcticdb.util._versions import PANDAS_VERSION
import arcticdb
from arcticdb_ext.exceptions import NormalizationException

class DtypeGenerator:
"""
Expand Down Expand Up @@ -861,14 +861,9 @@ def test_no_cols(self, lmdb_version_store_static_and_dynamic):
assert result.index.equals(pd.DatetimeIndex([]))

def test_has_a_column(self, lmdb_version_store_static_and_dynamic):
if self.is_dynamic_schema(lmdb_version_store_static_and_dynamic):
pytest.xfail(
"""In case of empty symbols dynamic schema allows appending indexes of different types.
See https://github.com/man-group/ArcticDB/issues/1507. This is fixed with the empty index type."""
)
result = self.roundtrip(pd.DataFrame({"a": []}), lmdb_version_store_static_and_dynamic)
assert result.index.equals(pd.DatetimeIndex([]))
with pytest.raises(Exception):
with pytest.raises(NormalizationException):
lmdb_version_store_static_and_dynamic.append(self.sym(), pd.DataFrame({"a": [1.0]}))
to_append_successfuly = pd.DataFrame({"a": [1.0]}, index=pd.DatetimeIndex(["01/01/2024"]))
lmdb_version_store_static_and_dynamic.append(self.sym(), to_append_successfuly)
Expand All @@ -882,11 +877,6 @@ def test_explicit_row_range_no_columns(self, lmdb_version_store_static_and_dynam
assert result.index.equals(pd.DatetimeIndex([]))

def test_explicit_row_range_with_columns(self, lmdb_version_store_static_and_dynamic):
if self.is_dynamic_schema(lmdb_version_store_static_and_dynamic):
pytest.xfail(
"""In case of empty symbols dynamic schema allows appending indexes of different types.
See https://github.com/man-group/ArcticDB/issues/1507. This is fixed with the empty index type."""
)
result = self.roundtrip(pd.DataFrame({"a": []}, index=pd.RangeIndex(start=5, stop=5, step=100)), lmdb_version_store_static_and_dynamic)
assert result.index.equals(pd.DatetimeIndex([]))
with pytest.raises(Exception):
Expand Down Expand Up @@ -938,7 +928,7 @@ def test_no_cols(self, lmdb_version_store_static_and_dynamic):
def test_has_a_column(self, lmdb_version_store_static_and_dynamic):
result = self.roundtrip(pd.DataFrame({"a": []}), lmdb_version_store_static_and_dynamic)
assert result.index.equals(pd.RangeIndex(start=0, stop=0, step=1))
with pytest.raises(arcticdb.exceptions.NormalizationException):
with pytest.raises(NormalizationException):
lmdb_version_store_static_and_dynamic.append(self.sym(), pd.DataFrame({"a": ["a"]}, index=pd.DatetimeIndex(["01/01/2024"])))
to_append_successfuly = pd.DataFrame({"a": ["a"]})
lmdb_version_store_static_and_dynamic.append(self.sym(), to_append_successfuly)
Expand All @@ -949,22 +939,17 @@ def test_explicit_row_range_no_columns(self, lmdb_version_store_static_and_dynam
assert result.index.equals(pd.RangeIndex(start=0, stop=0, step=1))

def test_explicit_row_range_with_columns(self, lmdb_version_store_static_and_dynamic):
if self.is_dynamic_schema(lmdb_version_store_static_and_dynamic):
pytest.xfail(
"""Dynamic schema should not allow appending missmatching range indexes. See issue:
https://github.com/man-group/ArcticDB/issues/1509"""
)
result = self.roundtrip(pd.DataFrame({"a": []}, index=pd.RangeIndex(start=5, stop=5, step=100)), lmdb_version_store_static_and_dynamic)
assert result.index.equals(pd.RangeIndex(start=5, stop=5, step=100))
# Cannot append datetime indexed df to empty rowrange index
with pytest.raises(arcticdb.exceptions.NormalizationException):
with pytest.raises(NormalizationException):
lmdb_version_store_static_and_dynamic.append(self.sym(), pd.DataFrame({"a": ["a"]}, index=pd.DatetimeIndex(["01/01/2024"])))
# Cannot append rowrange indexed df if the start of the appended is not matching the stop of the empty
with pytest.raises(arcticdb.exceptions.NormalizationException):
with pytest.raises(NormalizationException):
lmdb_version_store_static_and_dynamic.append(self.sym(), pd.DataFrame({"a": ["a"]}, index=pd.RangeIndex(start=9, stop=109, step=100)))
lmdb_version_store_static_and_dynamic.append(self.sym(), pd.DataFrame({"a": ["a"]}, index=pd.RangeIndex(start=10, stop=110, step=100)))
# Cannot append rowrange indexed df if the step is different
with pytest.raises(arcticdb.exceptions.NormalizationException):
with pytest.raises(NormalizationException):
lmdb_version_store_static_and_dynamic.append(self.sym(), pd.DataFrame({"a": ["a"]}, index=pd.RangeIndex(start=5, stop=6, step=1)))
to_append_successfuly = pd.DataFrame({"a": ["a"]}, index=pd.RangeIndex(start=5, stop=105, step=100))
lmdb_version_store_static_and_dynamic.append(self.sym(), to_append_successfuly)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@


def generic_dynamic_filter_test(version_store, symbol, df, arctic_query, pandas_query, dynamic_strings=True):
version_store.delete(symbol)
expected, slices = make_dynamic(df)
for df_slice in slices:
version_store.append(symbol, df_slice, write_if_missing=True)
Expand Down

0 comments on commit 3bd39c5

Please sign in to comment.