Skip to content

Commit

Permalink
LibraryTool add read_index (#1533)
Browse files Browse the repository at this point in the history
#### Reference Issues/PRs
Implements #1391 and fixes #1532.
- Added read_index in LibraryTool (as required in #1391)
- Allowed Library to get LibraryTool and call read_index (to fix #1532).
Also updated the docs accordingly.

#### What does this implement or fix?

#### Any other comments?

#### Checklist

<details>
  <summary>
   Checklist for code changes...
  </summary>
 
- [ ] Have you updated the relevant docstrings, documentation and
copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's
features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error
messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in
autogenerated release notes?
</details>

<!--
Thanks for contributing a Pull Request to ArcticDB! Please ensure you
have taken a look at:
- ArcticDB's Code of Conduct:
https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md
- ArcticDB's Contribution Licensing:
https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing
-->
  • Loading branch information
muhammadhamzasajjad authored Apr 26, 2024
1 parent 547cd6b commit 9858652
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 4 deletions.
25 changes: 24 additions & 1 deletion python/arcticdb/toolbox/library_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

from arcticdb.version_store._normalization import FrameData
from arcticdb.supported_types import ExplicitlySupportedDates
from arcticdb_ext.codec import decode_segment
from arcticdb_ext.storage import KeyType
from arcticdb_ext.stream import SegmentInMemory
Expand All @@ -15,6 +16,7 @@
from arcticdb.version_store._normalization import denormalize_dataframe

# Either of the two key classes (AtomKey or RefKey).
VariantKey = Union[AtomKey, RefKey]
# Type of the `as_of` argument accepted by `LibraryTool.read_index`; note that `None` is already part of the union.
VersionQueryInput = Union[int, str, ExplicitlySupportedDates, None]

_KEY_PROPERTIES = {
key_type: {k: v for k, v in vars(key_type).items() if isinstance(v, property)} for key_type in (AtomKey, RefKey)
Expand All @@ -31,6 +33,10 @@ def props_dict_to_atom_key(d: Dict[str, Any]) -> AtomKey:


class LibraryTool(LibraryToolImpl):
def __init__(self, library, nvs):
    """
    Initialise the tool for ``library`` and keep a handle on the version store.

    Parameters
    ----------
    library
        Forwarded unchanged to ``LibraryToolImpl.__init__``.
    nvs
        Stored as ``self._nvs``; ``read_index`` delegates to this object.
    """
    super().__init__(library)
    # Retained so that read_index() can call back into the version store.
    self._nvs = nvs

@staticmethod
def key_types() -> List[KeyType]:
    """Return every value registered in ``KeyType.__members__`` as a list."""
    members = KeyType.__members__
    return list(members.values())
Expand Down Expand Up @@ -135,4 +141,21 @@ def read_to_keys(
8243267225673136445
"""
df = self.read_to_dataframe(key)
return self.dataframe_to_keys(df, id if id is not None else key.id, filter_key_type)
return self.dataframe_to_keys(df, id if id is not None else key.id, filter_key_type)

def read_index(self, symbol: str, as_of: Optional[VersionQueryInput] = None, **kwargs) -> pd.DataFrame:
    """
    Read the index key for the named symbol.

    This is a thin wrapper: the call is forwarded unchanged to ``read_index`` on the
    version store held in ``self._nvs``.

    Parameters
    ----------
    symbol : `str`
        Symbol name.
    as_of : `Optional[VersionQueryInput]`, default=None
        See documentation of `read` method for more details.
    **kwargs
        Passed through untouched to the underlying ``read_index`` call.

    Returns
    -------
    Pandas DataFrame representing the index key in a human-readable format.
    """
    version_store = self._nvs
    index_frame = version_store.read_index(symbol, as_of, **kwargs)
    return index_frame
2 changes: 1 addition & 1 deletion python/arcticdb/version_store/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2819,4 +2819,4 @@ def library(self):
return self._library

def library_tool(self) -> LibraryTool:
    """
    Create a ``LibraryTool`` for this store's library.

    The store passes itself as the second constructor argument, matching
    ``LibraryTool.__init__(library, nvs)``, so that the tool can delegate
    ``read_index`` calls back to this store.

    Returns
    -------
    LibraryTool
        A new tool built from ``self.library()`` and this store.
    """
    # A single return: the former one-argument construction no longer matches the
    # LibraryTool constructor and would have made the two-argument call unreachable.
    return LibraryTool(self.library(), self)
12 changes: 10 additions & 2 deletions python/arcticdb/version_store/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,12 @@ class StagedDataFinalizeMethod(Enum):
WRITE = auto()
APPEND = auto()

class DevTools:
    """
    Holder for developer-facing utilities backed by a version store.

    An instance is created in ``Library.__init__`` and kept as ``Library._dev_tools``.
    """

    def __init__(self, nvs):
        """Remember the version store from which the tools are obtained."""
        self._nvs = nvs

    def library_tool(self):
        """Return whatever ``library_tool()`` on the wrapped version store returns."""
        version_store = self._nvs
        return version_store.library_tool()

class Library:
"""
Expand Down Expand Up @@ -321,6 +327,7 @@ def __init__(self, arctic_instance_description: str, nvs: NativeVersionStore):
self.arctic_instance_desc = arctic_instance_description
self._nvs = nvs
self._nvs._normalizer.df._skip_df_consolidation = True
self._dev_tools = DevTools(nvs)

def __repr__(self):
return "Library(%s, path=%s, storage=%s)" % (
Expand Down Expand Up @@ -1758,13 +1765,14 @@ def defragment_symbol_data(self, symbol: str, segment_size: Optional[int] = None
>>> lib.write("symbol", pd.DataFrame({"A": [0]}, index=[pd.Timestamp(0)]))
>>> lib.append("symbol", pd.DataFrame({"A": [1, 2]}, index=[pd.Timestamp(1), pd.Timestamp(2)]))
>>> lib.append("symbol", pd.DataFrame({"A": [3]}, index=[pd.Timestamp(3)]))
>>> lib.read_index(sym)
>>> lib_tool = lib._dev_tools.library_tool()
>>> lib_tool.read_index(sym)
start_index end_index version_id stream_id creation_ts content_hash index_type key_type start_col end_col start_row end_row
1970-01-01 00:00:00.000000000 1970-01-01 00:00:00.000000001 20 b'sym' 1678974096622685727 6872717287607530038 84 2 1 2 0 1
1970-01-01 00:00:00.000000001 1970-01-01 00:00:00.000000003 21 b'sym' 1678974096931527858 12345256156783683504 84 2 1 2 1 3
1970-01-01 00:00:00.000000003 1970-01-01 00:00:00.000000004 22 b'sym' 1678974096970045987 7952936283266921920 84 2 1 2 3 4
>>> lib.version_store.defragment_symbol_data("symbol", 2)
>>> lib.read_index(sym) # Returns two segments rather than three as a result of the defragmentation operation
>>> lib_tool.read_index(sym) # Returns two segments rather than three as a result of the defragmentation operation
start_index end_index version_id stream_id creation_ts content_hash index_type key_type start_col end_col start_row end_row
1970-01-01 00:00:00.000000000 1970-01-01 00:00:00.000000003 23 b'sym' 1678974097067271451 5576804837479525884 84 2 1 2 0 3
1970-01-01 00:00:00.000000003 1970-01-01 00:00:00.000000004 23 b'sym' 1678974097067427062 7952936283266921920 84 2 1 2 3 4
Expand Down
4 changes: 4 additions & 0 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ def lmdb_storage(tmp_path):
with LmdbStorageFixture(tmp_path) as f:
yield f

@pytest.fixture
def lmdb_library(lmdb_storage, lib_name):
    """Provide a library named ``lib_name`` created via an Arctic instance built from ``lmdb_storage``."""
    arctic = lmdb_storage.create_arctic()
    return arctic.create_library(lib_name)


@pytest.fixture(scope="session")
def s3_storage_factory():
Expand Down
10 changes: 10 additions & 0 deletions python/tests/integration/arcticdb/test_arctic.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,16 @@ def test_azure_sas_token(azurite_storage_factory: StorageFixtureFactory):
ac = f.create_arctic()
ac.create_library("x")

def test_lib_has_lib_tools_read_index(lmdb_library):
    """The LibraryTool reached through ``Library._dev_tools`` returns the same index frame as the version store."""
    symbol = "my_symbol"
    lmdb_library.write(symbol, pd.DataFrame({"col": [1, 2, 3]}))

    # Obtain the tool through the Library-level entry point rather than from the store directly.
    tool = lmdb_library._dev_tools.library_tool()
    index_via_tool = tool.read_index(symbol)
    index_via_store = lmdb_library._nvs.read_index(symbol)

    assert index_via_tool.equals(index_via_store)


def test_s3_force_uri_lib_config_handling(s3_storage):
# force_uri_lib_config is an obsolete configuration option. However, some users still include this option in their setup.
Expand Down

0 comments on commit 9858652

Please sign in to comment.