LibraryTool add read_index (#1533)

#### Reference Issues/PRs

Implements #1391 and fixes #1532.

- Added `read_index` to `LibraryTool` (as required in #1391).
- Allowed `Library` to get a `LibraryTool` and call `read_index` (to fix #1532). Also updated the docs accordingly.

#### What does this implement or fix?

#### Any other comments?

#### Checklist

<details>
<summary> Checklist for code changes... </summary>

- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?

</details>
man-group · Apr 26, 2024 · 9858652 · 9858652
1 parent 547cd6b
commit 9858652
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 4 deletions.
diff --git a/python/arcticdb/toolbox/library_tool.py b/python/arcticdb/toolbox/library_tool.py
@@ -7,6 +7,7 @@
 import pandas as pd
 
 from arcticdb.version_store._normalization import FrameData
+from arcticdb.supported_types import ExplicitlySupportedDates
 from arcticdb_ext.codec import decode_segment
 from arcticdb_ext.storage import KeyType
 from arcticdb_ext.stream import SegmentInMemory
@@ -15,6 +16,7 @@
 from arcticdb.version_store._normalization import denormalize_dataframe
 
 VariantKey = Union[AtomKey, RefKey]
+VersionQueryInput = Union[int, str, ExplicitlySupportedDates, None]
 
 _KEY_PROPERTIES = {
     key_type: {k: v for k, v in vars(key_type).items() if isinstance(v, property)} for key_type in (AtomKey, RefKey)
@@ -31,6 +33,10 @@ def props_dict_to_atom_key(d: Dict[str, Any]) -> AtomKey:
 
 
 class LibraryTool(LibraryToolImpl):
+    def __init__(self, library, nvs):
+        super().__init__(library)
+        self._nvs = nvs
+
     @staticmethod
     def key_types() -> List[KeyType]:
         return list(KeyType.__members__.values())
@@ -135,4 +141,21 @@ def read_to_keys(
         8243267225673136445
         """
         df = self.read_to_dataframe(key)
-        return self.dataframe_to_keys(df, id if id is not None else key.id, filter_key_type)
+        return self.dataframe_to_keys(df, id if id is not None else key.id, filter_key_type)
+
+    def read_index(self, symbol: str, as_of: Optional[VersionQueryInput] = None, **kwargs) -> pd.DataFrame:
+        """
+        Read the index key for the named symbol.
+
+        Parameters
+        ----------
+        symbol : `str`
+            Symbol name.
+        as_of : `Optional[VersionQueryInput]`, default=None
+            See documentation of `read` method for more details.
+
+        Returns
+        -------
+        Pandas DataFrame representing the index key in a human-readable format.
+        """
+        return self._nvs.read_index(symbol, as_of, **kwargs)
diff --git a/python/arcticdb/version_store/_store.py b/python/arcticdb/version_store/_store.py
@@ -2819,4 +2819,4 @@ def library(self):
         return self._library
 
     def library_tool(self) -> LibraryTool:
-        return LibraryTool(self.library())
+        return LibraryTool(self.library(), self)
diff --git a/python/arcticdb/version_store/library.py b/python/arcticdb/version_store/library.py
@@ -290,6 +290,12 @@ class StagedDataFinalizeMethod(Enum):
     WRITE = auto()
     APPEND = auto()
 
+class DevTools:
+    def __init__(self, nvs):
+        self._nvs = nvs
+
+    def library_tool(self):
+        return self._nvs.library_tool()
 
 class Library:
     """
@@ -321,6 +327,7 @@ def __init__(self, arctic_instance_description: str, nvs: NativeVersionStore):
         self.arctic_instance_desc = arctic_instance_description
         self._nvs = nvs
         self._nvs._normalizer.df._skip_df_consolidation = True
+        self._dev_tools = DevTools(nvs)
 
     def __repr__(self):
         return "Library(%s, path=%s, storage=%s)" % (
@@ -1758,13 +1765,14 @@ def defragment_symbol_data(self, symbol: str, segment_size: Optional[int] = None
         >>> lib.write("symbol", pd.DataFrame({"A": [0]}, index=[pd.Timestamp(0)]))
         >>> lib.append("symbol", pd.DataFrame({"A": [1, 2]}, index=[pd.Timestamp(1), pd.Timestamp(2)]))
         >>> lib.append("symbol", pd.DataFrame({"A": [3]}, index=[pd.Timestamp(3)]))
-        >>> lib.read_index(sym)
+        >>> lib_tool = lib._dev_tools.library_tool()
+        >>> lib_tool.read_index(sym)
                             start_index                     end_index  version_id stream_id          creation_ts          content_hash  index_type  key_type  start_col  end_col  start_row  end_row
         1970-01-01 00:00:00.000000000 1970-01-01 00:00:00.000000001          20    b'sym'  1678974096622685727   6872717287607530038          84         2          1        2          0        1
         1970-01-01 00:00:00.000000001 1970-01-01 00:00:00.000000003          21    b'sym'  1678974096931527858  12345256156783683504          84         2          1        2          1        3
         1970-01-01 00:00:00.000000003 1970-01-01 00:00:00.000000004          22    b'sym'  1678974096970045987   7952936283266921920          84         2          1        2          3        4
         >>> lib.version_store.defragment_symbol_data("symbol", 2)
-        >>> lib.read_index(sym)  # Returns two segments rather than three as a result of the defragmentation operation
+        >>> lib_tool.read_index(sym)  # Returns two segments rather than three as a result of the defragmentation operation
                             start_index                     end_index  version_id stream_id          creation_ts         content_hash  index_type  key_type  start_col  end_col  start_row  end_row
         1970-01-01 00:00:00.000000000 1970-01-01 00:00:00.000000003          23    b'sym'  1678974097067271451  5576804837479525884          84         2          1        2          0        3
         1970-01-01 00:00:00.000000003 1970-01-01 00:00:00.000000004          23    b'sym'  1678974097067427062  7952936283266921920          84         2          1        2          3        4

diff --git a/python/tests/conftest.py b/python/tests/conftest.py
@@ -105,6 +105,10 @@ def lmdb_storage(tmp_path):
     with LmdbStorageFixture(tmp_path) as f:
         yield f
 
+@pytest.fixture
+def lmdb_library(lmdb_storage, lib_name):
+    return lmdb_storage.create_arctic().create_library(lib_name)
+
 
 @pytest.fixture(scope="session")
 def s3_storage_factory():

diff --git a/python/tests/integration/arcticdb/test_arctic.py b/python/tests/integration/arcticdb/test_arctic.py
@@ -927,6 +927,16 @@ def test_azure_sas_token(azurite_storage_factory: StorageFixtureFactory):
             ac = f.create_arctic()
             ac.create_library("x")
 
+def test_lib_has_lib_tools_read_index(lmdb_library):
+    lib = lmdb_library
+    sym = "my_symbol"
+
+    df = pd.DataFrame({"col": [1, 2, 3]})
+    lib.write(sym, df)
+    lib_tool = lib._dev_tools.library_tool()
+
+    assert lib_tool.read_index(sym).equals(lib._nvs.read_index(sym))
+
 
 def test_s3_force_uri_lib_config_handling(s3_storage):
     # force_uri_lib_config is an obsolete configuration. However, users still include this option in their setup.