Skip to content

Commit

Permalink
👌 IMPROVE: Add AbstractRepositoryBackend.iter_object_streams
Browse files Browse the repository at this point in the history
This is essentially an addition to aiidateam#5156 and is required for aiidateam#5145.

Without the "optimised" use of `Container.get_objects_stream_and_meta` for the `DiskObjectStoreRepositoryBackend`, the profiled archive creation in aiidateam#5145 goes from 4 minutes to 9 minutes!
  • Loading branch information
chrisjsewell committed Oct 8, 2021
1 parent 27364cf commit 9079ceb
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 2 deletions.
14 changes: 13 additions & 1 deletion aiida/repository/backend/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import hashlib
import io
import pathlib
from typing import BinaryIO, Iterable, Iterator, List, Optional, Union
from typing import BinaryIO, Iterable, Iterator, List, Optional, Tuple, Union

from aiida.common.hashing import chunked_file_hash

Expand Down Expand Up @@ -144,6 +144,18 @@ def get_object_content(self, key: str) -> bytes:
with self.open(key) as handle: # pylint: disable=not-context-manager
return handle.read()

def iter_object_streams(self, keys: List[str]) -> Iterator[Tuple[str, BinaryIO]]:
    """Return an iterator over the byte streams of objects identified by key.

    Every object is opened through ``self.open`` one at a time. The ``with`` block around a stream is exited
    as soon as the iterator is advanced, so each stream should be read before the next pair is requested.

    :param keys: fully qualified identifiers for the objects within the repository.
    :return: an iterator over ``(key, stream)`` pairs, in the order of ``keys``.
    :raise FileNotFoundError: if the file does not exist.
    :raise OSError: if a file could not be opened.
    """
    for object_key in keys:
        with self.open(object_key) as stream:  # pylint: disable=not-context-manager
            yield object_key, stream

def get_object_hash(self, key: str) -> str:
"""Return the SHA-256 hash of an object stored under the given key.
Expand Down
8 changes: 7 additions & 1 deletion aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend."""
import contextlib
import shutil
from typing import BinaryIO, Iterable, Iterator, List, Optional
from typing import BinaryIO, Iterable, Iterator, List, Optional, Tuple

from disk_objectstore import Container

Expand Down Expand Up @@ -89,6 +89,12 @@ def open(self, key: str) -> Iterator[BinaryIO]:
with self.container.get_object_stream(key) as handle:
yield handle # type: ignore[misc]

def iter_object_streams(self, keys: List[str]) -> Iterator[Tuple[str, BinaryIO]]:
    """Return an iterator over the byte streams of objects identified by key.

    All keys are passed to ``Container.get_objects_stream_and_meta`` in a single call instead of opening the
    objects one by one. The pairs are yielded in whatever order the container returns them.
    NOTE(review): that order presumably need not match ``keys``, and streams are presumably only valid until
    the iterator is advanced -- confirm against the ``disk-objectstore`` documentation.

    :param keys: fully qualified identifiers for the objects within the repository.
    :return: an iterator over ``(key, stream)`` pairs.
    :raise FileNotFoundError: if the container returns no stream for a key.
    """
    with self.container.get_objects_stream_and_meta(keys) as triplets:
        for key, stream, _ in triplets:
            # Explicit check rather than ``assert``: assertions are stripped when Python runs with ``-O``,
            # which would let a ``None`` stream leak through to the caller.
            if stream is None:
                raise FileNotFoundError(f'object with key `{key}` does not exist.')
            yield key, stream  # type: ignore[misc]

def delete_objects(self, keys: List[str]) -> None:
super().delete_objects(keys)
self.container.delete_objects(keys)
Expand Down
10 changes: 10 additions & 0 deletions tests/repository/backend/test_disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ def test_open(repository, generate_directory):
assert handle.read() == b'content_b'


def test_iter_object_streams(repository):
    """Test the ``Repository.iter_object_streams`` method."""
    repository.initialise()
    key = repository.put_object_from_filelike(io.BytesIO(b'content'))

    # Read each stream inside the iteration step that yields it, then compare the complete result. Asserting
    # inside a ``for`` loop would pass vacuously if the iterator yielded nothing at all.
    contents = [(_key, stream.read()) for _key, stream in repository.iter_object_streams([key])]

    assert contents == [(key, b'content')]


def test_delete_object(repository, generate_directory):
"""Test the ``Repository.delete_object`` method."""
repository.initialise()
Expand Down
9 changes: 9 additions & 0 deletions tests/repository/backend/test_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ def test_open(repository, generate_directory):
assert handle.read() == b'content_b'


def test_iter_object_streams(repository):
    """Test the ``Repository.iter_object_streams`` method."""
    key = repository.put_object_from_filelike(io.BytesIO(b'content'))

    # Read each stream inside the iteration step that yields it, then compare the complete result. Asserting
    # inside a ``for`` loop would pass vacuously if the iterator yielded nothing at all.
    contents = [(_key, stream.read()) for _key, stream in repository.iter_object_streams([key])]

    assert contents == [(key, b'content')]


def test_delete_object(repository, generate_directory):
"""Test the ``Repository.delete_object`` method."""
directory = generate_directory({'file_a': None})
Expand Down

0 comments on commit 9079ceb

Please sign in to comment.