Skip to content

Commit

Permalink
Dependencies: Update to disk-objectstore~=1.0 (#6132)
Browse files Browse the repository at this point in the history
* Update `DiskObjectStoreRepositoryBackend.get_info` to use the
  dataclasses returned by `count_objects` and `get_total_size`
  directly.
* Change `DiskObjectStoreRepositoryBackend.maintain` to enable
  `clean_storage` by default, even during live operation of the container.
* Change `DiskObjectStoreRepositoryBackend.maintain` to now pass
  `CompressMode.AUTO` when `compress` is set to `True`.
  • Loading branch information
sphuber authored Oct 17, 2023
1 parent 3e1f873 commit 56f9f6c
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 43 deletions.
45 changes: 20 additions & 25 deletions aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend."""
import contextlib
import dataclasses
import shutil
import typing as t

Expand Down Expand Up @@ -156,24 +157,29 @@ def maintain( # type: ignore[override] # pylint: disable=arguments-differ,too-ma
) -> dict:
"""Performs maintenance operations.
:param live:if True, will only perform operations that are safe to do while the repository is in use.
:param pack_loose:flag for forcing the packing of loose files.
:param do_repack:flag for forcing the re-packing of already packed files.
:param clean_storage:flag for forcing the cleaning of soft-deleted files from the repository.
:param do_vacuum:flag for forcing the vacuuming of the internal database when cleaning the repository.
:param compress:flag for compressing the data when packing loose files.
:return:a dictionary with information on the operations performed.
:param live: if True, will only perform operations that are safe to do while the repository is in use.
:param pack_loose: flag for forcing the packing of loose files.
:param do_repack: flag for forcing the re-packing of already packed files.
:param clean_storage: flag for forcing the cleaning of soft-deleted files from the repository.
:param do_vacuum: flag for forcing the vacuuming of the internal database when cleaning the repository.
:param compress: flag for compressing the data when packing loose files. Set to ``CompressMode.AUTO`` if ``True``.
:return: a dictionary with information on the operations performed.
"""
from disk_objectstore import CompressMode

if live and (do_repack or clean_storage or do_vacuum):
overrides = {'do_repack': do_repack, 'clean_storage': clean_storage, 'do_vacuum': do_vacuum}
keys = ', '.join([key for key, override in overrides.items() if override is True]) # type: ignore
raise ValueError(f'The following overrides were enabled but cannot be if `live=True`: {keys}')

pack_loose = True if pack_loose is None else pack_loose

if compress is True:
compress = CompressMode.AUTO

if live:
do_repack = False
clean_storage = False
clean_storage = True if clean_storage is None else clean_storage
do_vacuum = False
else:
do_repack = True if do_repack is None else do_repack
Expand All @@ -182,15 +188,15 @@ def maintain( # type: ignore[override] # pylint: disable=arguments-differ,too-ma

with self._container as container:
if pack_loose:
files_numb = container.count_objects()['loose']
files_size = container.get_total_size()['total_size_loose'] * BYTES_TO_MB
files_numb = container.count_objects().loose
files_size = container.get_total_size().total_size_loose * BYTES_TO_MB
logger.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...')
if not dry_run:
container.pack_all_loose(compress=compress)

if do_repack:
files_numb = container.count_objects()['packed']
files_size = container.get_total_size()['total_size_packfiles_on_disk'] * BYTES_TO_MB
files_numb = container.count_objects().packed
files_size = container.get_total_size().total_size_packfiles_on_disk * BYTES_TO_MB
logger.report(f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...')
if not dry_run:
container.repack()
Expand All @@ -211,24 +217,13 @@ def get_info( # type: ignore[override] # pylint: disable=arguments-differ
with self._container as container:
output_info['SHA-hash algorithm'] = container.hash_type
output_info['Compression algorithm'] = container.compression_algorithm
output_info['Objects'] = dataclasses.asdict(container.count_objects())

if not detailed:
return output_info

files_data = container.count_objects()
size_data = container.get_total_size()

output_info['Packs'] = files_data['pack_files']

output_info['Objects'] = {
'unpacked': files_data['loose'],
'packed': files_data['packed'],
}

output_info['Size (MB)'] = {
'unpacked': size_data['total_size_loose'] * BYTES_TO_MB,
'packed': size_data['total_size_packfiles_on_disk'] * BYTES_TO_MB,
'other': size_data['total_size_packindexes_on_disk'] * BYTES_TO_MB,
k: float(f'{v * BYTES_TO_MB:.2f}') for k, v in dataclasses.asdict(container.get_total_size()).items()
}

return output_info
2 changes: 1 addition & 1 deletion aiida/storage/psql_dos/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def get_repository(self) -> 'DiskObjectStoreRepositoryBackend':

from aiida.repository.backend import DiskObjectStoreRepositoryBackend

container = Container(str(get_filepath_container(self.profile)))
container = Container(get_filepath_container(self.profile))
return DiskObjectStoreRepositoryBackend(container=container)

@property
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- circus~=0.18.0
- click-spinner~=0.1.8
- click~=8.1
- disk-objectstore~=0.6.0
- disk-objectstore~=1.0
- docstring_parser
- get-annotations~=0.1
- python-graphviz~=0.19
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies = [
"circus~=0.18.0",
"click-spinner~=0.1.8",
"click~=8.1",
"disk-objectstore~=0.6.0",
"disk-objectstore~=1.0",
"docstring-parser",
"get-annotations~=0.1;python_version<'3.10'",
"graphviz~=0.19",
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-py-3.10.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ debugpy==1.6.7
decorator==5.1.1
defusedxml==0.7.1
deprecation==2.1.0
disk-objectstore==0.6.0
disk-objectstore==1.0.0
docstring-parser==0.15
docutils==0.16
emmet-core==0.57.1
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-py-3.11.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ debugpy==1.6.7
decorator==5.1.1
defusedxml==0.7.1
deprecation==2.1.0
disk-objectstore==0.6.0
disk-objectstore==1.0.0
docstring-parser==0.15
docutils==0.16
emmet-core==0.57.1
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-py-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ debugpy==1.6.7
decorator==5.1.1
defusedxml==0.7.1
deprecation==2.1.0
disk-objectstore==0.6.0
disk-objectstore==1.0.0
docstring-parser==0.15
docutils==0.16
emmet-core==0.57.1
Expand Down
22 changes: 10 additions & 12 deletions tests/repository/backend/test_disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,26 +224,24 @@ def test_get_info(populated_repository):
assert repository_info['SHA-hash algorithm'] == 'sha256'
assert repository_info['Compression algorithm'] == 'zlib+1'

assert 'Packs' in repository_info
assert repository_info['Packs'] == 1

assert 'Objects' in repository_info
assert 'unpacked' in repository_info['Objects']
assert 'pack_files' in repository_info['Objects']
assert 'loose' in repository_info['Objects']
assert 'packed' in repository_info['Objects']
assert repository_info['Objects']['unpacked'] == 2
assert repository_info['Objects']['pack_files'] == 1
assert repository_info['Objects']['loose'] == 1
assert repository_info['Objects']['packed'] == 3

assert 'Size (MB)' in repository_info
assert 'unpacked' in repository_info['Size (MB)']
assert 'packed' in repository_info['Size (MB)']
assert 'other' in repository_info['Size (MB)']
assert 'total_size_loose' in repository_info['Size (MB)']
assert 'total_size_packed' in repository_info['Size (MB)']


#yapf: disable
@pytest.mark.parametrize(('kwargs', 'output_info'), (
(
{'live': True},
{'unpacked': 2, 'packed': 4}
{'unpacked': 0, 'packed': 4}
),
(
{'live': False},
Expand All @@ -265,16 +263,16 @@ def test_get_info(populated_repository):
'clean_storage': False,
'do_vacuum': False,
},
{'unpacked': 2, 'packed': 3}
{'unpacked': 1, 'packed': 3}
),
))
# yapf: enable
def test_maintain(populated_repository, kwargs, output_info):
"""Test the ``maintain`` method."""
populated_repository.maintain(**kwargs)
file_info = populated_repository._container.count_objects() # pylint: disable=protected-access
assert file_info['loose'] == output_info['unpacked']
assert file_info['packed'] == output_info['packed']
assert file_info.loose == output_info['unpacked']
assert file_info.packed == output_info['packed']


@pytest.mark.parametrize('do_vacuum', [True, False])
Expand Down

0 comments on commit 56f9f6c

Please sign in to comment.