From 58823d455b4c19bfa81694ff5d6785e572ce6f95 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Thu, 1 Dec 2022 17:15:16 -0800 Subject: [PATCH 01/24] First commit --- .../loggers/remote_uploader_downloader.py | 13 ++- composer/utils/__init__.py | 5 +- composer/utils/file_helpers.py | 9 +- composer/utils/object_store/__init__.py | 6 +- .../utils/object_store/oci_object_store.py | 96 +++++++++++++++++++ setup.py | 4 + 6 files changed, 123 insertions(+), 10 deletions(-) create mode 100644 composer/utils/object_store/oci_object_store.py diff --git a/composer/loggers/remote_uploader_downloader.py b/composer/loggers/remote_uploader_downloader.py index 7231888800..f8e37be1e7 100644 --- a/composer/loggers/remote_uploader_downloader.py +++ b/composer/loggers/remote_uploader_downloader.py @@ -22,8 +22,8 @@ from composer.loggers.logger import Logger from composer.loggers.logger_destination import LoggerDestination -from composer.utils import (LibcloudObjectStore, ObjectStore, ObjectStoreTransientError, S3ObjectStore, SFTPObjectStore, - dist, format_name_with_dist, get_file, retry) +from composer.utils import (LibcloudObjectStore, ObjectStore, ObjectStoreTransientError, OCIObjectStore, S3ObjectStore, + SFTPObjectStore, dist, format_name_with_dist, get_file, retry) if TYPE_CHECKING: from composer.core import State @@ -34,7 +34,12 @@ def _build_remote_backend(remote_backend_name: str, backend_kwargs: Dict[str, Any]): - remote_backend_name_to_cls = {'s3': S3ObjectStore, 'sftp': SFTPObjectStore, 'libcloud': LibcloudObjectStore} + remote_backend_name_to_cls = { + 's3': S3ObjectStore, + 'oci': OCIObjectStore, + 'sftp': SFTPObjectStore, + 'libcloud': LibcloudObjectStore + } remote_backend_cls = remote_backend_name_to_cls.get(remote_backend_name, None) if remote_backend_cls is None: raise ValueError( @@ -220,7 +225,7 @@ def __init__(self, parsed_remote_bucket = urlparse(bucket_uri) self.remote_backend_name, self.remote_bucket_name = parsed_remote_bucket.scheme, parsed_remote_bucket.netloc self.backend_kwargs = backend_kwargs if backend_kwargs is not None else {} - if self.remote_backend_name == 's3' and 'bucket' not in self.backend_kwargs: + if self.remote_backend_name in ['s3', 'oci'] and 'bucket' not in self.backend_kwargs: self.backend_kwargs['bucket'] = self.remote_bucket_name elif self.remote_backend_name == 'sftp' and 'host' not in self.backend_kwargs: self.backend_kwargs['host'] = f'sftp://{self.remote_bucket_name}' diff --git a/composer/utils/__init__.py b/composer/utils/__init__.py index a852579a89..3e57febfa1 100644 --- a/composer/utils/__init__.py +++ b/composer/utils/__init__.py @@ -18,8 +18,8 @@ from composer.utils.inference import ExportFormat, Transform, export_for_inference, export_with_logger, quantize_dynamic from composer.utils.iter_helpers import IteratorFileStream, ensure_tuple, map_collection from composer.utils.misc import get_free_tcp_port, is_model_deepspeed, is_model_fsdp, is_notebook, model_eval_mode -from composer.utils.object_store import (LibcloudObjectStore, ObjectStore, ObjectStoreTransientError, S3ObjectStore, - SFTPObjectStore) +from composer.utils.object_store import (LibcloudObjectStore, ObjectStore, ObjectStoreTransientError, OCIObjectStore, + S3ObjectStore, SFTPObjectStore) from composer.utils.retrying import retry from composer.utils.string_enum import StringEnum @@ -56,6 +56,7 @@ def warn_streaming_dataset_deprecation(old_version: int, new_version: int) -> No 'LibcloudObjectStore', 'S3ObjectStore', 'SFTPObjectStore', + 'OCIObjectStore', 'MissingConditionalImportError', 'import_object', 'is_model_deepspeed', diff --git a/composer/utils/file_helpers.py b/composer/utils/file_helpers.py index a930d44c86..ff55e21a9b 100644 --- a/composer/utils/file_helpers.py +++ b/composer/utils/file_helpers.py @@ -20,7 +20,7 @@ from composer.utils import dist from composer.utils.iter_helpers import iterate_with_callback -from composer.utils.object_store import ObjectStore, S3ObjectStore +from composer.utils.object_store import ObjectStore, OCIObjectStore, S3ObjectStore if TYPE_CHECKING: from composer.core import Timestamp @@ -315,7 +315,8 @@ def parse_uri(uri: str) -> Tuple[str, str, str]: Backend and bucket name will be empty string if the input is a local path """ parse_result = urlparse(uri) - backend, bucket_name, path = parse_result.scheme, parse_result.netloc, parse_result.path + backend, net_loc, path = parse_result.scheme, parse_result.netloc, parse_result.path + bucket_name = net_loc if '@' not in net_loc else net_loc.split('@')[0] if backend == '' and bucket_name == '': return backend, bucket_name, path else: @@ -342,6 +343,8 @@ def maybe_create_object_store_from_uri(uri: str) -> Optional[ObjectStore]: elif backend == 'wandb': raise NotImplementedError(f'There is no implementation for WandB load_object_store via URI. Please use ' 'WandBLogger') + elif backend == 'oci': + return OCIObjectStore(bucket=bucket_name) else: raise NotImplementedError(f'There is no implementation for the cloud backend {backend} via URI. Please use ' 's3 or one of the supported object stores') @@ -372,7 +375,7 @@ def maybe_create_remote_uploader_downloader_from_uri( warnings.warn( f'There already exists a RemoteUploaderDownloader object to handle the uri: {uri} you specified') return None - if backend == 's3': + if backend in ['s3', 'oci']: return RemoteUploaderDownloader(bucket_uri=f'{backend}://{bucket_name}') elif backend == 'wandb': diff --git a/composer/utils/object_store/__init__.py b/composer/utils/object_store/__init__.py index 57b77614a1..1c8915fba7 100644 --- a/composer/utils/object_store/__init__.py +++ b/composer/utils/object_store/__init__.py @@ -5,7 +5,11 @@ from composer.utils.object_store.libcloud_object_store import LibcloudObjectStore from composer.utils.object_store.object_store import ObjectStore, ObjectStoreTransientError +from composer.utils.object_store.oci_object_store import OCIObjectStore from composer.utils.object_store.s3_object_store import S3ObjectStore from composer.utils.object_store.sftp_object_store import SFTPObjectStore -__all__ = ['ObjectStore', 'ObjectStoreTransientError', 'LibcloudObjectStore', 'S3ObjectStore', 'SFTPObjectStore'] +__all__ = [ + 'ObjectStore', 'ObjectStoreTransientError', 'LibcloudObjectStore', 'S3ObjectStore', 'SFTPObjectStore', + 'OCIObjectStore' +] diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py new file mode 100644 index 0000000000..b61c683529 --- /dev/null +++ b/composer/utils/object_store/oci_object_store.py @@ -0,0 +1,96 @@ +# Copyright 2022 MosaicML Composer authors +# SPDX-License-Identifier: Apache-2.0 + +"""S3-Compatible object store.""" + +from __future__ import annotations + +import os +import pathlib +import uuid +from typing import Callable, Optional, Union + +from composer.utils.import_helpers import MissingConditionalImportError +from composer.utils.object_store.object_store import ObjectStore + +__all__ = ['OCIObjectStore'] + + +class OCIObjectStore(ObjectStore): + """Utility for uploading to and downloading from an OCI bucket. + + Args: + bucket (str): The bucket name. + prefix (str): A path prefix such as `folder/subfolder/` to prepend to object names. Defaults to ''. + """ + + def __init__( + self, + bucket: str, + prefix: str = '', + ) -> None: + try: + import oci + except ImportError as e: + raise MissingConditionalImportError(conda_package='oci', + extra_deps_group='oci', + conda_channel='conda-forge') from e + + # Format paths + self.bucket = bucket.strip('/') + self.prefix = prefix.strip('/') + if self.prefix: + self.prefix += '/' + + config = oci.config.from_file() + self.client = oci.object_storage.ObjectStorageClient(config=config, + retry_strategy=oci.retry.DEFAULT_RETRY_STRATEGY) + self.namespace = self.client.get_namespace().data + + def get_uri(self, object_name: str) -> str: + return f'oci://{self.bucket}/{object_name}' + + def get_object_size(self, object_name: str) -> int: + objects = self.client.list_objects(self.namespace, self.bucket, prefix=object_name, fields='size').data.objects + relevant_objects = [obj for obj in objects if obj.name == object_name] + if relevant_objects: + obj = relevant_objects.pop() + return obj.size + else: + raise FileNotFoundError(f'Unable to locate oci://{self.bucket}@{self.namespace}/{object_name}') + + def upload_object( + self, + object_name: str, + filename: Union[str, pathlib.Path], + callback: Optional[Callable[[int, int], None]] = None, + ): + del callback + with open(filename, 'rb') as f: + self.client.put_object(self.namespace, bucket_name=self.bucket, object_name=object_name, put_object_body=f) + + def download_object( + self, + object_name: str, + filename: Union[str, pathlib.Path], + overwrite: bool = False, + callback: Optional[Callable[[int, int], None]] = None, + ): + del callback + if os.path.exists(filename) and not overwrite: + raise FileExistsError(f'The file at {filename} already exists') + tmp_path = str(filename) + f'.{uuid.uuid4()}.tmp' + + response = self.client.get_object( + self.namespace, + bucket_name=self.bucket, + object_name=object_name, + ) + + with open(tmp_path, 'wb') as f: + f.write(response.data.content) + + if overwrite: + os.replace(tmp_path, filename) + else: + os.rename(tmp_path, filename) diff --git a/setup.py b/setup.py index 5714a21bf6..3a33383f45 100644 --- a/setup.py +++ b/setup.py @@ -189,6 +189,10 @@ def package_files(prefix: str, directory: str, extension: str): 'apache-libcloud>=3.3.1,<4', ] +extra_deps['oci'] = [ + 'oci>=oci-2.88.2', +] + extra_deps['onnx'] = [ 'onnx>=1.12.0,<2', 'onnxruntime>=1.12.1,<2', From 175e7f1613709d57b55a8b3c7085c4f424054a43 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 2 Dec 2022 17:18:56 -0800 Subject: [PATCH 02/24] Use UploadManager.upload_file instead of put_object --- composer/utils/object_store/oci_object_store.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index b61c683529..f58a9e8119 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -46,6 +46,7 @@ def __init__( self.client = oci.object_storage.ObjectStorageClient(config=config, retry_strategy=oci.retry.DEFAULT_RETRY_STRATEGY) self.namespace = self.client.get_namespace().data + self.upload_manager = oci.object_storage.UploadManager(self.client) def get_uri(self, object_name: str) -> str: return f'oci://{self.bucket}/{object_name}' @@ -66,8 +67,11 @@ def upload_object( callback: Optional[Callable[[int, int], None]] = None, ): del callback - with open(filename, 'rb') as f: - self.client.put_object(self.namespace, bucket_name=self.bucket, object_name=object_name, put_object_body=f) + + self.upload_manager.upload_file(namespace_name=self.namespace, + bucket_name=self.bucket, + object_name=object_name, + file_path=filename) def download_object( self, From 964fc1e761242c12685853cc07ab30162eef5f24 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Mon, 5 Dec 2022 16:10:32 -0800 Subject: [PATCH 03/24] Add tests for OCI --- .../utils/object_store/oci_object_store.py | 2 +- .../object_store/object_store_settings.py | 4 +- .../object_store/test_oci_object_store.py | 78 +++++++++++++++++++ 3 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 tests/utils/object_store/test_oci_object_store.py diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index f58a9e8119..3927a91b41 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -86,7 +86,7 @@ def download_object( tmp_path = str(filename) + f'.{uuid.uuid4()}.tmp' response = self.client.get_object( - self.namespace, + namespace_name=self.namespace, bucket_name=self.bucket, object_name=object_name, ) diff --git a/tests/utils/object_store/object_store_settings.py b/tests/utils/object_store/object_store_settings.py index 308431dd3e..1597edebec 100644 --- a/tests/utils/object_store/object_store_settings.py +++ b/tests/utils/object_store/object_store_settings.py @@ -14,7 +14,7 @@ import composer.utils.object_store import composer.utils.object_store.sftp_object_store -from composer.utils.object_store import LibcloudObjectStore, ObjectStore, S3ObjectStore, SFTPObjectStore +from composer.utils.object_store import LibcloudObjectStore, ObjectStore, S3ObjectStore, SFTPObjectStore, OCIObjectStore from composer.utils.object_store.sftp_object_store import SFTPObjectStore from tests.common import get_module_subclasses @@ -54,7 +54,7 @@ object_stores = [ pytest.param(x, marks=_object_store_marks[x], id=x.__name__) - for x in get_module_subclasses(composer.utils.object_store, ObjectStore) + for x in get_module_subclasses(composer.utils.object_store, ObjectStore) if not issubclass(x, OCIObjectStore) ] diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py new file mode 100644 index 0000000000..52c9aa3de0 --- /dev/null +++ b/tests/utils/object_store/test_oci_object_store.py @@ -0,0 +1,78 @@ +import pytest +from unittest.mock import MagicMock +from composer.utils import OCIObjectStore +from pathlib import Path + + +@pytest.fixture +def setup_oci_mocks(monkeypatch): + oci = pytest.importorskip('oci') + mock_config = MagicMock() + monkeypatch.setattr(oci.config, 'from_file', mock_config) + mock_object_storage_client = MagicMock() + monkeypatch.setattr(oci.object_storage, 'ObjectStorageClient', mock_object_storage_client) + mock_upload_manager = MagicMock() + monkeypatch.setattr(oci.object_storage, 'UploadManager', mock_upload_manager) + + +class TestOCIObjectStoreUnit: + + @classmethod + def setup_class(cls): + cls.mock_bucket_name = 'my_bucket' + cls.mock_namespace = 'my_namespace' + + cls.oci_os = OCIObjectStore(cls.mock_bucket_name) + cls.oci_os.namespace = cls.mock_namespace + + def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): + mock_object_name = 'my_object' + + # oci_os = OCIObjectStore(mock_bucket_name) + self.oci_os.namespace = self.mock_namespace + mock_upload_file = MagicMock() + monkeypatch.setattr(self.oci_os.upload_manager, 'upload_file', mock_upload_file) + file_to_upload = str(tmp_path / Path('my_upload.bin')) + with open(file_to_upload, 'wb') as f: + f.write(bytes(range(20))) + + self.oci_os.upload_object(object_name=mock_object_name, filename=file_to_upload) + mock_upload_file.assert_called_once_with(namespace_name=self.mock_namespace, + bucket_name=self.mock_bucket_name, + object_name=mock_object_name, + file_path=file_to_upload) + + + def test_download_object(self, monkeypatch, tmp_path): + mock_object_name = 'my_object' + mock_response_object = MagicMock() + file_content = bytes(range(4)) + mock_response_object.data.content = file_content + mock_get_object = MagicMock(return_value=mock_response_object) + monkeypatch.setattr(self.oci_os.client, 'get_object', mock_get_object) + file_to_download_to = str(tmp_path / Path('my_download.bin')) + + self.oci_os.download_object(object_name=mock_object_name, filename=file_to_download_to) + mock_get_object.assert_called_once_with(namespace_name=self.mock_namespace, + bucket_name=self.mock_bucket_name, + object_name=mock_object_name) + + with open(file_to_download_to, 'rb') as f: + actual_content = f.readline() + assert actual_content == file_content + + def test_get_object_size(self, monkeypatch): + mock_object_name = 'my_object' + mock_object_size = 11 + mock_object_1, mock_object_2 = MagicMock(), MagicMock() + mock_object_1.name = mock_object_name + mock_object_2.name = 'foobar' + mock_object_1.size = mock_object_size + mock_object_2.size = 3 + + mock_list_objects_return = MagicMock() + mock_list_objects_return.data.objects = [mock_object_1, mock_object_2] + mock_list_objects_fn = MagicMock(return_value=mock_list_objects_return) + monkeypatch.setattr(self.oci_os.client, 'list_objects', mock_list_objects_fn) + + assert self.oci_os.get_object_size(mock_object_name) == mock_object_size \ No newline at end of file From dff9af7f586498af41c80209ad2ad7f8a062c0df Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Mon, 5 Dec 2022 16:11:54 -0800 Subject: [PATCH 04/24] pre-commit --- .../object_store/object_store_settings.py | 5 +++-- .../object_store/test_oci_object_store.py | 22 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/tests/utils/object_store/object_store_settings.py b/tests/utils/object_store/object_store_settings.py index 1597edebec..e13bffe4d8 100644 --- a/tests/utils/object_store/object_store_settings.py +++ b/tests/utils/object_store/object_store_settings.py @@ -14,7 +14,7 @@ import composer.utils.object_store import composer.utils.object_store.sftp_object_store -from composer.utils.object_store import LibcloudObjectStore, ObjectStore, S3ObjectStore, SFTPObjectStore, OCIObjectStore +from composer.utils.object_store import LibcloudObjectStore, ObjectStore, OCIObjectStore, S3ObjectStore, SFTPObjectStore from composer.utils.object_store.sftp_object_store import SFTPObjectStore from tests.common import get_module_subclasses @@ -54,7 +54,8 @@ object_stores = [ pytest.param(x, marks=_object_store_marks[x], id=x.__name__) - for x in get_module_subclasses(composer.utils.object_store, ObjectStore) if not issubclass(x, OCIObjectStore) + for x in get_module_subclasses(composer.utils.object_store, ObjectStore) + if not issubclass(x, OCIObjectStore) ] diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index 52c9aa3de0..e2f64ac8cc 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -1,7 +1,12 @@ -import pytest -from unittest.mock import MagicMock -from composer.utils import OCIObjectStore +# Copyright 2022 MosaicML Composer authors +# SPDX-License-Identifier: Apache-2.0 + from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from composer.utils import OCIObjectStore @pytest.fixture @@ -38,10 +43,9 @@ def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): self.oci_os.upload_object(object_name=mock_object_name, filename=file_to_upload) mock_upload_file.assert_called_once_with(namespace_name=self.mock_namespace, - bucket_name=self.mock_bucket_name, - object_name=mock_object_name, - file_path=file_to_upload) - + bucket_name=self.mock_bucket_name, + object_name=mock_object_name, + file_path=file_to_upload) def test_download_object(self, monkeypatch, tmp_path): mock_object_name = 'my_object' @@ -56,7 +60,7 @@ def test_download_object(self, monkeypatch, tmp_path): mock_get_object.assert_called_once_with(namespace_name=self.mock_namespace, bucket_name=self.mock_bucket_name, object_name=mock_object_name) - + with open(file_to_download_to, 'rb') as f: actual_content = f.readline() assert actual_content == file_content @@ -75,4 +79,4 @@ def test_get_object_size(self, monkeypatch): mock_list_objects_fn = MagicMock(return_value=mock_list_objects_return) monkeypatch.setattr(self.oci_os.client, 'list_objects', mock_list_objects_fn) - assert self.oci_os.get_object_size(mock_object_name) == mock_object_size \ No newline at end of file + assert self.oci_os.get_object_size(mock_object_name) == mock_object_size From b6a267011df6921964cbf052b18d604b54021d99 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Mon, 5 Dec 2022 16:14:14 -0800 Subject: [PATCH 05/24] Add comment --- tests/utils/object_store/object_store_settings.py | 2 ++ tests/utils/object_store/test_oci_object_store.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/utils/object_store/object_store_settings.py b/tests/utils/object_store/object_store_settings.py index e13bffe4d8..5198389657 100644 --- a/tests/utils/object_store/object_store_settings.py +++ b/tests/utils/object_store/object_store_settings.py @@ -55,6 +55,8 @@ object_stores = [ pytest.param(x, marks=_object_store_marks[x], id=x.__name__) for x in get_module_subclasses(composer.utils.object_store, ObjectStore) + # Decided to make a separate test suite for OCI instead of use this existing one + # because it is kinda complex to work with. if not issubclass(x, OCIObjectStore) ] diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index e2f64ac8cc..e3ff5664f9 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -20,7 +20,7 @@ def setup_oci_mocks(monkeypatch): monkeypatch.setattr(oci.object_storage, 'UploadManager', mock_upload_manager) -class TestOCIObjectStoreUnit: +class TestOCIObjectStore: @classmethod def setup_class(cls): From 3368e82e725ff667faef1f9af97929a84709f09b Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Tue, 6 Dec 2022 10:43:40 -0800 Subject: [PATCH 06/24] add stub for checking oci object store with checkpointing --- tests/utils/object_store/test_oci_object_store.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index e3ff5664f9..53702bfa73 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -7,6 +7,7 @@ import pytest from composer.utils import OCIObjectStore +from composer.loggers import RemoteUploaderDownloader @pytest.fixture @@ -80,3 +81,7 @@ def test_get_object_size(self, monkeypatch): monkeypatch.setattr(self.oci_os.client, 'list_objects', mock_list_objects_fn) assert self.oci_os.get_object_size(mock_object_name) == mock_object_size + + +def test_checkpointing_with_oci_object_store(self, setup_oci_mocks, monkeypatch, tmp_path): + pass \ No newline at end of file From 1d5e0c3c348110b462ff15e7732fc78c6d93b481 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Wed, 7 Dec 2022 17:05:12 -0800 Subject: [PATCH 07/24] env variable check --- composer/utils/object_store/oci_object_store.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index 3927a91b41..7ed63c4c3b 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -42,7 +42,11 @@ def __init__( if self.prefix: self.prefix += '/' - config = oci.config.from_file() + if 'OCI_CONFIG_FILE' in os.environ: + config = oci.config.from_file(os.environ['OCI_CONFIG_FILE']) + else: + config = oci.config.from_file() + self.client = oci.object_storage.ObjectStorageClient(config=config, retry_strategy=oci.retry.DEFAULT_RETRY_STRATEGY) self.namespace = self.client.get_namespace().data From b4d66f6e0f1eedc13a55628a19f64b44127bb959 Mon Sep 17 00:00:00 2001 From: Bandish Shah Date: Wed, 7 Dec 2022 17:14:09 -0800 Subject: [PATCH 08/24] Poke CI From 20f1232c83751aae13c3795a8c3c2acd3a65fb52 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Thu, 8 Dec 2022 14:00:35 -0800 Subject: [PATCH 09/24] Add importorskip --- tests/utils/object_store/test_oci_object_store.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index 53702bfa73..cb25fd8216 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -7,7 +7,6 @@ import pytest from composer.utils import OCIObjectStore -from composer.loggers import RemoteUploaderDownloader @pytest.fixture @@ -21,6 +20,8 @@ def setup_oci_mocks(monkeypatch): monkeypatch.setattr(oci.object_storage, 'UploadManager', mock_upload_manager) + +@pytest.importorskip('oci') class TestOCIObjectStore: @classmethod @@ -31,6 +32,7 @@ def setup_class(cls): cls.oci_os = OCIObjectStore(cls.mock_bucket_name) cls.oci_os.namespace = cls.mock_namespace + def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): mock_object_name = 'my_object' @@ -67,6 +69,7 @@ def test_download_object(self, monkeypatch, tmp_path): assert actual_content == file_content def test_get_object_size(self, monkeypatch): + oci = pytest.importorskip('oci') mock_object_name = 'my_object' mock_object_size = 11 mock_object_1, mock_object_2 = MagicMock(), MagicMock() From b638714b470616f014d5d3a92b61808453431729 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Thu, 8 Dec 2022 15:10:15 -0800 Subject: [PATCH 10/24] add importorskip --- tests/utils/object_store/test_oci_object_store.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index cb25fd8216..d491330fc4 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -21,19 +21,21 @@ def setup_oci_mocks(monkeypatch): -@pytest.importorskip('oci') + class TestOCIObjectStore: + @classmethod def setup_class(cls): + oci = pytest.importorskip('oci') cls.mock_bucket_name = 'my_bucket' cls.mock_namespace = 'my_namespace' cls.oci_os = OCIObjectStore(cls.mock_bucket_name) cls.oci_os.namespace = cls.mock_namespace - def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): + oci = pytest.importorskip('oci') mock_object_name = 'my_object' # oci_os = OCIObjectStore(mock_bucket_name) @@ -50,7 +52,9 @@ def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): object_name=mock_object_name, file_path=file_to_upload) + def test_download_object(self, monkeypatch, tmp_path): + oci = pytest.importorskip('oci') mock_object_name = 'my_object' mock_response_object = MagicMock() file_content = bytes(range(4)) @@ -86,5 +90,5 @@ def test_get_object_size(self, monkeypatch): assert self.oci_os.get_object_size(mock_object_name) == mock_object_size -def test_checkpointing_with_oci_object_store(self, setup_oci_mocks, monkeypatch, tmp_path): - pass \ No newline at end of file +# def test_checkpointing_with_oci_object_store(self, setup_oci_mocks, monkeypatch, tmp_path): +# pass \ No newline at end of file From 87774714596f8866dd24f40c1ae1d66f6cea982d Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Thu, 8 Dec 2022 15:37:28 -0800 Subject: [PATCH 11/24] add mock for form_file --- composer/utils/object_store/oci_object_store.py | 2 +- .../utils/object_store/test_oci_object_store.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index 7ed63c4c3b..5c45e31daf 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -46,7 +46,7 @@ def __init__( config = oci.config.from_file(os.environ['OCI_CONFIG_FILE']) else: config = oci.config.from_file() - + self.client = oci.object_storage.ObjectStorageClient(config=config, retry_strategy=oci.retry.DEFAULT_RETRY_STRATEGY) self.namespace = self.client.get_namespace().data diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index d491330fc4..76f28f84d0 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -13,21 +13,19 @@ def setup_oci_mocks(monkeypatch): oci = pytest.importorskip('oci') mock_config = MagicMock() - monkeypatch.setattr(oci.config, 'from_file', mock_config) + mock_from_file = MagicMock(return_value=mock_config) + monkeypatch.setattr(oci.config, 'from_file', mock_from_file) mock_object_storage_client = MagicMock() monkeypatch.setattr(oci.object_storage, 'ObjectStorageClient', mock_object_storage_client) mock_upload_manager = MagicMock() monkeypatch.setattr(oci.object_storage, 'UploadManager', mock_upload_manager) - - class TestOCIObjectStore: - @classmethod def setup_class(cls): - oci = pytest.importorskip('oci') + pytest.importorskip('oci') cls.mock_bucket_name = 'my_bucket' cls.mock_namespace = 'my_namespace' @@ -35,7 +33,7 @@ def setup_class(cls): cls.oci_os.namespace = cls.mock_namespace def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): - oci = pytest.importorskip('oci') + pytest.importorskip('oci') mock_object_name = 'my_object' # oci_os = OCIObjectStore(mock_bucket_name) @@ -52,9 +50,8 @@ def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): object_name=mock_object_name, file_path=file_to_upload) - def test_download_object(self, monkeypatch, tmp_path): - oci = pytest.importorskip('oci') + pytest.importorskip('oci') mock_object_name = 'my_object' mock_response_object = MagicMock() file_content = bytes(range(4)) @@ -73,7 +70,7 @@ def test_download_object(self, monkeypatch, tmp_path): assert actual_content == file_content def test_get_object_size(self, monkeypatch): - oci = pytest.importorskip('oci') + pytest.importorskip('oci') mock_object_name = 'my_object' mock_object_size = 11 mock_object_1, mock_object_2 = MagicMock(), MagicMock() @@ -91,4 +88,4 @@ def test_get_object_size(self, monkeypatch): # def test_checkpointing_with_oci_object_store(self, setup_oci_mocks, monkeypatch, tmp_path): -# pass \ No newline at end of file +# pass From c8c41bee83e2e4b665eb22758c2bb7469521c056 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Thu, 8 Dec 2022 16:14:09 -0800 Subject: [PATCH 12/24] fix tests? --- .../object_store/test_oci_object_store.py | 137 +++++++++--------- 1 file changed, 68 insertions(+), 69 deletions(-) diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index 76f28f84d0..611a384ed4 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -10,8 +10,14 @@ @pytest.fixture -def setup_oci_mocks(monkeypatch): +def mock_bucket_name(): + return 'my_bucket' + +@pytest.fixture +def test_oci_obj_store(mock_bucket_name, monkeypatch): oci = pytest.importorskip('oci') + + # Mock all the oci functions. mock_config = MagicMock() mock_from_file = MagicMock(return_value=mock_config) monkeypatch.setattr(oci.config, 'from_file', mock_from_file) @@ -20,72 +26,65 @@ def setup_oci_mocks(monkeypatch): mock_upload_manager = MagicMock() monkeypatch.setattr(oci.object_storage, 'UploadManager', mock_upload_manager) + # Create OCIObjectStore object. + oci_os = OCIObjectStore(mock_bucket_name) + mock_namespace = 'my_namespace' + oci_os.namespace = mock_namespace + return oci_os + + +def test_upload_object(test_oci_obj_store, monkeypatch, tmp_path, mock_bucket_name): + pytest.importorskip('oci') + oci_os = test_oci_obj_store + mock_object_name = 'my_object' + + mock_upload_file = MagicMock() + monkeypatch.setattr(oci_os.upload_manager, 'upload_file', mock_upload_file) + file_to_upload = str(tmp_path / Path('my_upload.bin')) + with open(file_to_upload, 'wb') as f: + f.write(bytes(range(20))) + + oci_os.upload_object(object_name=mock_object_name, filename=file_to_upload) + mock_upload_file.assert_called_once_with(namespace_name=oci_os.namespace, + bucket_name=mock_bucket_name, + object_name=mock_object_name, + file_path=file_to_upload) + +def test_download_object(test_oci_obj_store, monkeypatch, tmp_path, mock_bucket_name): + pytest.importorskip('oci') + oci_os = test_oci_obj_store + mock_object_name = 'my_object' + mock_response_object = MagicMock() + file_content = bytes(range(4)) + mock_response_object.data.content = file_content + mock_get_object = MagicMock(return_value=mock_response_object) + monkeypatch.setattr(oci_os.client, 'get_object', mock_get_object) + file_to_download_to = str(tmp_path / Path('my_download.bin')) + + oci_os.download_object(object_name=mock_object_name, filename=file_to_download_to) + mock_get_object.assert_called_once_with(namespace_name=oci_os.namespace, + bucket_name=mock_bucket_name, + object_name=mock_object_name) + + with open(file_to_download_to, 'rb') as f: + actual_content = f.readline() + assert actual_content == file_content + +def test_get_object_size(test_oci_obj_store, monkeypatch): + pytest.importorskip('oci') + oci_os = test_oci_obj_store + mock_object_name = 'my_object' + mock_object_size = 11 + mock_object_1, mock_object_2 = MagicMock(), MagicMock() + mock_object_1.name = mock_object_name + mock_object_2.name = 'foobar' + mock_object_1.size = mock_object_size + mock_object_2.size = 3 + + mock_list_objects_return = MagicMock() + mock_list_objects_return.data.objects = [mock_object_1, mock_object_2] + mock_list_objects_fn = MagicMock(return_value=mock_list_objects_return) + monkeypatch.setattr(oci_os.client, 'list_objects', mock_list_objects_fn) + + assert oci_os.get_object_size(mock_object_name) == mock_object_size -class TestOCIObjectStore: - - @classmethod - def setup_class(cls): - pytest.importorskip('oci') - cls.mock_bucket_name = 'my_bucket' - cls.mock_namespace = 'my_namespace' - - cls.oci_os = OCIObjectStore(cls.mock_bucket_name) - cls.oci_os.namespace = cls.mock_namespace - - def test_upload_object(self, setup_oci_mocks, monkeypatch, tmp_path): - pytest.importorskip('oci') - mock_object_name = 'my_object' - - # oci_os = OCIObjectStore(mock_bucket_name) - self.oci_os.namespace = self.mock_namespace - mock_upload_file = MagicMock() - monkeypatch.setattr(self.oci_os.upload_manager, 'upload_file', mock_upload_file) - file_to_upload = str(tmp_path / Path('my_upload.bin')) - with open(file_to_upload, 'wb') as f: - f.write(bytes(range(20))) - - self.oci_os.upload_object(object_name=mock_object_name, filename=file_to_upload) - mock_upload_file.assert_called_once_with(namespace_name=self.mock_namespace, - bucket_name=self.mock_bucket_name, - object_name=mock_object_name, - file_path=file_to_upload) - - def test_download_object(self, monkeypatch, tmp_path): - pytest.importorskip('oci') - mock_object_name = 'my_object' - mock_response_object = MagicMock() - file_content = bytes(range(4)) - mock_response_object.data.content = file_content - mock_get_object = MagicMock(return_value=mock_response_object) - monkeypatch.setattr(self.oci_os.client, 'get_object', mock_get_object) - file_to_download_to = str(tmp_path / Path('my_download.bin')) - - self.oci_os.download_object(object_name=mock_object_name, filename=file_to_download_to) - mock_get_object.assert_called_once_with(namespace_name=self.mock_namespace, - bucket_name=self.mock_bucket_name, - object_name=mock_object_name) - - with open(file_to_download_to, 'rb') as f: - actual_content = f.readline() - assert actual_content == file_content - - def test_get_object_size(self, monkeypatch): - pytest.importorskip('oci') - mock_object_name = 'my_object' - mock_object_size = 11 - mock_object_1, mock_object_2 = MagicMock(), MagicMock() - mock_object_1.name = mock_object_name - mock_object_2.name = 'foobar' - mock_object_1.size = mock_object_size - mock_object_2.size = 3 - - mock_list_objects_return = MagicMock() - mock_list_objects_return.data.objects = [mock_object_1, mock_object_2] - mock_list_objects_fn = MagicMock(return_value=mock_list_objects_return) - monkeypatch.setattr(self.oci_os.client, 'list_objects', mock_list_objects_fn) - - assert self.oci_os.get_object_size(mock_object_name) == mock_object_size - - -# def test_checkpointing_with_oci_object_store(self, setup_oci_mocks, monkeypatch, tmp_path): -# pass From 5b43de8702fa43eae92343c9f72c1e6e32cafa43 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Thu, 8 Dec 2022 16:38:34 -0800 Subject: [PATCH 13/24] pre-commit --- tests/utils/object_store/test_oci_object_store.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index 611a384ed4..8a74ec49ce 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -13,6 +13,7 @@ def mock_bucket_name(): return 'my_bucket' + @pytest.fixture def test_oci_obj_store(mock_bucket_name, monkeypatch): oci = pytest.importorskip('oci') @@ -46,9 +47,10 @@ def test_upload_object(test_oci_obj_store, monkeypatch, tmp_path, mock_bucket_na oci_os.upload_object(object_name=mock_object_name, filename=file_to_upload) mock_upload_file.assert_called_once_with(namespace_name=oci_os.namespace, - bucket_name=mock_bucket_name, - object_name=mock_object_name, - file_path=file_to_upload) + bucket_name=mock_bucket_name, + object_name=mock_object_name, + file_path=file_to_upload) + def test_download_object(test_oci_obj_store, monkeypatch, tmp_path, mock_bucket_name): pytest.importorskip('oci') @@ -70,6 +72,7 @@ def test_download_object(test_oci_obj_store, monkeypatch, tmp_path, mock_bucket_ actual_content = f.readline() assert actual_content == file_content + def test_get_object_size(test_oci_obj_store, monkeypatch): pytest.importorskip('oci') oci_os = test_oci_obj_store @@ -87,4 +90,3 @@ def test_get_object_size(test_oci_obj_store, monkeypatch): monkeypatch.setattr(oci_os.client, 'list_objects', mock_list_objects_fn) assert oci_os.get_object_size(mock_object_name) == mock_object_size - From d371a9a4c0220bb9e8e380d3fb1e71df860925a6 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:09:04 -0800 Subject: [PATCH 14/24] Add note --- tests/utils/object_store/object_store_settings.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/utils/object_store/object_store_settings.py b/tests/utils/object_store/object_store_settings.py index 5198389657..ac40af9c0f 100644 --- a/tests/utils/object_store/object_store_settings.py +++ b/tests/utils/object_store/object_store_settings.py @@ -55,8 +55,7 @@ object_stores = [ pytest.param(x, marks=_object_store_marks[x], id=x.__name__) for x in get_module_subclasses(composer.utils.object_store, ObjectStore) - # Decided to make a separate test suite for OCI instead of use this existing one - # because it is kinda complex to work with. + # Note: OCI has its own test suite, so it is exempted from being included in this one. if not issubclass(x, OCIObjectStore) ] From d709d37c59610361a2f0969589c33416933fe478 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:14:47 -0800 Subject: [PATCH 15/24] typo --- tests/utils/object_store/object_store_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/object_store/object_store_settings.py b/tests/utils/object_store/object_store_settings.py index ac40af9c0f..67e01b63f3 100644 --- a/tests/utils/object_store/object_store_settings.py +++ b/tests/utils/object_store/object_store_settings.py @@ -55,7 +55,7 @@ object_stores = [ pytest.param(x, marks=_object_store_marks[x], id=x.__name__) for x in get_module_subclasses(composer.utils.object_store, ObjectStore) - # Note: OCI has its own test suite, so it is exempted from being included in this one. + # Note: OCI has its own test suite, so it is exempt from being included in this one.`` if not issubclass(x, OCIObjectStore) ] From 9edc29a8df1d5855ad315fc55b86d9c5a41ad008 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:15:54 -0800 Subject: [PATCH 16/24] s3 -> oci --- composer/utils/object_store/oci_object_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index 5c45e31daf..2dad9f5294 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -1,7 +1,7 @@ # Copyright 2022 MosaicML Composer authors # SPDX-License-Identifier: Apache-2.0 -"""S3-Compatible object store.""" +"""OCI-Compatible object store.""" from __future__ import annotations From 00f07aca90adeeefa5b552654bebe246727d5afb Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:18:49 -0800 Subject: [PATCH 17/24] explictly check for empty string --- composer/utils/object_store/oci_object_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index 2dad9f5294..dfbf55ea84 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -39,7 +39,7 @@ def __init__( # Format paths self.bucket = bucket.strip('/') self.prefix = prefix.strip('/') - if self.prefix: + if self.prefix != '': self.prefix += '/' if 'OCI_CONFIG_FILE' in os.environ: From 278957264410dea3b8102c65e6833eca7289c9b7 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:21:21 -0800 Subject: [PATCH 18/24] explicitly test list length --- composer/utils/object_store/oci_object_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index dfbf55ea84..971ab14973 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -58,7 +58,7 @@ def get_uri(self, object_name: str) -> str: def get_object_size(self, object_name: str) -> int: objects = self.client.list_objects(self.namespace, self.bucket, prefix=object_name, fields='size').data.objects relevant_objects = [obj for obj in objects if obj.name == object_name] - if relevant_objects: + if len(relevant_objects) > 0: obj = relevant_objects.pop() return obj.size else: From 76aaa39fc15901cb03a51cee4697c0e7c5096f55 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:24:46 -0800 Subject: [PATCH 19/24] reword error --- composer/utils/object_store/oci_object_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index 971ab14973..cf3cda1632 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -86,7 +86,7 @@ def download_object( ): del callback if os.path.exists(filename) and not overwrite: - raise FileExistsError(f'The file at {filename} already exists') + raise FileExistsError(f'The file at {filename} already exists and overwrite is set to False') tmp_path = str(filename) + f'.{uuid.uuid4()}.tmp' response = self.client.get_object( From 9c4e4068633e6ebe814c6489af5697dce2999c79 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 17:28:22 -0800 Subject: [PATCH 20/24] Adjust error message for S3ObjectStore --- composer/utils/object_store/s3_object_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/object_store/s3_object_store.py b/composer/utils/object_store/s3_object_store.py index 6348a8fa26..568af41711 100644 --- a/composer/utils/object_store/s3_object_store.py +++ b/composer/utils/object_store/s3_object_store.py @@ -141,7 +141,7 @@ def download_object( callback: Optional[Callable[[int, int], None]] = None, ): if os.path.exists(filename) and not overwrite: - raise FileExistsError(f'The file at {filename} already exists') + raise FileExistsError(f'The file at {filename} already exists and overwrite is set to False.') tmp_path = str(filename) + f'.{uuid.uuid4()}.tmp' if callback is None: cb_wrapper = None From 9ea96553f41c301667f6ece431a60331026d516d Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 18:19:51 -0800 Subject: [PATCH 21/24] Simplify get_object_size --- .../utils/object_store/oci_object_store.py | 12 +++++++----- .../object_store/test_oci_object_store.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/composer/utils/object_store/oci_object_store.py b/composer/utils/object_store/oci_object_store.py index cf3cda1632..95df90f067 100644 --- a/composer/utils/object_store/oci_object_store.py +++ b/composer/utils/object_store/oci_object_store.py @@ -56,11 +56,13 @@ def get_uri(self, object_name: str) -> str: return f'oci://{self.bucket}/{object_name}' def get_object_size(self, object_name: str) -> int: - objects = self.client.list_objects(self.namespace, self.bucket, prefix=object_name, fields='size').data.objects - relevant_objects = [obj for obj in objects if obj.name == object_name] - if len(relevant_objects) > 0: - obj = relevant_objects.pop() - return obj.size + response = self.client.get_object( + namespace_name=self.namespace, + bucket_name=self.bucket, + object_name=object_name, + ) + if response.status == 200: + return int(response.data.headers['Content-Length']) else: raise FileNotFoundError(f'Unable to locate oci://{self.bucket}@{self.namespace}/{object_name}') diff --git a/tests/utils/object_store/test_oci_object_store.py b/tests/utils/object_store/test_oci_object_store.py index 8a74ec49ce..1396e6cd62 100644 --- a/tests/utils/object_store/test_oci_object_store.py +++ b/tests/utils/object_store/test_oci_object_store.py @@ -78,15 +78,15 @@ def test_get_object_size(test_oci_obj_store, monkeypatch): oci_os = test_oci_obj_store mock_object_name = 'my_object' mock_object_size = 11 - mock_object_1, mock_object_2 = MagicMock(), MagicMock() - mock_object_1.name = mock_object_name - mock_object_2.name = 'foobar' - mock_object_1.size = mock_object_size - mock_object_2.size = 3 - mock_list_objects_return = MagicMock() - mock_list_objects_return.data.objects = [mock_object_1, mock_object_2] - mock_list_objects_fn = MagicMock(return_value=mock_list_objects_return) - monkeypatch.setattr(oci_os.client, 'list_objects', mock_list_objects_fn) + mock_response = MagicMock() + mock_response.status = 200 + mock_response.data.headers = {'Content-Length': mock_object_size} + mock_get_object_fn = MagicMock(return_value=mock_response) + monkeypatch.setattr(oci_os.client, 'get_object', mock_get_object_fn) assert oci_os.get_object_size(mock_object_name) == mock_object_size + + mock_response.status = 400 + with pytest.raises(FileNotFoundError): + oci_os.get_object_size(mock_object_name) From 4f69849b8d7698e69940dc8023e6b483b1cb9e1b Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 18:36:12 -0800 Subject: [PATCH 22/24] Update docs for OCI --- docs/source/trainer/checkpointing.rst | 56 ++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/docs/source/trainer/checkpointing.rst b/docs/source/trainer/checkpointing.rst index 3fc7303431..ef417c44d4 100644 --- a/docs/source/trainer/checkpointing.rst +++ b/docs/source/trainer/checkpointing.rst @@ -297,11 +297,13 @@ and then the :class:`.RemoteUploaderDownloader` logger will upload checkpoints t Behind the scenes, the :class:`.RemoteUploaderDownloader` uses :doc:`Apache Libcloud `. -The easiest way to upload checkpoints to S3 is to prefix your ``save_folder`` with ``'s3://'``. All other +The easiest way to upload checkpoints to S3 or OCI is to prefix your ``save_folder`` with ``'s3://'`` or ``'oci://'``. All other checkpoint arguments remain the same. For example, ``save_filename`` will be the name of the checkpoint file -that gets uploaded to the S3 URI that you specified. +that gets uploaded to the S3 or OCI URI that you specified. +For example, for S3: .. testcode:: + # Save checkpoints every epoch to s3://my_bucket/checkpoints from composer.trainer import Trainer trainer = Trainer( @@ -317,12 +319,36 @@ that gets uploaded to the S3 URI that you specified. trainer.fit() - This will train your model, saving the checkpoints locally, upload them to the S3 Bucket `my_bucket`, and delete the checkpoints from the local disk. The checkpoints will be located on S3 inside your bucket as `checkpoints/ep3.pt` for third epoch's checkpoints, for example. The full URI in this case would be: `s3://my_bucket/checkpoints/ep3.pt`. +Similarly for OCI: +.. testcode:: + # Save checkpoints every epoch to oci://my_bucket/checkpoints + from composer.trainer import Trainer + + trainer = Trainer( + model=model, + train_dataloader=train_dataloader, + max_duration='10ep', + save_folder='oci://my_bucket/checkpoints', + save_interval='1ep', + save_overwrite=True, + save_filename='ep{epoch}.pt', + save_num_checkpoints_to_keep=0, # delete all checkpoints locally + ) + + trainer.fit() + +This will train your model, saving the checkpoints locally, upload them to the OCI Bucket `my_bucket`, +and delete the checkpoints from the local disk. The checkpoints will be located on OCI inside your bucket as +`checkpoints/ep3.pt` for third epoch's checkpoints, for example. The full URI in this case would be: +`oci://my_bucket/checkpoints/ep3.pt`. + +There are a few additional trainer arguments which can be helpful to configure: + * ``save_num_checkpoints_to_keep``: Set this parameter to remove checkpoints from the local disk after they have been uploaded. For example, setting this parameter to 1 will only keep the latest checkpoint locally; setting it to 0 will remove each checkpoint after it has been uploaded. Checkpoints are never deleted from object stores. @@ -372,11 +398,6 @@ GCS. * :doc:`Full list of object store providers ` * :class:`~.RemoteUploaderDownloader` -There are a few additional trainer arguments which can be helpful to configure: - -* ``save_num_checkpoints_to_keep``: Set this parameter to remove checkpoints from the local disk after they have been - uploaded. For example, setting this parameter to 1 will only keep the latest checkpoint locally; setting it to 0 - will remove each checkpoint after it has been uploaded. Checkpoints are never deleted from object stores. Saving Checkpoints to Google Cloud Storage (GCS) @@ -498,8 +519,8 @@ should be the path to the checkpoint file *within the container/bucket*. new_trainer.fit() -An easier way to load checkpoints from S3 specifically is to just use a URI starting with ``s3://``. -If you use the S3 URI, it is not necessary to specify a ``load_object_store``. Note, that for other +An easier way to load checkpoints from S3 or OCI specifically is to just use a URI starting with ``s3://`` or ``oci://``. +If you use the S3 or OCI URI, it is not necessary to specify a ``load_object_store``. Note, that for other object stores like WandB or LibCloud, you must still specify a ``load_object_store``. .. testcode:: @@ -516,6 +537,21 @@ object stores like WandB or LibCloud, you must still specify a ``load_object_sto This will load the first epoch's checkpoints from S3 and resume training in the second epoch. +Similarly for OCI: + +.. testcode:: + :skipif: not _LIBCLOUD_INSTALLED + + new_trainer = Trainer( + model=model, + train_dataloader=train_dataloader, + max_duration="10ep", + load_path="oci://checkpoint-debugging/checkpoints/ep1.pt", + ) + + new_trainer.fit() + +This will load the first epoch's checkpoints from OCI and resume training in the second epoch. Loading Checkpoints from Google Cloud Storage (GCS) --------------------------------------------------- From 9520ed6d08a22b3a9bdf3864a417a87940c43fd4 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Fri, 9 Dec 2022 18:52:47 -0800 Subject: [PATCH 23/24] fix doctest --- docs/source/doctest_fixtures.py | 2 +- docs/source/trainer/checkpointing.rst | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/doctest_fixtures.py b/docs/source/doctest_fixtures.py index 66732bc455..2cfbd73523 100644 --- a/docs/source/doctest_fixtures.py +++ b/docs/source/doctest_fixtures.py @@ -198,7 +198,7 @@ def _new_trainer_init(self, fake_ellipses: None = None, **kwargs: Any): kwargs['progress_bar'] = False # hide tqdm logging if 'log_to_console' not in kwargs: kwargs['log_to_console'] = False # hide console logging - if 'load_path' in kwargs and urlparse(kwargs['load_path']).scheme == 's3': + if 'load_path' in kwargs and urlparse(kwargs['load_path']).scheme in ['s3', 'oci']: kwargs['load_path'] = urlparse(kwargs['load_path']).path.lstrip('/') kwargs['load_object_store'] = LibcloudObjectStore() _original_trainer_init(self, **kwargs) diff --git a/docs/source/trainer/checkpointing.rst b/docs/source/trainer/checkpointing.rst index 9684023ba9..ebb3e84f72 100644 --- a/docs/source/trainer/checkpointing.rst +++ b/docs/source/trainer/checkpointing.rst @@ -305,9 +305,10 @@ that gets uploaded to the S3 or OCI URI that you specified. For example, for S3: .. testcode:: - # Save checkpoints every epoch to s3://my_bucket/checkpoints + from composer.trainer import Trainer + # Save checkpoints every epoch to s3://my_bucket/checkpoints trainer = Trainer( model=model, train_dataloader=train_dataloader, @@ -328,9 +329,10 @@ and delete the checkpoints from the local disk. The checkpoints will be located Similarly for OCI: .. testcode:: - # Save checkpoints every epoch to oci://my_bucket/checkpoints + from composer.trainer import Trainer + # Save checkpoints every epoch to oci://my_bucket/checkpoints trainer = Trainer( model=model, train_dataloader=train_dataloader, From 7f9484f9b91e072abe955d02d8ddf7fba70c5804 Mon Sep 17 00:00:00 2001 From: Evan Racah Date: Mon, 12 Dec 2022 17:08:23 -0800 Subject: [PATCH 24/24] fix setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c694ec0f67..314244b8a8 100644 --- a/setup.py +++ b/setup.py @@ -190,7 +190,7 @@ def package_files(prefix: str, directory: str, extension: str): ] extra_deps['oci'] = [ - 'oci>=oci-2.88.2', + 'oci>=2.88.2,<3.0.0', ] extra_deps['onnx'] = [