Skip to content
This repository has been archived by the owner on Nov 14, 2023. It is now read-only.

Commit

Permalink
Remove preview downloading when creating a task with cloud storage data (
Browse files Browse the repository at this point in the history
cvat-ai#5499)

This PR removes the downloading of previews from cloud storage during task
creation (skipped in PR cvat-ai#5478). In addition, I had to change the test
that checks that the file name appears in the error message when the
specified file is not found in the bucket, because the preview is no longer
downloaded at the task-creation stage.
  • Loading branch information
Marishka17 authored and mikhail-treskin committed Jul 1, 2023
1 parent c1f9ff2 commit 685452d
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 17 deletions.
12 changes: 3 additions & 9 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,9 @@ def _add_prefix(properties):
content = list(map(_add_prefix, raw_content))
else:
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
if not content:
    # None of the requested files were found in the cloud storage manifest
    # subset, so there is nothing to build the task manifest from.
    # Adjacent string literals are joined verbatim: the first fragment must
    # end with a space, otherwise the message reads "specifiedin".
    raise ValidationError(
        'There is no intersection of the files specified '
        'in the request with the contents of the bucket'
    )
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)

Expand Down Expand Up @@ -354,8 +357,6 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
)

if is_data_in_cloud:
cloud_storage_instance = db_storage_to_storage_instance(db_data.cloud_storage)

manifest = ImageManifestManager(db_data.get_manifest_path())
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
Expand Down Expand Up @@ -392,13 +393,6 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
elif is_data_in_cloud:
sorted_media = sort(media['image'], data['sorting_method'])

# download previews from cloud storage
data_size = len(sorted_media)
segment_step, *_ = _get_task_segment_data(db_task, data_size)
for preview_frame in range(0, data_size, segment_step):
preview = sorted_media[preview_frame]
cloud_storage_instance.download_file(preview, os.path.join(upload_dir, preview))

# Define task manifest content based on cloud storage manifest content and uploaded files
_create_task_manifest_based_on_cloud_storage_manifest(
sorted_media, cloud_storage_manifest_prefix,
Expand Down
47 changes: 39 additions & 8 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,8 @@ def _wait_until_task_is_created(api: apis.TasksApi, task_id: int) -> models.RqSt
sleep(1)
raise Exception("Cannot create task")

def _test_create_task(self, username, spec, data, content_type, **kwargs):
@staticmethod
def _test_create_task(username, spec, data, content_type, **kwargs):
with make_api_client(username) as api_client:
(task, response) = api_client.tasks_api.create(spec, **kwargs)
assert response.status == HTTPStatus.CREATED
Expand All @@ -435,7 +436,7 @@ def _test_create_task(self, username, spec, data, content_type, **kwargs):
)
assert response.status == HTTPStatus.ACCEPTED

status = self._wait_until_task_is_created(api_client.tasks_api, task.id)
status = TestPostTaskData._wait_until_task_is_created(api_client.tasks_api, task.id)
assert status.state.value == "Finished"

return task.id
Expand Down Expand Up @@ -798,16 +799,22 @@ def test_create_task_with_file_pattern(
status = self._test_cannot_create_task(self._USERNAME, task_spec, data_spec)
assert "No media data found" in status.message


@pytest.mark.usefixtures("restore_db_per_function")
@pytest.mark.usefixtures("restore_cvat_data")
class TestWorkWithTask:
_USERNAME = "admin1"

@pytest.mark.with_external_services
@pytest.mark.parametrize(
"cloud_storage_id, manifest, org",
[(1, "manifest.jsonl", "")], # public bucket
)
def test_cannot_create_task_with_mythical_cloud_storage_data(
self, cloud_storage_id, manifest, org
def test_work_with_task_containing_non_stable_cloud_storage_files(
self, cloud_storage_id, manifest, org, cloud_storages, request
):
mythical_file = "mythical.jpg"
cloud_storage_content = [mythical_file, manifest]
image_name = "image_case_65_1.png"
cloud_storage_content = [image_name, manifest]

task_spec = {
"name": f"Task with mythical file from cloud storage {cloud_storage_id}",
Expand All @@ -821,8 +828,32 @@ def test_cannot_create_task_with_mythical_cloud_storage_data(
"server_files": cloud_storage_content,
}

status = self._test_cannot_create_task(self._USERNAME, task_spec, data_spec, org=org)
assert mythical_file in status.message
task_id = TestPostTaskData._test_create_task(
self._USERNAME, task_spec, data_spec, content_type="application/json", org=org
)

# save image from the "public" bucket and remove it temporarily

s3_client = s3.make_client()
bucket_name = cloud_storages[cloud_storage_id]["resource"]

image = s3_client.download_fileobj(bucket_name, image_name)
s3_client.remove_file(bucket_name, image_name)
request.addfinalizer(
partial(s3_client.create_file, bucket=bucket_name, filename=image_name, data=image)
)

with make_api_client(self._USERNAME) as api_client:
try:
api_client.tasks_api.retrieve_data(
task_id, number=0, quality="original", type="frame"
)
raise AssertionError("Frame should not exist")
except AssertionError:
raise
except Exception as ex:
assert ex.status == HTTPStatus.NOT_FOUND
assert image_name in ex.body


@pytest.mark.usefixtures("restore_db_per_class")
Expand Down
7 changes: 7 additions & 0 deletions tests/python/shared/utils/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#
# SPDX-License-Identifier: MIT

from io import BytesIO

import boto3
from botocore.exceptions import ClientError

Expand Down Expand Up @@ -40,6 +42,11 @@ def file_exists(self, bucket: str, filename: str) -> bool:
else:
raise

def download_fileobj(self, bucket: str, key: str) -> bytes:
    """Download an object into memory and return its raw bytes.

    The object is streamed into an in-memory buffer via
    ``self.client.download_fileobj`` (presumably a boto3 S3 client --
    confirm against the class constructor), so nothing is written to disk.

    Args:
        bucket: Name of the bucket that holds the object.
        key: Key (file name) of the object inside the bucket.

    Returns:
        The full content of the object.
    """
    buffer = BytesIO()
    try:
        self.client.download_fileobj(Bucket=bucket, Key=key, Fileobj=buffer)
        # Read the content out before the buffer is closed below.
        payload = buffer.getvalue()
    finally:
        buffer.close()
    return payload


def make_client() -> S3Client:
return S3Client(
Expand Down

0 comments on commit 685452d

Please sign in to comment.