Skip to content

Commit

Permalink
Use subset name on task export (#8176)
Browse files Browse the repository at this point in the history
<!-- Raise an issue to propose your change
(https://github.com/cvat-ai/cvat/issues).
It helps to avoid duplication of efforts from multiple independent
contributors.
Discuss your ideas with maintainers to be sure that changes will be
approved and merged.
Read the [Contribution guide](https://docs.cvat.ai/docs/contributing/).
-->

<!-- Provide a general summary of your changes in the Title above -->

### Motivation and context
<!-- Why is this change required? What problem does it solve? If it
fixes an open
issue, please link to the issue here. Describe your changes in detail,
add
screenshots. -->
Fixes #6451

### How has this been tested?
<!-- Please describe in detail how you tested your changes.
Include details of your testing environment, and the tests you ran to
see how your change affects other areas of the code, etc. -->

### Checklist
<!-- Go over all the following points, and put an `x` in all the boxes
that apply.
If an item isn't applicable for some reason, then ~~explicitly
strikethrough~~ the whole
line. If you don't do that, GitHub will show incorrect progress for the
pull request.
If you're unsure about any of these, don't hesitate to ask. We're here
to help! -->
- [ ] I submit my changes into the `develop` branch
- [ ] I have created a changelog fragment <!-- see top comment in
CHANGELOG.md -->
- [ ] I have updated the documentation accordingly
- [ ] I have added tests to cover my changes
- [ ] I have linked related issues (see [GitHub docs](https://help.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword))
- [ ] I have increased versions of npm packages if it is necessary
  ([cvat-canvas](https://github.com/cvat-ai/cvat/tree/develop/cvat-canvas#versioning),
  [cvat-core](https://github.com/cvat-ai/cvat/tree/develop/cvat-core#versioning),
  [cvat-data](https://github.com/cvat-ai/cvat/tree/develop/cvat-data#versioning) and
  [cvat-ui](https://github.com/cvat-ai/cvat/tree/develop/cvat-ui#versioning))

### License

- [ ] I submit _my code changes_ under the same [MIT License](https://github.com/cvat-ai/cvat/blob/develop/LICENSE) that covers the project.
  Feel free to contact the maintainers if that's a concern.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
- Enhanced task export functionality: Images are now organized into
folders based on subsets for better data organization.

- **Tests**
- Introduced new test methods and updated existing tests to validate the
new subset-based export functionality.
  - Added new data entries for improved test coverage and verification.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
  • Loading branch information
Eldies and zhiltsov-max authored Jul 17, 2024
1 parent 49c39ef commit 2490153
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Changed

- On task export, images are now put into folders named after the task subset
(<https://github.com/cvat-ai/cvat/pull/8176>)
15 changes: 11 additions & 4 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class CommonData(InstanceLabelData):
Tag = namedtuple('Tag', 'frame, label, attributes, source, group, id')
Tag.__new__.__defaults__ = (0, None)
Frame = namedtuple(
'Frame', 'idx, id, frame, name, width, height, labeled_shapes, tags, shapes, labels')
'Frame', 'idx, id, frame, name, width, height, labeled_shapes, tags, shapes, labels, subset')
Label = namedtuple('Label', 'id, name, color, type')

def __init__(self,
Expand All @@ -223,6 +223,7 @@ def __init__(self,
self._db_data = db_task.data
self._use_server_track_ids = use_server_track_ids
self._required_frames = included_frames
self._db_subset = db_task.subset

super().__init__(db_task)

Expand Down Expand Up @@ -268,6 +269,7 @@ def _init_frame_info(self):
"path": "frame_{:06d}".format(self.abs_frame_id(frame)),
"width": self._db_data.video.width,
"height": self._db_data.video.height,
"subset": self._db_subset,
} for frame in self.rel_range
}
else:
Expand All @@ -278,6 +280,7 @@ def _init_frame_info(self):
"path": db_image.path,
"width": db_image.width,
"height": db_image.height,
"subset": self._db_subset,
} for db_image in queryset
}

Expand Down Expand Up @@ -409,6 +412,7 @@ def get_frame(idx):
frames[frame] = CommonData.Frame(
idx=idx,
id=frame_info.get("id", 0),
subset=frame_info["subset"],
frame=frame,
name=frame_info["path"],
height=frame_info["height"],
Expand Down Expand Up @@ -1487,12 +1491,14 @@ def __init__(
dimension: DimensionType = DimensionType.DIM_2D,
**kwargs
):
instance_meta = instance_data.meta[instance_data.META_FIELD]
dm.SourceExtractor.__init__(
self, media_type=dm.Image if dimension == DimensionType.DIM_2D else PointCloud
self,
media_type=dm.Image if dimension == DimensionType.DIM_2D else PointCloud,
subset=instance_meta['subset'],
)
CVATDataExtractorMixin.__init__(self, **kwargs)

instance_meta = instance_data.meta[instance_data.META_FIELD]
self._categories = self._load_categories(instance_meta['labels'])
self._user = self._load_user_info(instance_meta) if dimension == DimensionType.DIM_3D else {}
self._dimension = dimension
Expand Down Expand Up @@ -1527,6 +1533,7 @@ def __init__(
dm_item = dm.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, media=dm_image,
subset=frame_data.subset,
attributes={'frame': frame_data.frame
})
elif dimension == DimensionType.DIM_3D:
Expand All @@ -1543,7 +1550,7 @@ def __init__(
dm_item = dm.DatasetItem(
id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, media=PointCloud(dm_image[0]), related_images=dm_image[1],
attributes=attributes
attributes=attributes, subset=frame_data.subset,
)

dm_items.append(dm_item)
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/quality_control/quality_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -1701,7 +1701,7 @@ def _find_gt_conflicts(self):
gt_job_dataset = self._gt_dataset

for gt_item in gt_job_dataset:
ds_item = ds_job_dataset.get(gt_item.id)
ds_item = ds_job_dataset.get(id=gt_item.id, subset=gt_item.subset)
if not ds_item:
continue # we need to compare only intersecting frames

Expand Down
28 changes: 28 additions & 0 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# SPDX-License-Identifier: MIT

import io
import itertools
import json
import os
import os.path as osp
Expand All @@ -13,6 +14,7 @@
from http import HTTPStatus
from itertools import chain, product
from math import ceil
from operator import itemgetter
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
from time import sleep, time
Expand Down Expand Up @@ -807,6 +809,32 @@ def test_export_dataset_after_deleting_related_cloud_storage(self, admin_user, t
response = export_dataset(api_client.tasks_api.retrieve_dataset_endpoint, id=task["id"])
assert response.data

@pytest.mark.parametrize(
    "export_format, default_subset_name, subset_path_template",
    [
        ("Datumaro 1.0", "", "images/{subset}"),
        ("YOLO 1.1", "train", "obj_{subset}_data"),
    ],
)
def test_uses_subset_name(
    self, tasks, admin_user, export_format, default_subset_name, subset_path_template
):
    """Check that an exported task archive contains a subset-derived path.

    One task is exported for every distinct ``subset`` value found in
    ``tasks``. The archive must contain at least one entry whose name
    includes ``subset_path_template`` formatted with the task's subset;
    ``default_subset_name`` is substituted when the task's subset is falsy.
    """
    # Keep the first task seen for each subset while walking the tasks in
    # subset order; the stable sort makes this the earliest task per subset.
    representative_tasks = {}
    for candidate in sorted(tasks, key=itemgetter("subset")):
        representative_tasks.setdefault(candidate["subset"], candidate)

    for subset_name, task in representative_tasks.items():
        response = self._test_export_task(admin_user, tid=task["id"], format=export_format)
        archive_bytes = io.BytesIO(response.data)
        with zipfile.ZipFile(archive_bytes) as zip_file:
            effective_subset = subset_name or default_subset_name
            subset_path = subset_path_template.format(subset=effective_subset)
            assert any(
                subset_path in member_name for member_name in zip_file.namelist()
            ), f"No {subset_path} in {zip_file.namelist()}"


@pytest.mark.usefixtures("restore_db_per_function")
@pytest.mark.usefixtures("restore_cvat_data")
Expand Down

0 comments on commit 2490153

Please sign in to comment.