Skip to content

Commit

Permalink
Avoid unnecessary image loading in Datumaro format on save (cvat-ai#176)
Browse files Browse the repository at this point in the history
* Avoid unnecessary image loading in Datumaro format

* update changelog

* add test
  • Loading branch information
Maxim Zhiltsov authored Mar 22, 2021
1 parent 15e573b commit a572846
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- The ability to work with file names containing Cyrillic and spaces (<https://github.com/openvinotoolkit/datumaro/pull/148>)
- Image reading and saving in ICDAR formats (<https://github.com/openvinotoolkit/datumaro/pull/174>)
- Unnecessary image loading on dataset saving (<https://github.com/openvinotoolkit/datumaro/pull/176>)

### Security
-
Expand Down
3 changes: 2 additions & 1 deletion datumaro/plugins/datumaro_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ def write_item(self, item):
self._context._save_image(item, path)

item_desc['image'] = {
'size': item.image.size,
'path': path,
}
if item.image.has_size: # avoid occasional loading
item_desc['image']['size'] = item.image.size
self.items.append(item_desc)

for ann in item.annotations:
Expand Down
16 changes: 14 additions & 2 deletions datumaro/util/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ def __init__(self, data=None, path=None, loader=None, cache=None,
data = lazy_image(path, loader=loader, cache=cache)
self._data = data

if not self._size and isinstance(data, np.ndarray):
self._size = data.shape[:2]

@property
def path(self):
    """Return the path stored on this image object."""
    return self._path
Expand All @@ -261,13 +264,22 @@ def ext(self):
@property
def data(self):
# Diff rendering of the `data` property: the lines the commit removed are
# shown directly above the lines that replace them, so this span is not a
# single runnable body.
if callable(self._data):
# NOTE(review): the next two `return` lines look like the file's 2 deleted
# lines (the pre-change early returns) - confirm against the raw diff.
return self._data()
return self._data
# Replacement branches: obtain the data first (calling the stored callable
# when there is one) instead of returning immediately.
data = self._data()
else:
data = self._data

# When no size has been recorded yet and data was obtained, record the
# first two dimensions of its shape as the size.
if self._size is None and data is not None:
self._size = data.shape[:2]
return data

@property
def has_data(self):
    """Tell whether a data object (or a callable producing it) is attached.

    Only checks that the stored value is not ``None``; nothing is called
    or loaded here.
    """
    payload = self._data
    return payload is not None

@property
def has_size(self):
    """Tell whether the image size is known without invoking a loader.

    True when a size has already been recorded, or when the stored data
    is an in-memory ``np.ndarray``. A stored callable is not called.
    """
    if self._size is not None:
        return True
    return isinstance(self._data, np.ndarray)

@property
def size(self):
if self._size is None:
Expand Down
19 changes: 19 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,25 @@ def test_flushes_changes_on_save(self):

self.assertFalse(dataset.is_modified)

def test_does_not_load_images_on_saving(self):
    """Saving a dataset must not invoke the image loader of its items."""
    # Issue https://github.com/openvinotoolkit/datumaro/issues/177
    # Missing image metadata (size etc.) can lead to image loading on
    # dataset save without image saving

    # Every loader invocation leaves a mark here; it must stay empty.
    load_calls = []

    def tracking_loader():
        load_calls.append(True)

    item = DatasetItem(1, image=tracking_loader)
    dataset = Dataset.from_iterable([item])

    with TestDir() as test_dir:
        dataset.save(test_dir)

    self.assertFalse(load_calls)


class DatasetItemTest(TestCase):
def test_ctor_requires_id(self):
Expand Down

0 comments on commit a572846

Please sign in to comment.