# Numeric id of the EXIF tag under which the image orientation is stored.
ORIENTATION_EXIF_TAG = 274


class ORIENTATION(IntEnum):
    """Values that may be stored under ``ORIENTATION_EXIF_TAG``.

    The member names describe how the stored pixel data relates to the
    upright picture; ``NORMAL_HORIZONTAL`` means no correction is needed.
    """

    NORMAL_HORIZONTAL = 1
    MIRROR_HORIZONTAL = 2
    NORMAL_180_ROTATED = 3
    MIRROR_VERTICAL = 4
    MIRROR_HORIZONTAL_270_ROTATED = 5
    NORMAL_90_ROTATED = 6
    MIRROR_HORIZONTAL_90_ROTATED = 7
    NORMAL_270_ROTATED = 8


# Angle passed to ``img.rotate`` for every orientation that needs a rotation.
# Orientations that are absent from the mapping are not rotated at all.
_ROTATION_ANGLE = {
    ORIENTATION.NORMAL_180_ROTATED: 180,
    ORIENTATION.MIRROR_VERTICAL: 180,
    ORIENTATION.NORMAL_270_ROTATED: 90,
    ORIENTATION.MIRROR_HORIZONTAL_90_ROTATED: 90,
    ORIENTATION.NORMAL_90_ROTATED: 270,
    ORIENTATION.MIRROR_HORIZONTAL_270_ROTATED: 270,
}

# Orientations that additionally need a left-right flip after the rotation.
_MIRRORED = frozenset({
    ORIENTATION.MIRROR_HORIZONTAL,
    ORIENTATION.MIRROR_VERTICAL,
    ORIENTATION.MIRROR_HORIZONTAL_270_ROTATED,
    ORIENTATION.MIRROR_HORIZONTAL_90_ROTATED,
})

# Orientations whose correction is a quarter turn, i.e. width and height of
# the corrected image are exchanged (these are the values 5..8).
_SWAPS_AXES = frozenset(
    orientation
    for orientation, angle in _ROTATION_ANGLE.items()
    if angle % 180
)


def _exif_orientation(img):
    """Return the ``ORIENTATION`` stored in *img*, defaulting to normal.

    A missing tag, or a tag whose value is not one of the ``ORIENTATION``
    members (for example 0, 9 or a non-integer), is treated as
    ``ORIENTATION.NORMAL_HORIZONTAL``. Both public helpers below go through
    this function so that the reported size and the rotated pixels can
    never disagree on how a malformed tag is interpreted.
    """
    stored = img.getexif().get(ORIENTATION_EXIF_TAG, ORIENTATION.NORMAL_HORIZONTAL)
    try:
        return ORIENTATION(stored)
    except ValueError:
        return ORIENTATION.NORMAL_HORIZONTAL


def image_size_within_orientation(img: "Image.Image"):
    """Return ``(width, height)`` of *img* as it looks after EXIF correction.

    Args:
        img: an opened image exposing ``getexif()``, ``width`` and ``height``.

    Returns:
        A ``(width, height)`` tuple. The two values are exchanged when the
        stored orientation implies a quarter turn; otherwise they are
        returned as stored.
    """
    if _exif_orientation(img) in _SWAPS_AXES:
        return img.height, img.width
    return img.width, img.height


def rotate_within_exif(img: "Image.Image"):
    """Return *img* with the transformation requested by its EXIF tag applied.

    Args:
        img: an opened image exposing ``getexif()``, ``rotate()`` and
            ``transpose()``.

    Returns:
        The corrected image. When the orientation is normal, missing or
        invalid, the very same object is returned untouched.
    """
    orientation = _exif_orientation(img)

    angle = _ROTATION_ANGLE.get(orientation)
    if angle is not None:
        # expand=True lets the canvas follow the rotated picture so that a
        # quarter turn of a non-square image does not crop it.
        img = img.rotate(angle, expand=True)

    if orientation in _MIRRORED:
        # Mirrored orientations are a rotation followed by a left-right flip.
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    return img
@@ def __init__(self, quality, dimension=DimensionType.DIM_2D): @staticmethod def _compress_image(image_path, quality): image = image_path.to_image() if isinstance(image_path, av.VideoFrame) else Image.open(image_path) + image = rotate_within_exif(image) # Ensure image data fits into 8bit per pixel before RGB conversion as PIL clips values on conversion if image.mode == "I": # Image mode is 32bit integer pixels. diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index d2e9da5ff02..36d70eab927 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -194,14 +194,18 @@ def __iter__(self): if idx in self.range_: image = next(sources) img = Image.open(image, mode='r') + orientation = img.getexif().get(274, 1) img_name = os.path.relpath(image, self._data_dir) if self._data_dir \ else os.path.basename(image) name, extension = os.path.splitext(img_name) + width, height = img.width, img.height + if orientation > 4: + width, height = height, width image_properties = { 'name': name.replace('\\', '/'), 'extension': extension, - 'width': img.width, - 'height': img.height, + 'width': width, + 'height': height, } if self._meta and img_name in self._meta: image_properties['meta'] = self._meta[img_name]