Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added DICOM conversion script #3095

Merged
merged 5 commits into from
Apr 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Documentation on mask annotation (<https://github.com/openvinotoolkit/cvat/pull/3044>)
- Hotkeys to switch a label of existing object or to change default label (for objects created with N) (<https://github.com/openvinotoolkit/cvat/pull/3070>)
- A script to convert some kinds of DICOM files to regular images (<https://github.com/openvinotoolkit/cvat/pull/3095>)

### Changed

Expand Down
21 changes: 21 additions & 0 deletions utils/dicom_converter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Description

The script converts some kinds of DICOM data to regular images.
You can then annotate these images in CVAT and get a segmentation mask.
The conversion script was tested on CT, MT and some multi-frame DICOM data.
Each frame of a multi-frame DICOM file is saved under the name of the source file with a zero-padded frame index appended, starting from zero (the padding width depends on the number of frames), for example: `name_000.png`, `name_001.png`, `name_002.png`, etc.

# Installation

```bash
python3 -m venv .env
. .env/bin/activate
pip install -r requirements.txt
```

# Running
Copy link
Contributor

@nmanovic nmanovic Apr 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to provide an example for a real dataset (minor comment)


```
. .env/bin/activate # if not activated
python script.py input_data output_data
```
4 changes: 4 additions & 0 deletions utils/dicom_converter/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
numpy==1.20.2
Pillow==8.2.0
pydicom==2.1.2
tqdm==4.60.0
113 changes: 113 additions & 0 deletions utils/dicom_converter/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT


import os
import argparse
import logging
from glob import glob

import numpy as np
from tqdm import tqdm
from PIL import Image
from pydicom import dcmread
from pydicom.pixel_data_handlers.util import convert_color_space


# Script configuration: logging format and the command-line interface.
# NOTE: the arguments are parsed when this module is loaded; the resulting
# `args` namespace is read by the `__main__` entry point of this file.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)

parser = argparse.ArgumentParser(
    description=(
        'The script is used to convert some kinds of DICOM (.dcm) files '
        'to regular image files (.png)'
    ),
)
parser.add_argument(
    'input',
    type=str,
    help=(
        'A root directory with medical data files in DICOM format. '
        'The script finds all these files based on their extension'
    ),
)
parser.add_argument(
    'output',
    type=str,
    help=(
        'Where to save converted files. '
        'The script repeats internal directories structure of the input root directory'
    ),
)
args = parser.parse_args()


class Converter:
    """Read one DICOM file and iterate over its frames as PIL images.

    ``len(converter)`` is the number of frames (1 when the file has no
    ``NumberOfFrames`` element). Iterating yields one ``PIL.Image`` per
    frame, with pixel values stretched to ``0 .. 2**BitsStored - 1`` using
    the minimum and maximum of the whole pixel array.
    """

    def __init__(self, filename):
        """Load the pixel data and the metadata needed for conversion.

        Args:
            filename: Path of the DICOM file to read.
        """
        with dcmread(filename) as ds:
            pixel_array = ds.pixel_array
            self._pixel_array = pixel_array
            self._photometric_interpretation = ds.PhotometricInterpretation
            self._min_value = pixel_array.min()
            self._max_value = pixel_array.max()
            self._depth = ds.BitsStored

            logging.debug('File: %s', filename)
            logging.debug('Photometric interpretation: %s', self._photometric_interpretation)
            logging.debug('Min value: %s', self._min_value)
            logging.debug('Max value: %s', self._max_value)
            logging.debug('Depth: %s', self._depth)

            # Files without a NumberOfFrames element are treated as
            # single-frame.
            try:
                self._length = ds['NumberOfFrames'].value
            except KeyError:
                self._length = 1

    def __len__(self):
        """Return the number of frames in the file."""
        return self._length

    def _normalize(self, frame):
        """Stretch one frame to the range ``0 .. 2**depth - 1``.

        Args:
            frame: Array with the pixel values of a single frame.

        Returns:
            A 64-bit integer array of the same shape as ``frame``.
        """
        low = int(self._min_value)
        high = int(self._max_value)
        span = high - low

        # Widen BEFORE subtracting: in the source dtype (e.g. int16) the
        # difference ``value - min`` can overflow, and a platform-default
        # ``int`` may be only 32 bits wide, which is not enough for the
        # 65535 * 65535 product below.
        shifted = frame.astype(np.int64) - low
        if span == 0:
            # Uniform image: every pixel equals the minimum, so the shifted
            # frame is already all zeros; avoid dividing by zero.
            return shifted
        return shifted * (2 ** int(self._depth) - 1) // span

    def __iter__(self):
        """Yield every frame as a ``PIL.Image``.

        Raises:
            ValueError: If the stored bit depth is neither 8 nor 16. The
                error is raised when the first frame is requested.
        """
        # Fail before doing any per-frame work for an unsupported depth.
        if self._depth == 8:
            dtype = np.uint8
        elif self._depth == 16:
            dtype = np.uint16
        else:
            raise ValueError('Not supported depth {}'.format(self._depth))

        # Use a local view so that repeated iteration does not keep adding
        # leading axes to the stored array.
        frames = self._pixel_array
        if self._length == 1:
            frames = np.expand_dims(frames, axis=0)

        for frame in frames:
            # Normalization to an output range 0..255, 0..65535
            frame = self._normalize(frame)

            # In some cases we need to convert colors additionally
            if 'YBR' in self._photometric_interpretation:
                frame = convert_color_space(frame, self._photometric_interpretation, 'RGB')

            yield Image.fromarray(frame.astype(dtype))


def main(root_dir, output_root_dir):
    """Convert every ``*.dcm`` file found under ``root_dir`` to PNG.

    The directory structure below ``root_dir`` is reproduced below
    ``output_root_dir``. A single-frame file ``name.dcm`` becomes
    ``name.png``; the frames of a multi-frame file become
    ``name_<index>.png`` with a zero-padded, zero-based index. A failure
    while converting one file is logged and the remaining files are still
    processed.

    Args:
        root_dir: Directory that is searched recursively for DICOM files.
        output_root_dir: Directory that receives the converted images.
    """
    dicom_files = glob(os.path.join(root_dir, '**', '*.dcm'), recursive=True)
    if not dicom_files:
        logging.info('DICOM files are not found under the specified path')
        return

    logging.info('Number of found DICOM files: %d', len(dicom_files))

    pbar = tqdm(dicom_files)
    for input_filename in pbar:
        pbar.set_description('Conversion: ' + input_filename)

        output_subpath = os.path.relpath(os.path.dirname(input_filename), root_dir)
        output_path = os.path.join(output_root_dir, output_subpath)
        input_stem = os.path.splitext(os.path.basename(input_filename))[0]
        output_stem = os.path.join(output_path, input_stem)

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(output_path, exist_ok=True)

        try:
            iterated_converter = Converter(input_filename)
            length = len(iterated_converter)
            # Pad the frame index to the number of digits in the frame count.
            index_width = len(str(length))
            for i, image in enumerate(iterated_converter):
                if length == 1:
                    image.save('{}.png'.format(output_stem))
                else:
                    filename_index = str(i).zfill(index_width)
                    image.save('{}_{}.png'.format(output_stem, filename_index))
        except Exception as ex:
            # Best effort: one broken file must not stop the whole batch.
            logging.error('Error while processing %s', input_filename)
            logging.error(ex)

if __name__ == '__main__':
    # os.path.abspath() already normalizes the path, which removes trailing
    # separators. Stripping them by hand beforehand turned a root path such
    # as "/" into an empty string, which abspath() then resolved to the
    # current working directory.
    input_root_path = os.path.abspath(args.input)
    output_root_path = os.path.abspath(args.output)

    logging.info('From: %s', input_root_path)
    logging.info('To: %s', output_root_path)
    main(input_root_path, output_root_path)