Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pascal voc converter #9

Merged
merged 5 commits into from
Aug 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@
# Contributors

- **[Victor Salimonov](https://github.com/VikTorSalimonov)**

* Documentation, screencasts

- **[Sebastián Yonekura](https://github.com/syonekura)**
* [convert_to_voc.py](cvat/utils/convert_to_voc.py) - a utility for
converting CVAT XML to PASCAL VOC data annotation format.
2 changes: 1 addition & 1 deletion cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ rq==0.10.0
scipy==1.0.1
sqlparse==0.2.4
django-sendfile==0.3.11
dj-pagination==2.3.2
dj-pagination==2.3.2
Empty file added cvat/utils/__init__.py
Empty file.
80 changes: 80 additions & 0 deletions cvat/utils/convert_to_voc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
Given a CVAT XML and a directory with the image dataset, this script reads the
CVAT XML and writes the annotations in PASCAL VOC format into a given
directory.

This implementation only supports bounding boxes in CVAT annotation format. It
warns if it encounters any tracks or any annotations that are not bounding
boxes, and ignores them in both cases.

To use the script run:

python convert_to_voc.py cvat.xml path_to_image_directory output_directory
"""
import os
import argparse
import xml.etree.ElementTree
from PIL import Image
from pascal_voc_writer import Writer
import logging

# Module-named logger (rather than the root logger) so that applications
# embedding this module can filter or redirect its messages by name.
logger = logging.getLogger(__name__)
# Tags that may appear inside an <image> element (including the element
# itself) without triggering an "Ignoring tags" warning.
KNOWN_TAGS = {'box', 'image', 'attribute'}


def process_cvat_xml(xml_file, img_dir, annotation_dir):
    """
    Transforms a single XML in CVAT format to multiple PASCAL VOC format
    XMLs, one per ``<image>`` element of the input.

    Only ``<box>`` annotations are converted. ``<track>`` elements and any
    other tag found inside an ``<image>`` are reported with a warning and
    ignored.

    :param xml_file: CVAT format XML (anything accepted by
        ``xml.etree.ElementTree.parse``: a path or a file object)
    :param img_dir: image directory of the dataset; each image named in the
        XML is opened from here to read its width and height
    :param annotation_dir: directory of annotations with PASCAL VOC format;
        created when missing and reused when it already exists
    :return: None
    """
    # exist_ok=True allows converting into a directory that already exists
    # (e.g. when re-running the script) instead of raising FileExistsError.
    os.makedirs(annotation_dir, exist_ok=True)
    cvat_xml = xml.etree.ElementTree.parse(xml_file)

    # NOTE: Logger.warn is a deprecated alias of Logger.warning; it is kept
    # here because the accompanying unit tests assert on ``logger.warn``.
    tracks = cvat_xml.findall('track')
    if tracks:
        logger.warn('Cannot parse interpolation tracks, ignoring {} tracks'
                    .format(len(tracks)))

    for img_tag in cvat_xml.findall('image'):
        filename = img_tag.get('name')

        # The image is only opened to obtain its dimensions for the VOC file.
        filepath = os.path.join(img_dir, filename)
        with Image.open(filepath) as img:
            width, height = img.size

        writer = Writer(filepath, width, height)

        # img_tag.iter() also yields img_tag itself, hence 'image' is part
        # of KNOWN_TAGS.
        unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
        if unknown_tags:
            logger.warn('Ignoring tags for image {}: {}'
                        .format(filepath, unknown_tags))

        for box in img_tag.findall('box'):
            label = box.get('label')
            xmin = float(box.get('xtl'))
            ymin = float(box.get('ytl'))
            xmax = float(box.get('xbr'))
            ymax = float(box.get('ybr'))

            writer.addObject(label, xmin, ymin, xmax, ymax)

        fname = os.path.splitext(filename)[0] + '.xml'
        out_path = os.path.join(annotation_dir, fname)
        # Image names may contain sub-folders; make sure the matching output
        # folder exists before saving.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        writer.save(out_path)


# Command-line interface: three positional arguments, matching the usage
# line in the module docstring.
parser = argparse.ArgumentParser(description='Transforms CVAT XML to Pascal '
                                             'VOC format')
# Binary mode lets the XML parser decode the file according to its own
# encoding declaration instead of the platform's locale encoding.
parser.add_argument('cvat_xml', type=argparse.FileType('rb'),
                    help='CVAT XML file')
parser.add_argument('img_dir', help='Image directory of the dataset')
parser.add_argument('annotation_dir', help='Output directory of '
                                           'XML annotations')

if __name__ == '__main__':
    args = vars(parser.parse_args())
    # argparse already opened the XML file; close it once conversion is done.
    with args['cvat_xml'] as cvat_file:
        process_cvat_xml(cvat_file, args['img_dir'], args['annotation_dir'])
1 change: 1 addition & 0 deletions cvat/utils/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pascal-voc-writer==0.1.4
Empty file added cvat/utils/tests/__init__.py
Empty file.
179 changes: 179 additions & 0 deletions cvat/utils/tests/test_process_cvat_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import tempfile
import shutil
import os
from unittest import TestCase, mock
from cvat.utils.convert_to_voc import process_cvat_xml

# Sample CVAT dump in "annotation" mode: four <image> elements, each with one
# "car" <box>. The last image (C15_L1_0040.jpg) additionally contains a
# <point>, a tag the tests expect the converter to warn about.
XML_ANNOTATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<annotations>
<version>1.0</version>
<meta>
<task>
<id>1063</id>
<name>My annotation task</name>
<size>75</size>
<mode>annotation</mode>
<overlap>0</overlap>
<bugtracker></bugtracker>
<created>2018-06-06 11:57:54.807162+03:00</created>
<updated>2018-06-06 12:42:29.375251+03:00</updated>
<labels>
<label>
<name>car</name>
<attributes>
<attribute>@select=model:a,b,c,d</attribute>
</attributes>
</label>
</labels>
<segments>
<segment>
<id>3086</id>
<start>0</start>
<stop>74</stop>
<url>http://cvat.examle.com:8080/?id=3086</url>
</segment>
</segments>
<owner>
<username>admin</username>
<email></email>
</owner>
</task>
<dumped>2018-06-06 15:47:04.386866+03:00</dumped>
</meta>
<image id="0" name="C15_L1_0001.jpg">
<box label="car" xtl="38.95" ytl="26.51" xbr="140.64" ybr="54.29" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">a</attribute>
</box>
</image>
<image id="1" name="C15_L1_0002.jpg">
<box label="car" xtl="49.13" ytl="23.34" xbr="149.54" ybr="53.88" occluded="0">
<attribute name="parked">true</attribute>
<attribute name="model">a</attribute>
</box>
</image>
<image id="2" name="C15_L1_0003.jpg">
<box label="car" xtl="50.73" ytl="30.26" xbr="146.72" ybr="59.97" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">b</attribute>
</box>
</image>
<image id="39" name="C15_L1_0040.jpg">
<box label="car" xtl="49.60" ytl="30.15" xbr="150.19" ybr="58.06" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">c</attribute>
</box>
<point label="car" x="30.1" y="170.4" occluded="0">
<attribute name="parked">true</attribute>
<attribute name="model">a</attribute>
</point>
</image>
</annotations>
"""
# Sample CVAT dump in "interpolation" mode: two <track> elements (three boxes
# each) and no <image> elements.
XML_INTERPOLATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<annotations>
<version>1.0</version>
<meta>
<task>
<id>1062</id>
<name>My interpolation task</name>
<size>30084</size>
<mode>interpolation</mode>
<overlap>20</overlap>
<bugtracker></bugtracker>
<created>2018-05-31 14:13:36.483219+03:00</created>
<updated>2018-06-06 13:56:32.113705+03:00</updated>
<labels>
<label>
<name>car</name>
<attributes>
<attribute>@select=model:1,2,3,4</attribute>
</attributes>
</label>
</labels>
<segments>
<segment>
<id>3085</id>
<start>0</start>
<stop>30083</stop>
<url>http://cvat.example.com:8080/?id=3085</url>
</segment>
</segments>
<owner>
<username>admin</username>
<email></email>
</owner>
</task>
<dumped>2018-06-06 15:52:11.138470+03:00</dumped>
</meta>
<track id="0" label="car">
<box frame="110" xtl="634.12" ytl="37.68" xbr="661.50" ybr="71.37" outside="0" occluded="1" keyframe="1">
<attribute name="model">1</attribute>
</box>
<box frame="111" xtl="634.21" ytl="38.50" xbr="661.59" ybr="72.19" outside="0" occluded="1" keyframe="0">
<attribute name="model">1</attribute>
</box>
<box frame="112" xtl="634.30" ytl="39.32" xbr="661.67" ybr="73.01" outside="1" occluded="1" keyframe="1">
<attribute name="model">1</attribute>
</box>
</track>
<track id="1" label="car">
<box frame="0" xtl="626.81" ytl="30.96" xbr="656.05" ybr="58.88" outside="0" occluded="0" keyframe="1">
<attribute name="model">3</attribute>
</box>
<box frame="1" xtl="626.63" ytl="31.56" xbr="655.87" ybr="59.48" outside="0" occluded="0" keyframe="0">
<attribute name="model">3</attribute>
</box>
<box frame="2" xtl="626.09" ytl="33.38" xbr="655.33" ybr="61.29" outside="1" occluded="0" keyframe="1">
<attribute name="model">3</attribute>
</box>
</track>
</annotations>
"""


class TestProcessCvatXml(TestCase):
    """Tests for ``process_cvat_xml`` run against a scratch directory."""

    def setUp(self):
        # Every test gets its own temporary directory for input and output.
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    @mock.patch('cvat.utils.convert_to_voc.logger')
    @mock.patch('cvat.utils.convert_to_voc.Image')
    def test_parse_annotation_xml(self, mock_image, mock_logger):
        """One VOC XML is written per image; the <point> tag is warned about."""
        xml_filename = os.path.join(self.test_dir, 'annotations.xml')
        with open(xml_filename, mode='x') as xml_handle:
            xml_handle.write(XML_ANNOTATION_EXAMPLE)

        voc_dir = os.path.join(self.test_dir, 'voc_dir')

        # No real images exist on disk: Image.open is patched to return a
        # context manager yielding an object with a fixed size.
        width, height = 600, 400
        img = mock.MagicMock()
        img.size = width, height
        mock_image.open.return_value.__enter__.return_value = img

        images = ['C15_L1_0001', 'C15_L1_0002', 'C15_L1_0003', 'C15_L1_0040']
        expected_xmls = [os.path.join(voc_dir, x + '.xml')
                         for x in images]
        # Build the image path with os.path.join, as the converter does, so
        # the expected message matches on every platform.
        expected_warn = 'Ignoring tags for image {}: {}'.format(
            os.path.join('img_dir', 'C15_L1_0040.jpg'), {'point'})

        process_cvat_xml(xml_filename, 'img_dir', voc_dir)

        for exp in expected_xmls:
            with self.subTest(xml=exp):
                self.assertTrue(os.path.exists(exp))
        mock_logger.warn.assert_called_once_with(expected_warn)

    @mock.patch('cvat.utils.convert_to_voc.logger')
    def test_parse_interpolation_xml(self, mock_logger):
        """Tracks are skipped with a warning and no VOC file is produced."""
        xml_filename = os.path.join(self.test_dir, 'interpolations.xml')
        with open(xml_filename, mode='x') as xml_handle:
            xml_handle.write(XML_INTERPOLATION_EXAMPLE)

        voc_dir = os.path.join(self.test_dir, 'voc_dir')
        expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'

        process_cvat_xml(xml_filename, 'img_dir', voc_dir)

        # The output directory is still created, but stays empty.
        self.assertTrue(os.path.exists(voc_dir))
        self.assertEqual(os.listdir(voc_dir), [])
        mock_logger.warn.assert_called_once_with(expected_warn)