Skip to content

Commit

Permalink
Merge pull request #9 from syonekura/feature/pascal_voc_converter
Browse files Browse the repository at this point in the history
Feature/pascal voc converter
  • Loading branch information
nmanovic committed Aug 23, 2018
2 parents c579ba2 + 52d2149 commit e7fba70
Show file tree
Hide file tree
Showing 7 changed files with 265 additions and 2 deletions.
5 changes: 4 additions & 1 deletion CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@
# Contributors

- **[Victor Salimonov](https://github.com/VikTorSalimonov)**

* Documentation, screencasts

- **[Sebastián Yonekura](https://github.com/syonekura)**
* [convert_to_voc.py](cvat/utils/convert_to_voc.py) - a utility for
converting CVAT XML to PASCAL VOC data annotation format.
2 changes: 1 addition & 1 deletion cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ rq==0.10.0
scipy==1.0.1
sqlparse==0.2.4
django-sendfile==0.3.11
dj-pagination==2.3.2
dj-pagination==2.3.2
Empty file added cvat/utils/__init__.py
Empty file.
80 changes: 80 additions & 0 deletions cvat/utils/convert_to_voc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
Given a CVAT XML and a directory with the image dataset, this script reads the
CVAT XML and writes the annotations in PASCAL VOC format into a given
directory.
This implementation only supports bounding boxes in CVAT annotation format, and
warns if it encounters any tracks or annotations that are not bounding boxes,
ignoring them in both cases.
To use the script run:
python convert_to_voc.py cvat.xml path_to_image_directory output_directory
"""
import os
import argparse
import xml.etree.ElementTree
from PIL import Image
from pascal_voc_writer import Writer
import logging

# Module-wide logger; called with no name, so this is the root logger.
logger = logging.getLogger()
# Tags that may appear in an <image> subtree without triggering a warning.
# 'image' is listed because Element.iter() also yields the element itself.
KNOWN_TAGS = {'box', 'image', 'attribute'}


def process_cvat_xml(xml_file, img_dir, annotation_dir):
    """
    Transform a single CVAT XML into one PASCAL VOC XML per ``<image>``.

    Only ``<box>`` annotations found directly under an ``<image>`` element are
    converted. Interpolation ``<track>`` elements, and any tag under an image
    that is not listed in ``KNOWN_TAGS``, are reported through ``logger`` and
    skipped.

    :param xml_file: CVAT format XML (a path or an open file object; it is
        handed directly to ``xml.etree.ElementTree.parse``)
    :param img_dir: image directory of the dataset; each image is opened to
        read its width and height
    :param annotation_dir: directory of annotations with PASCAL VOC format;
        created when missing and reused when it already exists
    :return: None
    """
    # exist_ok=True: converting into an already existing output directory
    # (e.g. re-running the script) must not fail with FileExistsError.
    os.makedirs(annotation_dir, exist_ok=True)
    cvat_xml = xml.etree.ElementTree.parse(xml_file)

    # Only the number of tracks is reported, so the elements are not unpacked.
    tracks = cvat_xml.findall('track')
    if tracks:
        # NOTE: ``warn`` is kept (rather than ``warning``) because the unit
        # tests patch ``logger`` and assert on the ``warn`` attribute.
        logger.warn('Cannot parse interpolation tracks, ignoring {} tracks'
                    .format(len(tracks)))

    for img_tag in cvat_xml.findall('image'):
        filename = img_tag.get('name')
        filepath = os.path.join(img_dir, filename)

        # The VOC writer needs the image dimensions, which are read from the
        # image file itself.
        with Image.open(filepath) as img:
            width, height = img.size

        writer = Writer(filepath, width, height)

        # iter() walks the whole subtree (including img_tag itself), so nested
        # unsupported shapes are detected as well.
        unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
        if unknown_tags:
            logger.warn('Ignoring tags for image {}: {}'
                        .format(filepath, unknown_tags))

        for box in img_tag.findall('box'):
            label = box.get('label')
            xmin = float(box.get('xtl'))
            ymin = float(box.get('ytl'))
            xmax = float(box.get('xbr'))
            ymax = float(box.get('ybr'))

            writer.addObject(label, xmin, ymin, xmax, ymax)

        # One output XML per image, named after the image without extension.
        fname = os.path.splitext(filename)[0] + '.xml'
        writer.save(os.path.join(annotation_dir, fname))


# Command-line interface. The parser is built at module level so that the
# name ``parser`` stays available to anything importing this module.
parser = argparse.ArgumentParser(
    description='Transforms CVAT XML to Pascal VOC format',
)
parser.add_argument(
    'cvat_xml',
    type=argparse.FileType(),
    help='CVAT XML file',
)
parser.add_argument(
    'img_dir',
    help='Image directory of the dataset',
)
parser.add_argument(
    'annotation_dir',
    help='Output directory of XML annotations',
)

if __name__ == '__main__':
    # Positional arguments map one-to-one onto process_cvat_xml's parameters.
    cli = parser.parse_args()
    process_cvat_xml(cli.cvat_xml, cli.img_dir, cli.annotation_dir)
1 change: 1 addition & 0 deletions cvat/utils/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pascal-voc-writer==0.1.4
Empty file added cvat/utils/tests/__init__.py
Empty file.
179 changes: 179 additions & 0 deletions cvat/utils/tests/test_process_cvat_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import tempfile
import shutil
import os
from unittest import TestCase, mock
from cvat.utils.convert_to_voc import process_cvat_xml

# Fixture: a CVAT dump in "annotation" mode with four <image> elements, each
# holding one <box>. The last image additionally contains a <point>, which is
# not a box and is used to check the "Ignoring tags" warning.
XML_ANNOTATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<annotations>
<version>1.0</version>
<meta>
<task>
<id>1063</id>
<name>My annotation task</name>
<size>75</size>
<mode>annotation</mode>
<overlap>0</overlap>
<bugtracker></bugtracker>
<created>2018-06-06 11:57:54.807162+03:00</created>
<updated>2018-06-06 12:42:29.375251+03:00</updated>
<labels>
<label>
<name>car</name>
<attributes>
<attribute>@select=model:a,b,c,d</attribute>
</attributes>
</label>
</labels>
<segments>
<segment>
<id>3086</id>
<start>0</start>
<stop>74</stop>
<url>http://cvat.examle.com:8080/?id=3086</url>
</segment>
</segments>
<owner>
<username>admin</username>
<email></email>
</owner>
</task>
<dumped>2018-06-06 15:47:04.386866+03:00</dumped>
</meta>
<image id="0" name="C15_L1_0001.jpg">
<box label="car" xtl="38.95" ytl="26.51" xbr="140.64" ybr="54.29" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">a</attribute>
</box>
</image>
<image id="1" name="C15_L1_0002.jpg">
<box label="car" xtl="49.13" ytl="23.34" xbr="149.54" ybr="53.88" occluded="0">
<attribute name="parked">true</attribute>
<attribute name="model">a</attribute>
</box>
</image>
<image id="2" name="C15_L1_0003.jpg">
<box label="car" xtl="50.73" ytl="30.26" xbr="146.72" ybr="59.97" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">b</attribute>
</box>
</image>
<image id="39" name="C15_L1_0040.jpg">
<box label="car" xtl="49.60" ytl="30.15" xbr="150.19" ybr="58.06" occluded="0">
<attribute name="parked">false</attribute>
<attribute name="model">c</attribute>
</box>
<point label="car" x="30.1" y="170.4" occluded="0">
<attribute name="parked">true</attribute>
<attribute name="model">a</attribute>
</point>
</image>
</annotations>
"""
# Fixture: a CVAT dump in "interpolation" mode. It contains two <track>
# elements and no <image> elements, so no per-image output is expected.
XML_INTERPOLATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
<annotations>
<version>1.0</version>
<meta>
<task>
<id>1062</id>
<name>My interpolation task</name>
<size>30084</size>
<mode>interpolation</mode>
<overlap>20</overlap>
<bugtracker></bugtracker>
<created>2018-05-31 14:13:36.483219+03:00</created>
<updated>2018-06-06 13:56:32.113705+03:00</updated>
<labels>
<label>
<name>car</name>
<attributes>
<attribute>@select=model:1,2,3,4</attribute>
</attributes>
</label>
</labels>
<segments>
<segment>
<id>3085</id>
<start>0</start>
<stop>30083</stop>
<url>http://cvat.example.com:8080/?id=3085</url>
</segment>
</segments>
<owner>
<username>admin</username>
<email></email>
</owner>
</task>
<dumped>2018-06-06 15:52:11.138470+03:00</dumped>
</meta>
<track id="0" label="car">
<box frame="110" xtl="634.12" ytl="37.68" xbr="661.50" ybr="71.37" outside="0" occluded="1" keyframe="1">
<attribute name="model">1</attribute>
</box>
<box frame="111" xtl="634.21" ytl="38.50" xbr="661.59" ybr="72.19" outside="0" occluded="1" keyframe="0">
<attribute name="model">1</attribute>
</box>
<box frame="112" xtl="634.30" ytl="39.32" xbr="661.67" ybr="73.01" outside="1" occluded="1" keyframe="1">
<attribute name="model">1</attribute>
</box>
</track>
<track id="1" label="car">
<box frame="0" xtl="626.81" ytl="30.96" xbr="656.05" ybr="58.88" outside="0" occluded="0" keyframe="1">
<attribute name="model">3</attribute>
</box>
<box frame="1" xtl="626.63" ytl="31.56" xbr="655.87" ybr="59.48" outside="0" occluded="0" keyframe="0">
<attribute name="model">3</attribute>
</box>
<box frame="2" xtl="626.09" ytl="33.38" xbr="655.33" ybr="61.29" outside="1" occluded="0" keyframe="1">
<attribute name="model">3</attribute>
</box>
</track>
</annotations>
"""


class TestProcessCvatXml(TestCase):
    """Tests for ``process_cvat_xml`` on annotation and interpolation dumps."""

    def setUp(self):
        # Every test gets its own scratch directory for inputs and outputs.
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    @mock.patch('cvat.utils.convert_to_voc.logger')
    @mock.patch('cvat.utils.convert_to_voc.Image')
    def test_parse_annotation_xml(self, mock_image, mock_logger):
        """One VOC XML is written per image; the <point> tag is warned about."""
        xml_filename = os.path.join(self.test_dir, 'annotations.xml')
        with open(xml_filename, mode='x') as file:
            file.write(XML_ANNOTATION_EXAMPLE)

        voc_dir = os.path.join(self.test_dir, 'voc_dir')

        # No real images exist on disk: Image.open() is mocked so that the
        # context manager yields an object exposing only ``size``.
        width, height = 600, 400
        img = mock.MagicMock()
        img.size = width, height
        mock_image.open.return_value.__enter__.return_value = img

        images = ['C15_L1_0001', 'C15_L1_0002', 'C15_L1_0003', 'C15_L1_0040']
        expected_xmls = [os.path.join(voc_dir, x + '.xml')
                         for x in images]
        # Build the path with os.path.join, exactly as the converter does, so
        # the expected message also matches on platforms whose separator is
        # not '/'.
        expected_warn = 'Ignoring tags for image {}: {}'.format(
            os.path.join('img_dir', 'C15_L1_0040.jpg'), {'point'})

        process_cvat_xml(xml_filename, 'img_dir', voc_dir)

        for exp in expected_xmls:
            self.assertTrue(os.path.exists(exp),
                            'missing output file: {}'.format(exp))
        mock_logger.warn.assert_called_once_with(expected_warn)

    @mock.patch('cvat.utils.convert_to_voc.logger')
    def test_parse_interpolation_xml(self, mock_logger):
        """Tracks are skipped with a warning and no VOC XML is produced."""
        xml_filename = os.path.join(self.test_dir, 'interpolations.xml')
        with open(xml_filename, mode='x') as file:
            file.write(XML_INTERPOLATION_EXAMPLE)

        voc_dir = os.path.join(self.test_dir, 'voc_dir')
        expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'

        # Image is not patched here: the fixture has no <image> elements, so
        # no image file is ever opened.
        process_cvat_xml(xml_filename, 'img_dir', voc_dir)

        self.assertTrue(os.path.exists(voc_dir))
        # assertEqual reports the unexpected file names on failure.
        self.assertEqual(os.listdir(voc_dir), [])
        mock_logger.warn.assert_called_once_with(expected_warn)

0 comments on commit e7fba70

Please sign in to comment.