cvat-ai · nmanovic · Aug 23, 2018 · Jul 16, 2018 · Jul 16, 2018 · Aug 20, 2018
@@ -18,5 +18,8 @@
 # Contributors
 
 - **[Victor Salimonov](https://github.com/VikTorSalimonov)**
-
   * Documentation, screencasts
+
+- **[Sebastián Yonekura](https://github.com/syonekura)**
+  * [convert_to_voc.py](cvat/utils/convert_to_voc.py) - a utility for
+  converting CVAT XML to PASCAL VOC data annotation format.
@@ -21,4 +21,4 @@ rq==0.10.0
 scipy==1.0.1
 sqlparse==0.2.4
 django-sendfile==0.3.11
-dj-pagination==2.3.2
+dj-pagination==2.3.2
@@ -0,0 +1,80 @@
+"""
+Given a CVAT XML and a directory with the image dataset, this script reads the
+CVAT XML and writes the annotations in PASCAL VOC format into a given
+directory.
+
+This implementation only supports bounding boxes in CVAT annotation format, and
+warns if it encounters any tracks or annotations that are not bounding boxes,
+ignoring them in both cases.
+
+To use the script run:
+
+python convert_to_voc.py cvat.xml path_to_image_directory output_directory
+"""
+import os
+import argparse
+import xml.etree.ElementTree
+from PIL import Image
+from pascal_voc_writer import Writer
+import logging
+
+# Module-scoped logger: records still propagate to the root logger's handlers,
+# but they are attributed to this module instead of to the root logger itself.
+logger = logging.getLogger(__name__)
+# Tags the converter understands inside an <image> element; any other tag
+# found there is reported with a warning and skipped.
+KNOWN_TAGS = {'box', 'image', 'attribute'}
+
+
+def process_cvat_xml(xml_file, img_dir, annotation_dir):
+    """
+    Transforms a single XML in CVAT format to multiple PASCAL VOC format
+    XMLs, one per ``<image>`` element.
+
+    Only ``<box>`` annotations found under ``<image>`` elements are converted.
+    ``<track>`` elements and any other tag nested under an image are reported
+    with a warning and skipped.
+
+    :param xml_file: CVAT format XML (anything accepted by
+        ``xml.etree.ElementTree.parse``: a path or an open file object)
+    :param img_dir: image directory of the dataset; every image named in the
+        XML is opened from here to read its width and height
+    :param annotation_dir: directory of annotations with PASCAL VOC format;
+        created if missing, reused if it already exists
+    :return: None
+    """
+    # exist_ok lets the conversion be re-run into an existing output directory
+    # instead of failing with FileExistsError.
+    os.makedirs(annotation_dir, exist_ok=True)
+    cvat_xml = xml.etree.ElementTree.parse(xml_file)
+
+    # NOTE: logger.warn is a deprecated alias of logger.warning; it is kept
+    # because the unit tests for this function assert on ``warn``.
+    tracks = cvat_xml.findall('track')
+    if tracks:
+        logger.warn('Cannot parse interpolation tracks, ignoring {} tracks'
+                    .format(len(tracks)))
+
+    for img_tag in cvat_xml.findall('image'):
+        filename = img_tag.get('name')
+
+        filepath = os.path.join(img_dir, filename)
+        # The image is opened only to read its dimensions.
+        with Image.open(filepath) as img:
+            width, height = img.size
+
+        writer = Writer(filepath, width, height)
+
+        # img_tag.iter() yields the image element and all its descendants, so
+        # anything that is not a box/attribute (or the image itself) is
+        # reported once per image.
+        unknown_tags = {x.tag for x in img_tag.iter()}.difference(KNOWN_TAGS)
+        if unknown_tags:
+            logger.warn('Ignoring tags for image {}: {}'
+                        .format(filepath, unknown_tags))
+
+        for box in img_tag.findall('box'):
+            label = box.get('label')
+            xmin = float(box.get('xtl'))
+            ymin = float(box.get('ytl'))
+            xmax = float(box.get('xbr'))
+            ymax = float(box.get('ybr'))
+
+            writer.addObject(label, xmin, ymin, xmax, ymax)
+
+        fname = os.path.splitext(filename)[0] + '.xml'
+        out_path = os.path.join(annotation_dir, fname)
+        # If the image name carries a sub-directory component, mirror it under
+        # annotation_dir so that saving does not fail on a missing folder.
+        os.makedirs(os.path.dirname(out_path), exist_ok=True)
+        writer.save(out_path)
+
+
+parser = argparse.ArgumentParser(description='Transforms CVAT XML to Pascal '
+                                             'VOC format')
+# Open the XML in binary mode so the XML parser decodes it from the encoding
+# declared in the document rather than from the platform's locale encoding.
+parser.add_argument('cvat_xml', type=argparse.FileType('rb'),
+                    help='CVAT XML file')
+parser.add_argument('img_dir', help='Image directory of the dataset')
+parser.add_argument('annotation_dir', help='Output directory of '
+                                           'XML annotations')
+
+if __name__ == '__main__':
+    args = vars(parser.parse_args())
+    # argparse opened the file for us; close it once the conversion is done.
+    with args['cvat_xml'] as cvat_xml_file:
+        process_cvat_xml(cvat_xml_file, args['img_dir'],
+                         args['annotation_dir'])
@@ -0,0 +1 @@
+pascal-voc-writer==0.1.4
@@ -0,0 +1,179 @@
+import tempfile
+import shutil
+import os
+from unittest import TestCase, mock
+from cvat.utils.convert_to_voc import process_cvat_xml
+
+# CVAT dump in annotation mode: four <image> elements, each holding one <box>.
+# The last image additionally holds a <point>, which the converter is expected
+# to warn about and ignore.
+XML_ANNOTATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
+<annotations>
+  <version>1.0</version>
+  <meta>
+    <task>
+      <id>1063</id>
+      <name>My annotation task</name>
+      <size>75</size>
+      <mode>annotation</mode>
+      <overlap>0</overlap>
+      <bugtracker></bugtracker>
+      <created>2018-06-06 11:57:54.807162+03:00</created>
+      <updated>2018-06-06 12:42:29.375251+03:00</updated>
+      <labels>
+        <label>
+          <name>car</name>
+          <attributes>
+            <attribute>@select=model:a,b,c,d</attribute>
+          </attributes>
+        </label>
+      </labels>
+      <segments>
+        <segment>
+          <id>3086</id>
+          <start>0</start>
+          <stop>74</stop>
+          <url>http://cvat.examle.com:8080/?id=3086</url>
+        </segment>
+      </segments>
+      <owner>
+        <username>admin</username>
+        <email></email>
+      </owner>
+    </task>
+    <dumped>2018-06-06 15:47:04.386866+03:00</dumped>
+  </meta>
+  <image id="0" name="C15_L1_0001.jpg">
+    <box label="car" xtl="38.95" ytl="26.51" xbr="140.64" ybr="54.29" occluded="0">
+      <attribute name="parked">false</attribute>
+      <attribute name="model">a</attribute>
+    </box>
+  </image>
+  <image id="1" name="C15_L1_0002.jpg">
+    <box label="car" xtl="49.13" ytl="23.34" xbr="149.54" ybr="53.88" occluded="0">
+      <attribute name="parked">true</attribute>
+      <attribute name="model">a</attribute>
+    </box>
+  </image>
+  <image id="2" name="C15_L1_0003.jpg">
+    <box label="car" xtl="50.73" ytl="30.26" xbr="146.72" ybr="59.97" occluded="0">
+      <attribute name="parked">false</attribute>
+      <attribute name="model">b</attribute>
+    </box>
+  </image>
+  <image id="39" name="C15_L1_0040.jpg">
+    <box label="car" xtl="49.60" ytl="30.15" xbr="150.19" ybr="58.06" occluded="0">
+      <attribute name="parked">false</attribute>
+      <attribute name="model">c</attribute>
+    </box>
+    <point label="car" x="30.1" y="170.4" occluded="0">
+      <attribute name="parked">true</attribute>
+      <attribute name="model">a</attribute>
+    </point>
+  </image>
+</annotations>
+"""
+# CVAT dump in interpolation mode: two <track> elements and no <image>
+# elements, so the converter is expected to warn and write no VOC files.
+XML_INTERPOLATION_EXAMPLE = """<?xml version="1.0" encoding="utf-8"?>
+<annotations>
+  <version>1.0</version>
+  <meta>
+    <task>
+      <id>1062</id>
+      <name>My interpolation task</name>
+      <size>30084</size>
+      <mode>interpolation</mode>
+      <overlap>20</overlap>
+      <bugtracker></bugtracker>
+      <created>2018-05-31 14:13:36.483219+03:00</created>
+      <updated>2018-06-06 13:56:32.113705+03:00</updated>
+      <labels>
+        <label>
+          <name>car</name>
+          <attributes>
+            <attribute>@select=model:1,2,3,4</attribute>
+          </attributes>
+        </label>
+      </labels>
+      <segments>
+        <segment>
+          <id>3085</id>
+          <start>0</start>
+          <stop>30083</stop>
+          <url>http://cvat.example.com:8080/?id=3085</url>
+        </segment>
+      </segments>
+      <owner>
+        <username>admin</username>
+        <email></email>
+      </owner>
+    </task>
+    <dumped>2018-06-06 15:52:11.138470+03:00</dumped>
+  </meta>
+  <track id="0" label="car">
+    <box frame="110" xtl="634.12" ytl="37.68" xbr="661.50" ybr="71.37" outside="0" occluded="1" keyframe="1">
+      <attribute name="model">1</attribute>
+    </box>
+    <box frame="111" xtl="634.21" ytl="38.50" xbr="661.59" ybr="72.19" outside="0" occluded="1" keyframe="0">
+      <attribute name="model">1</attribute>
+    </box>
+    <box frame="112" xtl="634.30" ytl="39.32" xbr="661.67" ybr="73.01" outside="1" occluded="1" keyframe="1">
+      <attribute name="model">1</attribute>
+    </box>
+  </track>
+  <track id="1" label="car">
+    <box frame="0" xtl="626.81" ytl="30.96" xbr="656.05" ybr="58.88" outside="0" occluded="0" keyframe="1">
+      <attribute name="model">3</attribute>
+    </box>
+    <box frame="1" xtl="626.63" ytl="31.56" xbr="655.87" ybr="59.48" outside="0" occluded="0" keyframe="0">
+      <attribute name="model">3</attribute>
+    </box>
+    <box frame="2" xtl="626.09" ytl="33.38" xbr="655.33" ybr="61.29" outside="1" occluded="0" keyframe="1">
+      <attribute name="model">3</attribute>
+    </box>
+  </track>
+</annotations>
+"""
+
+
+class TestProcessCvatXml(TestCase):
+    """Run process_cvat_xml against small CVAT dumps in a scratch directory."""
+
+    def setUp(self):
+        # Fresh scratch directory per test for the XML input and VOC output.
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir)
+
+    @mock.patch('cvat.utils.convert_to_voc.logger')
+    @mock.patch('cvat.utils.convert_to_voc.Image')
+    def test_parse_annotation_xml(self, mock_image, mock_logger):
+        """One VOC XML is written per image and the <point> tag is warned."""
+        xml_filename = os.path.join(self.test_dir, 'annotations.xml')
+        with open(xml_filename, mode='x') as file:
+            file.write(XML_ANNOTATION_EXAMPLE)
+
+        voc_dir = os.path.join(self.test_dir, 'voc_dir')
+
+        # The images do not exist on disk, so Image.open is mocked to return
+        # a context manager yielding an object that only exposes ``size``.
+        width, height = 600, 400
+        img = mock.MagicMock()
+        img.size = width, height
+        mock_image.open.return_value.__enter__.return_value = img
+
+        images = ['C15_L1_0001', 'C15_L1_0002', 'C15_L1_0003', 'C15_L1_0040']
+        expected_xmls = [os.path.join(voc_dir, x + '.xml')
+                         for x in images]
+        # Build the image path with os.path.join, as the converter does, so
+        # the expected message uses the platform's path separator.
+        expected_warn = 'Ignoring tags for image {}: {}'.format(
+            os.path.join('img_dir', 'C15_L1_0040.jpg'), {'point'})
+        process_cvat_xml(xml_filename, 'img_dir', voc_dir)
+        for exp in expected_xmls:
+            self.assertTrue(os.path.exists(exp), exp)
+        mock_logger.warn.assert_called_once_with(expected_warn)
+
+    @mock.patch('cvat.utils.convert_to_voc.logger')
+    def test_parse_interpolation_xml(self, mock_logger):
+        """Tracks are skipped with one warning and no VOC file is written."""
+        xml_filename = os.path.join(self.test_dir, 'interpolations.xml')
+        with open(xml_filename, mode='x') as file:
+            file.write(XML_INTERPOLATION_EXAMPLE)
+
+        voc_dir = os.path.join(self.test_dir, 'voc_dir')
+        expected_warn = 'Cannot parse interpolation tracks, ignoring 2 tracks'
+
+        process_cvat_xml(xml_filename, 'img_dir', voc_dir)
+
+        self.assertTrue(os.path.exists(voc_dir))
+        # assertEqual shows the unexpected file names if the check fails.
+        self.assertEqual(os.listdir(voc_dir), [])
+        mock_logger.warn.assert_called_once_with(expected_warn)