Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Semi Automatic Segmentation #332

Merged
merged 10 commits into from
Mar 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The ReID application for automatic bounding box merging has been added (#299)
- Keyboard shortcuts to switch next/previous default shape type (box, polygon etc) [Alt + <, Alt + >] (#316)
- Converter for VOC now supports interpolation tracks
- Semi-automatic semantic segmentation with the [Deep Extreme Cut](http://www.vision.ee.ethz.ch/~cvlsegmentation/dextr/) work

### Changed
- Propagation setup has been moved from settings to bottom player panel
Expand Down
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,16 @@ RUN if [ "$OPENVINO_TOOLKIT" = "yes" ]; then \
wget https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin -O reid/reid.bin; \
fi

# TODO: CHANGE URL
ARG WITH_DEXTR
ENV WITH_DEXTR=${WITH_DEXTR}
ENV DEXTR_MODEL_DIR=${HOME}/models/dextr
RUN if [ "$WITH_DEXTR" = "yes" ]; then \
mkdir ${DEXTR_MODEL_DIR} -p && \
wget https://download.01.org/openvinotoolkit/models_contrib/cvat/dextr_model_v1.zip -O ${DEXTR_MODEL_DIR}/dextr.zip && \
unzip ${DEXTR_MODEL_DIR}/dextr.zip -d ${DEXTR_MODEL_DIR} && rm ${DEXTR_MODEL_DIR}/dextr.zip; \
fi

COPY ssh ${HOME}/.ssh
COPY cvat/ ${HOME}/cvat
COPY tests ${HOME}/tests
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ docker-compose -f docker-compose.yml -f components/cuda/docker-compose.cuda.yml
- [Analytics: management and monitoring of data annotation team](components/analytics/README.md)
- [TF Object Detection API: auto annotation](components/tf_annotation/README.md)
- [Support for NVIDIA GPUs](components/cuda/README.md)
- [Semi-automatic segmentation with Deep Extreme Cut](cvat/apps/dextr_segmentation/README.md)

### Create superuser account

Expand Down
37 changes: 37 additions & 0 deletions cvat/apps/auto_annotation/inference_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from openvino.inference_engine import IENetwork, IEPlugin

import subprocess
import os

_IE_PLUGINS_PATH = os.getenv("IE_PLUGINS_PATH", None)


def _check_instruction(instruction):
return instruction == str.strip(
subprocess.check_output(
'lscpu | grep -o "{}" | head -1'.format(instruction), shell=True
).decode('utf-8')
)


def make_plugin():
    """Create a CPU IEPlugin with the proper instruction-set extension loaded.

    Raises OSError when the plugin path environment variable is missing and
    Exception when the CPU supports neither avx2 nor sse4.
    """
    if _IE_PLUGINS_PATH is None:
        raise OSError('Inference engine plugin path env not found in the system.')

    plugin = IEPlugin(device='CPU', plugin_dirs=[_IE_PLUGINS_PATH])

    # Prefer avx2 over sse4; load the first extension the CPU supports.
    extensions = (
        ('avx2', 'libcpu_extension_avx2.so'),
        ('sse4', 'libcpu_extension_sse4.so'),
    )
    for instruction, library in extensions:
        if _check_instruction(instruction):
            plugin.add_cpu_extension(os.path.join(_IE_PLUGINS_PATH, library))
            break
    else:
        raise Exception('Inference engine requires a support of avx2 or sse4.')

    return plugin


def make_network(model, weights):
    """Load an Inference Engine network from IR files.

    *model* is the path to the topology .xml, *weights* the path to the
    matching .bin file.
    """
    return IENetwork.from_ir(model=model, weights=weights)
18 changes: 3 additions & 15 deletions cvat/apps/auto_annotation/model_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os
import subprocess

from openvino.inference_engine import IENetwork, IEPlugin
from cvat.apps.auto_annotation.inference_engine import make_plugin, make_network

class ModelLoader():
def __init__(self, model, weights):
Expand All @@ -19,15 +19,9 @@ def __init__(self, model, weights):
if not IE_PLUGINS_PATH:
raise OSError("Inference engine plugin path env not found in the system.")

plugin = IEPlugin(device="CPU", plugin_dirs=[IE_PLUGINS_PATH])
if (self._check_instruction("avx2")):
plugin.add_cpu_extension(os.path.join(IE_PLUGINS_PATH, "libcpu_extension_avx2.so"))
elif (self._check_instruction("sse4")):
plugin.add_cpu_extension(os.path.join(IE_PLUGINS_PATH, "libcpu_extension_sse4.so"))
else:
raise Exception("Inference engine requires a support of avx2 or sse4.")
plugin = make_plugin()
network = make_network(self._model, self._weights)

network = IENetwork.from_ir(model=self._model, weights=self._weights)
supported_layers = plugin.get_supported_layers(network)
not_supported_layers = [l for l in network.layers.keys() if l not in supported_layers]
if len(not_supported_layers) != 0:
Expand All @@ -47,12 +41,6 @@ def infer(self, image):
in_frame = in_frame.transpose((2, 0, 1)) # Change data layout from HWC to CHW
return self._net.infer(inputs={self._input_blob_name: in_frame})[self._output_blob_name].copy()

@staticmethod
def _check_instruction(instruction):
return instruction == str.strip(
subprocess.check_output(
"lscpu | grep -o \"{}\" | head -1".format(instruction), shell=True
).decode("utf-8"))

def load_label_map(labels_path):
with open(labels_path, "r") as f:
Expand Down
31 changes: 31 additions & 0 deletions cvat/apps/dextr_segmentation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Semi-Automatic Segmentation with [Deep Extreme Cut](http://www.vision.ee.ethz.ch/~cvlsegmentation/dextr/)

## About the application

The application allows you to use deep learning models for semi-automatic semantic and instance segmentation.
You can get a segmentation polygon from four (or more) extreme points of an object.
This application uses the pre-trained DEXTR model which has been converted to Inference Engine format.

We are grateful to K.K. Maninis, S. Caelles, J. Pont-Tuset, and L. Van Gool, who permitted the use of their models in our tool.

## Build docker image
```bash
# OpenVINO component is also needed
docker-compose -f docker-compose.yml -f components/openvino/docker-compose.openvino.yml -f cvat/apps/dextr_segmentation/docker-compose.dextr.yml build
```
bsekachev marked this conversation as resolved.
Show resolved Hide resolved

## Run docker container
```bash
docker-compose -f docker-compose.yml -f components/openvino/docker-compose.openvino.yml -f cvat/apps/dextr_segmentation/docker-compose.dextr.yml up -d
```

## Using

1. Open a job
2. Select "Auto Segmentation" in the list of shapes
3. Run the draw mode as usual (by pressing the "Create Shape" button or using the "N" shortcut)
4. Click four to six (or more if needed) extreme points of the object
5. Close the draw mode as usual (by the shortcut or by pressing the "Stop Creation" button)
6. Wait a moment and you will get a class-agnostic annotation polygon
7. You can cancel an annotation request if it takes too long
(in case it is queued to an rq worker and all workers are busy)
7 changes: 7 additions & 0 deletions cvat/apps/dextr_segmentation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from cvat.settings.base import JS_3RDPARTY

# Register the DEXTR client-side plugin with the annotation engine page:
# append our script to any 'engine' scripts other apps registered before us
# (a new list is built on purpose, so previously captured references to the
# old list are left untouched).
JS_3RDPARTY['engine'] = JS_3RDPARTY.get('engine', []) + ['dextr_segmentation/js/enginePlugin.js']
8 changes: 8 additions & 0 deletions cvat/apps/dextr_segmentation/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from django.apps import AppConfig

class DextrSegmentationConfig(AppConfig):
    """Django application config for the DEXTR semi-automatic segmentation app."""

    # NOTE(review): bare module name — confirm it matches the dotted path this
    # app is registered under in INSTALLED_APPS (other cvat apps may use the
    # full 'cvat.apps.<name>' form).
    name = 'dextr_segmentation'
106 changes: 106 additions & 0 deletions cvat/apps/dextr_segmentation/dextr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@

# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT

from cvat.apps.auto_annotation.inference_engine import make_plugin, make_network

import os
import cv2
import PIL
import numpy as np

_IE_CPU_EXTENSION = os.getenv("IE_CPU_EXTENSION", "libcpu_extension_avx2.so")
_IE_PLUGINS_PATH = os.getenv("IE_PLUGINS_PATH", None)

_DEXTR_MODEL_DIR = os.getenv("DEXTR_MODEL_DIR", None)
_DEXTR_PADDING = 50
_DEXTR_TRESHOLD = 0.9
_DEXTR_SIZE = 512

class DEXTR_HANDLER:
    """Lazily-initialized wrapper around the DEXTR Inference Engine model.

    Turns a set of user-clicked extreme points on an image into a polygon
    outlining the selected object, returned as a space-separated string of
    "x,y" vertex pairs.
    """

    def __init__(self):
        self._plugin = None
        self._network = None
        self._exec_network = None
        self._input_blob = None
        self._output_blob = None
        if not _DEXTR_MODEL_DIR:
            raise Exception("DEXTR_MODEL_DIR is not defined")


    def handle(self, im_path, points):
        """Run DEXTR on the image at *im_path* for the given extreme *points*.

        points: iterable of dicts with "x"/"y" keys (image coordinates).
        Returns: polygon string "x1,y1 x2,y2 ...".
        Raises Exception when fewer than three polygon points are detected.
        """
        # Lazy initialization: load the plugin and model on first use only,
        # so constructing the handler stays cheap.
        if not self._plugin:
            self._init_network()

        image = PIL.Image.open(im_path)
        numpy_image = np.array(image)
        points = np.asarray([[int(p["x"]), int(p["y"])] for p in points], dtype=int)
        # Crop box around the extreme points, padded and clamped to the image.
        bounding_box = (
            max(min(points[:, 0]) - _DEXTR_PADDING, 0),
            max(min(points[:, 1]) - _DEXTR_PADDING, 0),
            min(max(points[:, 0]) + _DEXTR_PADDING, numpy_image.shape[1] - 1),
            min(max(points[:, 1]) + _DEXTR_PADDING, numpy_image.shape[0] - 1)
        )

        # Prepare the crop at the fixed network input size.
        numpy_cropped = np.array(image.crop(bounding_box))
        resized = cv2.resize(numpy_cropped, (_DEXTR_SIZE, _DEXTR_SIZE),
                             interpolation=cv2.INTER_CUBIC).astype(np.float32)

        # Express the points relative to the actual crop origin, then scale to
        # the network input size. Using bounding_box[0]/[1] (not
        # min(points) - padding) keeps the points correct when the padding was
        # clamped at an image border.
        points = points - [bounding_box[0], bounding_box[1]]
        points = (points * [_DEXTR_SIZE / numpy_cropped.shape[1],
                            _DEXTR_SIZE / numpy_cropped.shape[0]]).astype(int)

        heatmap = self._make_heatmap(points, resized.shape[:2])

        # Concatenate image and heatmap into a 4-channel input, HWC -> CHW.
        input_dextr = np.concatenate((resized, heatmap[:, :, np.newaxis].astype(resized.dtype)), axis=2)
        input_dextr = input_dextr.transpose((2, 0, 1))

        pred = self._exec_network.infer(
            inputs={self._input_blob: input_dextr[np.newaxis, ...]})[self._output_blob][0, 0, :, :]
        # cv2.resize takes (width, height), i.e. the crop shape reversed.
        pred = cv2.resize(pred, tuple(reversed(numpy_cropped.shape[:2])), interpolation=cv2.INTER_CUBIC)
        mask = np.zeros(numpy_image.shape[:2])
        mask[bounding_box[1]:bounding_box[1] + pred.shape[0],
             bounding_box[0]:bounding_box[0] + pred.shape[1]] = pred > _DEXTR_TRESHOLD

        return self._mask_to_polygon(mask)

    def _init_network(self):
        # One-time load of the IE plugin and the DEXTR IR model.
        self._plugin = make_plugin()
        self._network = make_network(os.path.join(_DEXTR_MODEL_DIR, 'dextr.xml'),
                                     os.path.join(_DEXTR_MODEL_DIR, 'dextr.bin'))
        self._input_blob = next(iter(self._network.inputs))
        self._output_blob = next(iter(self._network.outputs))
        self._exec_network = self._plugin.load(network=self._network)

    @staticmethod
    def _make_heatmap(points, shape):
        # One 2D gaussian bump per extreme point, merged via element-wise
        # maximum and normalized to [0, 255].
        heatmap = np.zeros(shape=shape, dtype=np.float64)
        for point in points:
            gaussian_x_axis = np.arange(0, _DEXTR_SIZE, 1, float) - point[0]
            gaussian_y_axis = np.arange(0, _DEXTR_SIZE, 1, float)[:, np.newaxis] - point[1]
            gaussian = np.exp(-4 * np.log(2) * ((gaussian_x_axis ** 2 + gaussian_y_axis ** 2) / 100)).astype(np.float64)
            heatmap = np.maximum(heatmap, gaussian)
        cv2.normalize(heatmap, heatmap, 0, 255, cv2.NORM_MINMAX)
        return heatmap

    @staticmethod
    def _mask_to_polygon(mask):
        # Extract the largest external contour of the binary mask and format
        # it as a "x,y x,y ..." polygon string.
        mask = np.array(mask, dtype=np.uint8)
        cv2.normalize(mask, mask, 0, 255, cv2.NORM_MINMAX)
        # OpenCV 4.x returns (contours, hierarchy); 3.x returns
        # (image, contours, hierarchy).
        if int(cv2.__version__.split('.')[0]) > 3:
            contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)[0]
        else:
            contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)[1]

        contour = max(contours, key=lambda arr: arr.size)
        if contour.shape.count(1):
            contour = np.squeeze(contour)
        if contour.size < 3 * 2:
            raise Exception('Less than three points have been detected. Can not build a polygon.')

        return " ".join("{},{}".format(int(point[0]), int(point[1])) for point in contour)

    def __del__(self):
        # Release Inference Engine objects explicitly, dependents first.
        if self._exec_network:
            del self._exec_network
        if self._network:
            del self._network
        if self._plugin:
            del self._plugin
14 changes: 14 additions & 0 deletions cvat/apps/dextr_segmentation/docker-compose.dextr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#
# Copyright (C) 2018 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

version: "2.3"

services:
cvat:
build:
context: .
args:
WITH_DEXTR: "yes"
Loading