Add transposed convolution layers
juliabeliaeva committed Jan 18, 2022
1 parent 6b1c592 commit 1d5492e
Showing 22 changed files with 1,386 additions and 74 deletions.
@@ -159,4 +159,12 @@ internal fun IntArray.toLongArray(): LongArray {
1 -> longArrayOf(this[0].toLong())
else -> LongArray(size) { this[it].toLong() }
}
}

internal fun LongArray.toIntArray(): IntArray {
return when (size) {
0 -> intArrayOf()
1 -> intArrayOf(this[0].toInt())
else -> IntArray(size) { this[it].toInt() }
}
}
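
The two converters above are mirror images: they shuttle shape and stride arrays between the Int-based layer API and the Long-based TensorFlow Java ops. A hypothetical standalone round trip (not part of the commit; redeclared without `internal` so the snippet is self-contained):

```kotlin
// Hypothetical usage sketch of the converters defined in the diff above.
fun IntArray.toLongArray(): LongArray = LongArray(size) { this[it].toLong() }
fun LongArray.toIntArray(): IntArray = IntArray(size) { this[it].toInt() }

fun main() {
    val strides = intArrayOf(1, 2, 2, 1)
    val longStrides = strides.toLongArray()                 // [1, 2, 2, 1] as Long values
    check(longStrides.toIntArray().contentEquals(strides))  // round trip is lossless for Int-range values
}
```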
@@ -59,7 +59,7 @@ public abstract class AbstractConv(
protected abstract val biasRegularizer: Regularizer?
protected abstract val activityRegularizer: Regularizer?
protected abstract val padding: ConvPadding
-protected abstract val useBias: Boolean
+internal abstract val useBias: Boolean

/** Tensor with kernel weights */
protected lateinit var kernel: KVariable
@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
import org.jetbrains.kotlinx.dl.api.core.layer.toLongList
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.jetbrains.kotlinx.dl.api.core.util.convBiasVarName
import org.jetbrains.kotlinx.dl.api.core.util.convKernelVarName
@@ -21,8 +22,6 @@ import org.tensorflow.op.Ops
import org.tensorflow.op.core.Squeeze
import org.tensorflow.op.nn.Conv2d

-private const val EXTRA_DIM = 1L

/**
* 1D convolution layer (e.g. convolution over audio data).
*
@@ -79,10 +78,6 @@ public class Conv1D(

override val kernelSize: IntArray = intArrayOf(kernelLength)

-/** Axis of height for which the extra dimension is added (unsqueezed) before actual
- * convolution operation and the output from actual implementation are squeezed. */
-private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM))

override fun kernelVarName(name: String): String = convKernelVarName(name, dim = 1)

override fun biasVarName(name: String): String = convBiasVarName(name, dim = 1)
@@ -91,15 +86,13 @@
tf: Ops,
input: Operand<Float>
): Operand<Float> {
-val expandedInput = tf.expandDims(input, tf.constant(EXTRA_DIM))
-val expandedKernel = tf.expandDims(kernel.variable, tf.constant(EXTRA_DIM - 1))
-val expandedStrides = intArrayOf(strides[0], 1, strides[1], strides[2])
-val expandedDilations = intArrayOf(dilations[0], 1, dilations[1], dilations[2])
-val options = Conv2d.dilations(expandedDilations.toLongList()).dataFormat("NHWC")
-val result = tf.nn.conv2d(
-expandedInput, expandedKernel, expandedStrides.toLongList(), padding.paddingName, options
-)
-return tf.squeeze(result, squeezeAxis)
+return tf.withExpandedDimensions(input) { expandedInput ->
+val options = Conv2d.dilations(expand(dilations).toLongList()).dataFormat("NHWC")
+return@withExpandedDimensions tf.nn.conv2d(
+expandedInput, tf.expandKernel(kernel.variable), expand(strides).toLongList(),
+padding.paddingName, options
+)
+}
}

protected override fun defineOutputShape(inputShape: Shape): Shape {
@@ -122,4 +115,53 @@ public class Conv1D(
"dilation=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
"biasInitializer=$biasInitializer, kernelShape=${kernel.shape}, biasShape=${bias?.shape}, padding=$padding, " +
"biasRegularizer=$biasRegularizer, kernelRegularizer=$kernelRegularizer, activityRegularizer=$activityRegularizer)"

internal companion object {
internal const val EXTRA_DIM = 1

/** Height axis at which the extra dimension is added (unsqueezed) before the actual
* convolution operation, and at which the output of that operation is squeezed back. */
private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM.toLong()))

internal fun expandKernel(kernel: IntArray): IntArray {
return kernel.withAdded(EXTRA_DIM - 1, 1)
}

internal fun Ops.expandKernel(kernel: Operand<Float>): Operand<Float> {
return expandDims(kernel, constant(EXTRA_DIM - 1))
}

internal fun TensorShape.expand(): TensorShape {
return TensorShape(dims().withAdded(EXTRA_DIM, 1))
}

internal fun expand(array: IntArray): IntArray {
return array.withAdded(EXTRA_DIM, 1)
}

/**
* Adds an extra dimension to the input, performs the provided operation,
* and squeezes the result by removing the previously added dimension.
* This makes it possible to perform 2D operations on 1D inputs.
*/
internal fun Ops.withExpandedDimensions(input: Operand<Float>,
operation: (Operand<Float>) -> Operand<Float>
): Operand<Float> {
val expandedInput = expandDims(input, constant(EXTRA_DIM))
val expandedOutput = operation(expandedInput)
return squeeze(expandedOutput, squeezeAxis)
}

internal fun LongArray.withAdded(position: Int, element: Long): LongArray {
return toMutableList().apply { add(position, element) }.toLongArray()
}

internal fun IntArray.withAdded(position: Int, element: Int): IntArray {
return toMutableList().apply { add(position, element) }.toIntArray()
}

internal fun IntArray.withAdded(position: Int, elements: List<Int>): IntArray {
return toMutableList().apply { addAll(position, elements) }.toIntArray()
}
}
}
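
The companion now centralizes the expansion trick that the removed inline code performed: a 1D input of shape (N, L, C) is unsqueezed at the height axis (EXTRA_DIM = 1) into (N, 1, L, C), run through a 2D op, and squeezed back. A self-contained sketch (not part of the commit) of the array-level part of that trick:

```kotlin
// Standalone sketch of the expansion helpers above. A 1D layer's operands gain
// a unit "height" entry so that the 2D TensorFlow ops can be reused:
//   input   (N, L, C)      -> expandDims(axis = 1) -> (N, 1, L, C)
//   kernel  (k, Cin, Cout) -> expandDims(axis = 0) -> (1, k, Cin, Cout)
//   strides [s0, s1, s2]   -> withAdded(1, 1)      -> [s0, 1, s1, s2]
fun IntArray.withAdded(position: Int, element: Int): IntArray =
    toMutableList().apply { add(position, element) }.toIntArray()

fun main() {
    val strides = intArrayOf(1, 3, 1)        // (batch, length, channels)
    val expanded = strides.withAdded(1, 1)   // unit stride for the inserted height axis
    check(expanded.contentEquals(intArrayOf(1, 1, 3, 1)))
}
```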
@@ -0,0 +1,103 @@
/*
* Copyright 2021-2022 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.EXTRA_DIM
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.expand
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.expandKernel
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.withAdded
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.withExpandedDimensions
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv2DTranspose.Companion.withStandardPadding
import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
import org.jetbrains.kotlinx.dl.api.core.layer.toLongList
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Operand
import org.tensorflow.op.Ops

/**
* 1D convolution transpose layer.
*
* This is an operation going in the opposite direction of a normal convolution:
* it transforms a tensor shaped like the output of some convolution into a tensor that has the shape of that convolution's input.
*
* This layer expects input data of size `(N, L, C)` where
* ```
* N - batch size
* L - length of signal sequence
* C - number of channels
* ```
*
* Note: dilation values greater than 1 are not supported on CPU
* (see https://github.com/tensorflow/tensorflow/issues/28264).
*
* @property [filters] dimensionality of the output space (i.e. the number of filters in the convolution)
* @property [kernelLength] size of the convolutional kernel (one number)
* @property [strides] strides of the convolution for each dimension of the input tensor (three numbers)
* @property [dilations] dilations of the convolution for each dimension of the input tensor (three numbers).
* Currently, dilation values greater than 1 are not supported on CPU.
* @property [activation] activation function
* @property [kernelInitializer] initializer for the kernel
* @property [biasInitializer] initializer for the bias
* @property [kernelRegularizer] regularizer for the kernel
* @property [biasRegularizer] regularizer for the bias
* @property [activityRegularizer] regularizer function applied to the output of the layer
* @property [padding] type of padding to use
* @property [outputPadding] the amount of explicit padding to use (six numbers: two for each dimension).
* @property [useBias] a flag that specifies if the bias should be used
* @param [name] custom layer name
*/
public class Conv1DTranspose(
public override val filters: Int = 3,
public val kernelLength: Int = 3,
public override val strides: IntArray = intArrayOf(1, 1, 1),
public override val dilations: IntArray = intArrayOf(1, 1, 1),
public override val activation: Activations = Activations.Relu,
public override val kernelInitializer: Initializer = HeNormal(),
public override val biasInitializer: Initializer = HeUniform(),
public override val kernelRegularizer: Regularizer? = null,
public override val biasRegularizer: Regularizer? = null,
public override val activityRegularizer: Regularizer? = null,
public override val padding: ConvPadding = ConvPadding.SAME,
public override val outputPadding: IntArray? = null,
public override val useBias: Boolean = true,
name: String = ""
) : ConvTranspose(dimensions = 1, name), NoGradients {

init {
requireArraySize(strides, dimensions + 2, "strides")
requireArraySize(dilations, dimensions + 2, "dilations")
if (outputPadding != null) requireArraySize(outputPadding, 2 * (dimensions + 2), "outputPadding")
isTrainable = false
}

override val kernelSize: IntArray = intArrayOf(kernelLength)

override fun convImplementation(tf: Ops, input: Operand<Float>): Operand<Float> {
return tf.withExpandedDimensions(input) { expandedInput ->
val expandedOutputPadding = outputPadding?.withAdded(EXTRA_DIM * 2, listOf(0, 0))
return@withExpandedDimensions tf.nn.conv2dBackpropInput(
tf.shapeWithDynamicBatchSize(outputShape.expand(), input),
tf.expandKernel(kernel.variable),
expandedInput,
expand(strides).toLongList(),
if (outputPadding != null) Conv2DTranspose.EXPLICIT else padding.paddingName,
*Conv2DTranspose.buildOptions(
expand(dilations),
expandedOutputPadding?.withStandardPadding(
padding,
expandKernel(kernelSize),
expand(dilations)
)
)
)
}
}
}
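
For context, a minimal usage sketch of the new layer. The `Sequential` and `Input` import paths are assumptions about the surrounding KotlinDL API, not part of this commit; the layer's parameters come from the constructor above:

```kotlin
import org.jetbrains.kotlinx.dl.api.core.Sequential          // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input    // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1DTranspose

fun main() {
    val model = Sequential.of(
        Input(100, 8),                     // (L = 100, C = 8); the batch dimension is implicit
        Conv1DTranspose(
            filters = 16,
            kernelLength = 3,
            strides = intArrayOf(1, 2, 1)  // stride 2 on the length axis
        )
    )
    // With the default SAME padding, stride 2 doubles the length axis: (100, 8) -> (200, 16).
}
```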
@@ -0,0 +1,126 @@
/*
* Copyright 2021-2022 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
import org.jetbrains.kotlinx.dl.api.core.layer.toLongList
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.convTransposeSingleSidePadding
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.nn.Conv2dBackpropInput

/**
* 2D convolution transpose layer.
*
* This is an operation going in the opposite direction of a normal convolution:
* it transforms a tensor shaped like the output of some convolution into a tensor that has the shape of that convolution's input.
*
* This layer expects input data of size `(N, H, W, C)` where
* ```
* N - batch size
* H - height
* W - width
* C - number of channels
* ```
*
* Note: dilation values greater than 1 are not supported on CPU
* (see https://github.com/tensorflow/tensorflow/issues/28264).
*
* @property [filters] dimensionality of the output space (i.e. the number of filters in the convolution)
* @property [kernelSize] size of the convolutional kernel (two numbers)
* @property [strides] strides of the convolution for each dimension of the input tensor (four numbers)
* @property [dilations] dilations of the convolution for each dimension of the input tensor (four numbers).
* Currently, dilation values greater than 1 are not supported on CPU.
* @property [activation] activation function
* @property [kernelInitializer] initializer for the kernel
* @property [biasInitializer] initializer for the bias
* @property [kernelRegularizer] regularizer for the kernel
* @property [biasRegularizer] regularizer for the bias
* @property [activityRegularizer] regularizer function applied to the output of the layer
* @property [padding] type of padding to use
* @property [outputPadding] the amount of explicit padding to use (eight numbers: two for each dimension).
* @property [useBias] a flag that specifies if the bias should be used
* @param [name] custom layer name
*/
public class Conv2DTranspose(
public override val filters: Int = 3,
public override val kernelSize: IntArray = intArrayOf(3, 3),
public override val strides: IntArray = intArrayOf(1, 1, 1, 1),
public override val dilations: IntArray = intArrayOf(1, 1, 1, 1),
public override val activation: Activations = Activations.Relu,
public override val kernelInitializer: Initializer = HeNormal(),
public override val biasInitializer: Initializer = HeUniform(),
public override val kernelRegularizer: Regularizer? = null,
public override val biasRegularizer: Regularizer? = null,
public override val activityRegularizer: Regularizer? = null,
public override val padding: ConvPadding = ConvPadding.SAME,
public override val outputPadding: IntArray? = null,
public override val useBias: Boolean = true,
name: String = ""
) : ConvTranspose(dimensions = 2, name), NoGradients {

init {
requireArraySize(kernelSize, dimensions, "kernelSize")
requireArraySize(strides, dimensions + 2, "strides")
requireArraySize(dilations, dimensions + 2, "dilations")
if (outputPadding != null) requireArraySize(outputPadding, 2 * (dimensions + 2), "outputPadding")
isTrainable = false
}

override fun convImplementation(tf: Ops, input: Operand<Float>): Operand<Float> {
return tf.nn.conv2dBackpropInput(
tf.shapeWithDynamicBatchSize(outputShape, input),
kernel.variable,
input,
strides.toLongList(),
if (outputPadding != null) EXPLICIT else padding.paddingName,
*buildOptions(
dilations,
outputPadding?.withStandardPadding(
padding,
kernelSize,
dilations
)
)
)
}

internal companion object {
internal const val EXPLICIT = "EXPLICIT"

/**
* Combines the explicitly provided padding values with the standard padding implied by the given padding method.
* This is needed since the [org.tensorflow.op.NnOps.conv2dBackpropInput] function does not support specifying
* both a padding method and explicit output padding at the same time.
*/
internal fun IntArray.withStandardPadding(padding: ConvPadding,
kernelSize: IntArray,
dilations: IntArray
): IntArray {
val withStandardPadding = kernelSize.indices.flatMap { dim ->
listOf(
convTransposeSingleSidePadding(padding, this[2 * dim], kernelSize[dim], dilations[dim + 1]),
convTransposeSingleSidePadding(padding, this[2 * dim + 1], kernelSize[dim], dilations[dim + 1])
)
}
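// The leading and trailing (0, 0) pairs keep the batch and channel dimensions of the NHWC layout unpadded.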
return intArrayOf(0, 0, *(withStandardPadding.toIntArray()), 0, 0)
}

internal fun buildOptions(dilations: IntArray, outputPadding: IntArray?): Array<Conv2dBackpropInput.Options> {
val options = mutableListOf(Conv2dBackpropInput.dilations(dilations.toLongList()))
if (outputPadding != null) {
options.add(Conv2dBackpropInput.explicitPaddings(outputPadding.toLongList()))
}
return options.map { it.dataFormat("NHWC") }.toTypedArray()
}
}
}
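
An analogous sketch for the 2D layer, e.g. as the upsampling half of a small decoder. Same assumptions as before about the `Sequential` and `Input` APIs; the shapes follow the standard SAME-padding rule for transposed convolutions, out = in * stride:

```kotlin
import org.jetbrains.kotlinx.dl.api.core.Sequential          // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input    // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv2DTranspose

fun main() {
    // A toy decoder: each stride-2 transposed convolution doubles both spatial axes.
    val decoder = Sequential.of(
        Input(7, 7, 64),
        Conv2DTranspose(filters = 32, kernelSize = intArrayOf(3, 3), strides = intArrayOf(1, 2, 2, 1)),
        Conv2DTranspose(filters = 1, kernelSize = intArrayOf(3, 3), strides = intArrayOf(1, 2, 2, 1))
    )
    // (7, 7, 64) -> (14, 14, 32) -> (28, 28, 1) with the default SAME padding.
}
```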