Skip to content

Commit

Permalink
Add Conv1D implementation (#67)
Browse files Browse the repository at this point in the history
* fix conv2d layer test

* create conv1d with code refactor

* naming fixes

* add docs version

* fixes for PR requests

* lint fixes

* add model loader and saver from keras for conv1d
  • Loading branch information
avan1235 authored Jun 2, 2021
1 parent 7e8c7d9 commit 54cd45c
Show file tree
Hide file tree
Showing 16 changed files with 523 additions and 233 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Squeeze

// Default name for the kernel weight variable, used when the layer has no custom name.
private const val KERNEL_VARIABLE_NAME = "conv1d_kernel"

// Default name for the bias weight variable, used when the layer has no custom name.
private const val BIAS_VARIABLE_NAME = "conv1d_bias"

// Axis inserted (via expandDims) to lift the rank-3 input to rank 4 so the 2D
// convolution implementation can be reused; the same axis is squeezed away afterwards.
private const val EXTRA_DIM = 1L

/**
 * 1D convolution layer (e.g. temporal convolution over audio data).
 *
 * This layer creates a convolution kernel that is convolved (actually cross-correlated)
 * with the layer input to produce a tensor of outputs.
 * Finally, the `activation` is applied to the outputs as well.
 *
 * It expects input data of size `(N, L, C)` where
 * ```
 * N - batch size
 * L - length of signal sequence
 * C - number of channels
 * ```
 *
 * Internally the input is expanded to rank 4 along [EXTRA_DIM] and delegated to the
 * shared [Conv2DImpl] machinery; the extra dimension is squeezed away from the result.
 *
 * @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution).
 * @property [kernelSize] Long number, specifying the width of the 1D convolution window.
 * @property [strides] Three numbers specifying the stride of the convolution
 * for each dimension of the input tensor (batch, sequence, channels).
 * NOTE: Specifying stride value != 1 is incompatible with specifying `dilations` value != 1.
 * @property [dilations] Three numbers specifying the dilation rate to use for
 * the dilated convolution for each dimension of the input tensor.
 * @property [activation] Activation function.
 * @property [kernelInitializer] An initializer for the convolution kernel.
 * @property [biasInitializer] An initializer for the bias vector.
 * @property [padding] The padding method, either 'valid' or 'same' or 'full'.
 * @property [name] Custom layer name.
 * @property [useBias] If true the layer uses a bias vector.
 * @constructor Creates [Conv1D] object.
 *
 * @since 0.3
 */
public class Conv1D(
    public val filters: Long = 32,
    public val kernelSize: Long = 3,
    public val strides: LongArray = longArrayOf(1, 1, 1),
    public val dilations: LongArray = longArrayOf(1, 1, 1),
    public val activation: Activations = Activations.Relu,
    public val kernelInitializer: Initializer = HeNormal(),
    public val biasInitializer: Initializer = HeUniform(),
    public val padding: ConvPadding = ConvPadding.SAME,
    public val useBias: Boolean = true,
    name: String = "",
) : Conv2DImpl(
    filtersInternal = filters,
    // A width-W 1D window is a 1xW 2D window over the rank-4 expanded input.
    kernelSizeInternal = longArrayOf(1, kernelSize),
    stridesInternal = longArrayOf(strides[0], 1, strides[1], strides[2]),
    dilationsInternal = longArrayOf(dilations[0], 1, dilations[1], dilations[2]),
    activationInternal = activation,
    kernelInitializerInternal = kernelInitializer,
    biasInitializerInternal = biasInitializer,
    paddingInternal = padding,
    useBiasInternal = useBias,
    kernelVariableName = KERNEL_VARIABLE_NAME,
    biasVariableName = BIAS_VARIABLE_NAME,
    name = name
) {
    init {
        // Mirrors the size validation performed by Conv2D for its 2D/4D parameters.
        require(strides.size == 3) { "strides is expected to have size equal 3" }
        require(dilations.size == 3) { "dilations is expected to have size equal 3" }
    }

    // Axis specification reused across forward() calls to drop the inserted dimension.
    private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM))

    override fun forward(
        tf: Ops,
        input: Operand<Float>,
        isTraining: Operand<Boolean>,
        numberOfLosses: Operand<Float>?
    ): Operand<Float> {
        // (N, L, C) -> (N, 1, L, C): reuse the 2D convolution, then squeeze back to rank 3.
        val reshapedInput = tf.expandDims(input, tf.constant(EXTRA_DIM))
        val result = super.forward(tf, reshapedInput, isTraining, numberOfLosses)
        return tf.squeeze(result, squeezeAxis)
    }

    override fun toString(): String {
        // Class name fixed (was "Conv2D") and LongArrays rendered via contentToString(),
        // consistent with Conv2D.toString(); plain $strides would print an identity hash.
        return "Conv1D(filters=$filters, kernelSize=$kernelSize, strides=${strides.contentToString()}, " +
                "dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
                "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding)"
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,32 @@ import org.tensorflow.op.Ops
import org.tensorflow.op.core.Variable
import org.tensorflow.op.nn.Conv2d
import org.tensorflow.op.nn.Conv2d.dilations
import java.lang.IllegalArgumentException
import kotlin.math.roundToInt

private const val KERNEL_VARIABLE_NAME = "conv2d_kernel"

private const val BIAS_VARIABLE_NAME = "conv2d_bias"

/**
* 2D convolution layer (e.g. spatial convolution over images).
*
* This layer creates a convolution kernel that is convolved (actually cross-correlated)
* with the layer input to produce a tensor of outputs.
* Finally, if `activation` is applied to the outputs as well.
* Finally, the `activation` is applied to the outputs as well.
*
* It expects input data of size `(N, H, W, C)` where
* ```
* N - batch size
* H - height
* W - width
* C - number of channels
* ```
*
* @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution).
* @property [kernelSize] Two long numbers, specifying the height and width of the 2D convolution window.
* @property [strides] Strides of the pooling operation for each dimension of input tensor.
* NOTE: Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1.
* NOTE: Specifying any stride value != 1 is incompatible with specifying any `dilations` value != 1.
* @property [dilations] Four numbers, specifying the dilation rate to use for dilated convolution for each dimension of input tensor.
* @property [activation] Activation function.
* @property [kernelInitializer] An initializer for the convolution kernel
Expand All @@ -57,69 +67,89 @@ public class Conv2D(
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
) : Conv2DImpl(
filtersInternal = filters,
kernelSizeInternal = kernelSize,
stridesInternal = strides,
dilationsInternal = dilations,
activationInternal = activation,
kernelInitializerInternal = kernelInitializer,
biasInitializerInternal = biasInitializer,
paddingInternal = padding,
useBiasInternal = useBias,
kernelVariableName = KERNEL_VARIABLE_NAME,
biasVariableName = BIAS_VARIABLE_NAME,
name = name
) {
init {
assertArraySize(kernelSize, 2, "kernelSize")
assertArraySize(strides, 4, "strides")
assertArraySize(dilations, 4, "dilations")
}

override fun toString(): String {
return "Conv2D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
"dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
"biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding)"
}
}

public abstract class Conv2DImpl(
private val filtersInternal: Long,
private val kernelSizeInternal: LongArray,
private val stridesInternal: LongArray,
private val dilationsInternal: LongArray,
private val activationInternal: Activations,
private val kernelInitializerInternal: Initializer,
private val biasInitializerInternal: Initializer,
private val paddingInternal: ConvPadding,
private val useBiasInternal: Boolean,
private val kernelVariableName: String,
private val biasVariableName: String,
name: String = ""
) : Layer(name) {
// weight tensors
private lateinit var kernel: Variable<Float>
private var bias: Variable<Float>? = null

// weight tensor shapes
private lateinit var biasShape: Shape
private lateinit var kernelShape: Shape
protected lateinit var kernelShape: Shape
protected lateinit var biasShape: Shape

override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
// Amount of channels should be the last value in the inputShape (make warning here)
val lastElement = inputShape.size(inputShape.numDimensions() - 1)

// Compute shapes of kernel and bias matrices
kernelShape = shapeFromDims(*kernelSize, lastElement, filters)
biasShape = Shape.make(filters)
kernelShape = shapeFromDims(*kernelSizeInternal, lastElement, filtersInternal)
biasShape = Shape.make(filtersInternal)

// should be calculated before addWeight because it's used in calculation, need to rewrite addWEight to avoid strange behaviour
// calculate fanIn, fanOut
// should be calculated before addWeight because it's used in calculation,
// need to rewrite addWeight to avoid strange behaviour calculate fanIn, fanOut
val inputDepth = lastElement // amount of channels
val outputDepth = filters // amount of channels for the next layer
val outputDepth = filtersInternal // amount of channels for the next layer

fanIn = (inputDepth * kernelSize[0] * kernelSize[1]).toInt()
fanOut = ((outputDepth * kernelSize[0] * kernelSize[1] / (strides[0].toDouble() * strides[1])).roundToInt())
fanIn = (inputDepth * kernelSizeInternal[0] * kernelSizeInternal[1]).toInt()
fanOut = ((outputDepth * kernelSizeInternal[0] * kernelSizeInternal[1] /
(stridesInternal[0].toDouble() * stridesInternal[1])).roundToInt())

val (kernelVariableName, biasVariableName) = defineVariableNames()
createConv2DVariables(tf, kernelVariableName, biasVariableName, kGraph)
}

private fun defineVariableNames(): Pair<String, String> {
return if (name.isNotEmpty()) {
Pair(conv2dKernelVarName(name), conv2dBiasVarName(name))
} else {
Pair(KERNEL_VARIABLE_NAME, BIAS_VARIABLE_NAME)
}
}

private fun createConv2DVariables(
tf: Ops,
kernelVariableName: String,
biasVariableName: String,
kGraph: KGraph
) {
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
var rows = inputShape.size(1)
var cols = inputShape.size(2)
rows = convOutputLength(
rows, kernelSize[0].toInt(), padding,
strides[1].toInt(), dilations[1].toInt()
rows, kernelSizeInternal[0].toInt(), paddingInternal,
stridesInternal[1].toInt(), dilationsInternal[1].toInt()
)
cols = convOutputLength(
cols, kernelSize[1].toInt(), padding,
strides[2].toInt(), dilations[2].toInt()
cols, kernelSizeInternal[1].toInt(), paddingInternal,
stridesInternal[2].toInt(), dilationsInternal[2].toInt()
)

val shape = Shape.make(inputShape.size(0), rows, cols, filters)
val shape = Shape.make(inputShape.size(0), rows, cols, filtersInternal)
outputShape = TensorShape(shape)
return shape
}
Expand All @@ -130,26 +160,15 @@ public class Conv2D(
isTraining: Operand<Boolean>,
numberOfLosses: Operand<Float>?
): Operand<Float> {
val tfPadding = when (padding) {
ConvPadding.SAME -> "SAME"
ConvPadding.VALID -> "VALID"
ConvPadding.FULL -> "FULL"
}
val paddingName = paddingInternal.paddingName
val options: Conv2d.Options = dilations(dilationsInternal.toList()).dataFormat("NHWC")
var output: Operand<Float> = tf.nn.conv2d(input, kernel, stridesInternal.toMutableList(), paddingName, options)

val options: Conv2d.Options = dilations(dilations.toList()).dataFormat("NHWC")
var output: Operand<Float> = tf.nn.conv2d(input, kernel, strides.toMutableList(), tfPadding, options)

if (useBias) {
if (useBiasInternal) {
output = tf.nn.biasAdd(output, bias)
}

return Activations.convert(activation).apply(tf, output, name)
}

override val weights: Map<String, Array<*>> get() = extractConv2DWeights()

private fun extractConv2DWeights(): Map<String, Array<*>> {
return extractWeights(defineVariableNames().toList())
return Activations.convert(activationInternal).apply(tf, output, name)
}

/** Returns the shape of kernel weights. */
Expand All @@ -158,12 +177,41 @@ public class Conv2D(
/** Returns the shape of bias weights. */
public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()

override val weights: Map<String, Array<*>> get() = extractConv2DWeights()

override val hasActivation: Boolean get() = true

override val paramCount: Int
get() = (numElementsInShape(shapeToLongArray(kernelShape)) + numElementsInShape(shapeToLongArray(biasShape))).toInt()
get() = (kernelShape.numElements() + biasShape.numElements()).toInt()

override fun toString(): String {
return "Conv2D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, biasInitializer=$biasInitializer, kernelShape=$kernelShape, padding=$padding)"
private fun extractConv2DWeights(): Map<String, Array<*>> {
return extractWeights(defineVariableNames().toList())
}

private fun defineVariableNames(): Pair<String, String> {
return if (name.isNotEmpty()) {
Pair(conv2dKernelVarName(name), conv2dBiasVarName(name))
} else {
Pair(kernelVariableName, biasVariableName)
}
}

private fun createConv2DVariables(
tf: Ops,
kernelVariableName: String,
biasVariableName: String,
kGraph: KGraph
) {
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal)
if (useBiasInternal) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal)
}
}

private fun assertArraySize(array: LongArray, size: Int, name: String) {
if (array.size != size) {
throw IllegalArgumentException("$name is expected to have size equal $size")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ package org.jetbrains.kotlinx.dl.api.core.layer.convolutional
/**
* Type of padding.
*/
public enum class ConvPadding {
public enum class ConvPadding(internal val paddingName: String) {
/**
* Results in padding evenly to the left/right or up/down of the input such that output has the same
* height/width dimension as the input.
*/
SAME,
SAME("SAME"),

/** No padding. */
VALID,
/** No padding. Results in smaller output size for `kernelSize > 1` */
VALID("VALID"),

/** Full padding. For Keras compatibility goals. */
FULL
FULL("FULL");
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,14 @@ public class DepthwiseConv2D(
isTraining: Operand<Boolean>,
numberOfLosses: Operand<Float>?
): Operand<Float> {
val tfPadding = when (padding) {
ConvPadding.SAME -> "SAME"
ConvPadding.VALID -> "VALID"
ConvPadding.FULL -> "FULL"
}

val paddingName = padding.paddingName
val options: DepthwiseConv2dNative.Options = dilations(dilations.toList()).dataFormat("NHWC")
var output: Operand<Float> =
tf.nn.depthwiseConv2dNative(
input,
depthwiseKernel,
strides.toMutableList(),
tfPadding,
paddingName,
options
)

Expand All @@ -175,11 +170,7 @@ public class DepthwiseConv2D(
override val hasActivation: Boolean get() = true

override val paramCount: Int
get() = (numElementsInShape(shapeToLongArray(depthwiseKernelShape)) + numElementsInShape(
shapeToLongArray(
biasShape
)
)).toInt()
get() = (depthwiseKernelShape.numElements() + biasShape.numElements()).toInt()

override fun toString(): String {
return "DepthwiseConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, dilations=${dilations.contentToString()}, activation=$activation, depthMultiplier=$depthMultiplier, depthwiseInitializer=$depthwiseInitializer, biasInitializer=$biasInitializer, padding=$padding, useBias=$useBias, depthwiseKernel=$depthwiseKernel, bias=$bias, biasShape=$biasShape, depthwiseKernelShape=$depthwiseKernelShape)"
Expand Down
Loading

0 comments on commit 54cd45c

Please sign in to comment.