Skip to content

Commit

Permalink
Add Conv1D implementation (#67)
Browse files Browse the repository at this point in the history
* fix conv2d layer test

* create conv1d with code refactor

* naming fixes

* add docs version

* fixes for PR requests

* lint fixes

* add model loader and saver from keras for conv1d
  • Loading branch information
avan1235 authored Jun 2, 2021
1 parent 7e8c7d9 commit 54cd45c
Show file tree
Hide file tree
Showing 16 changed files with 523 additions and 233 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.core.Squeeze

// Default name for the kernel weight variable, used when the layer has no custom name.
private const val KERNEL_VARIABLE_NAME = "conv1d_kernel"

// Default name for the bias weight variable, used when the layer has no custom name.
private const val BIAS_VARIABLE_NAME = "conv1d_bias"

// Axis inserted (via expandDims) to lift the rank-3 input to rank 4 so the 2D
// convolution implementation can be reused; the same axis is squeezed away afterwards.
private const val EXTRA_DIM = 1L

/**
 * 1D convolution layer (e.g. temporal convolution over audio data).
 *
 * This layer creates a convolution kernel that is convolved (actually cross-correlated)
 * with the layer input to produce a tensor of outputs.
 * Finally, the `activation` is applied to the outputs as well.
 *
 * It expects input data of size `(N, L, C)` where
 * ```
 * N - batch size
 * L - length of signal sequence
 * C - number of channels
 * ```
 *
 * Internally the input is expanded to rank 4 along [EXTRA_DIM] and delegated to the
 * shared [Conv2DImpl] machinery; the extra dimension is squeezed away from the result.
 *
 * @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution).
 * @property [kernelSize] Long number, specifying the width of the 1D convolution window.
 * @property [strides] Three numbers specifying the stride of the convolution
 * for each dimension of the input tensor (batch, sequence, channels).
 * NOTE: Specifying stride value != 1 is incompatible with specifying `dilations` value != 1.
 * @property [dilations] Three numbers specifying the dilation rate to use for
 * the dilated convolution for each dimension of the input tensor.
 * @property [activation] Activation function.
 * @property [kernelInitializer] An initializer for the convolution kernel.
 * @property [biasInitializer] An initializer for the bias vector.
 * @property [padding] The padding method, either 'valid' or 'same' or 'full'.
 * @property [name] Custom layer name.
 * @property [useBias] If true the layer uses a bias vector.
 * @constructor Creates [Conv1D] object.
 *
 * @since 0.3
 */
public class Conv1D(
    public val filters: Long = 32,
    public val kernelSize: Long = 3,
    public val strides: LongArray = longArrayOf(1, 1, 1),
    public val dilations: LongArray = longArrayOf(1, 1, 1),
    public val activation: Activations = Activations.Relu,
    public val kernelInitializer: Initializer = HeNormal(),
    public val biasInitializer: Initializer = HeUniform(),
    public val padding: ConvPadding = ConvPadding.SAME,
    public val useBias: Boolean = true,
    name: String = "",
) : Conv2DImpl(
    filtersInternal = filters,
    // A width-W 1D window is a 1xW 2D window over the rank-4 expanded input.
    kernelSizeInternal = longArrayOf(1, kernelSize),
    stridesInternal = longArrayOf(strides[0], 1, strides[1], strides[2]),
    dilationsInternal = longArrayOf(dilations[0], 1, dilations[1], dilations[2]),
    activationInternal = activation,
    kernelInitializerInternal = kernelInitializer,
    biasInitializerInternal = biasInitializer,
    paddingInternal = padding,
    useBiasInternal = useBias,
    kernelVariableName = KERNEL_VARIABLE_NAME,
    biasVariableName = BIAS_VARIABLE_NAME,
    name = name
) {
    init {
        // Mirrors the size validation performed by Conv2D for its 2D/4D parameters.
        require(strides.size == 3) { "strides is expected to have size equal 3" }
        require(dilations.size == 3) { "dilations is expected to have size equal 3" }
    }

    // Axis specification reused across forward() calls to drop the inserted dimension.
    private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM))

    override fun forward(
        tf: Ops,
        input: Operand<Float>,
        isTraining: Operand<Boolean>,
        numberOfLosses: Operand<Float>?
    ): Operand<Float> {
        // (N, L, C) -> (N, 1, L, C): reuse the 2D convolution, then squeeze back to rank 3.
        val reshapedInput = tf.expandDims(input, tf.constant(EXTRA_DIM))
        val result = super.forward(tf, reshapedInput, isTraining, numberOfLosses)
        return tf.squeeze(result, squeezeAxis)
    }

    override fun toString(): String {
        // Class name fixed (was "Conv2D") and LongArrays rendered via contentToString(),
        // consistent with Conv2D.toString(); plain $strides would print an identity hash.
        return "Conv1D(filters=$filters, kernelSize=$kernelSize, strides=${strides.contentToString()}, " +
                "dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
                "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding)"
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,32 @@ import org.tensorflow.op.Ops
import org.tensorflow.op.core.Variable
import org.tensorflow.op.nn.Conv2d
import org.tensorflow.op.nn.Conv2d.dilations
import java.lang.IllegalArgumentException
import kotlin.math.roundToInt

private const val KERNEL_VARIABLE_NAME = "conv2d_kernel"

private const val BIAS_VARIABLE_NAME = "conv2d_bias"

/**
* 2D convolution layer (e.g. spatial convolution over images).
*
* This layer creates a convolution kernel that is convolved (actually cross-correlated)
* with the layer input to produce a tensor of outputs.
* Finally, if `activation` is applied to the outputs as well.
* Finally, the `activation` is applied to the outputs as well.
*
* It expects input data of size `(N, H, W, C)` where
* ```
* N - batch size
* H - height
* W - width
* C - number of channels
* ```
*
* @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution).
* @property [kernelSize] Two long numbers, specifying the height and width of the 2D convolution window.
* @property [strides] Strides of the pooling operation for each dimension of input tensor.
* NOTE: Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1.
* NOTE: Specifying any stride value != 1 is incompatible with specifying any `dilations` value != 1.
* @property [dilations] Four numbers, specifying the dilation rate to use for dilated convolution for each dimension of input tensor.
* @property [activation] Activation function.
* @property [kernelInitializer] An initializer for the convolution kernel
Expand All @@ -57,69 +67,89 @@ public class Conv2D(
public val padding: ConvPadding = ConvPadding.SAME,
public val useBias: Boolean = true,
name: String = ""
) : Conv2DImpl(
filtersInternal = filters,
kernelSizeInternal = kernelSize,
stridesInternal = strides,
dilationsInternal = dilations,
activationInternal = activation,
kernelInitializerInternal = kernelInitializer,
biasInitializerInternal = biasInitializer,
paddingInternal = padding,
useBiasInternal = useBias,
kernelVariableName = KERNEL_VARIABLE_NAME,
biasVariableName = BIAS_VARIABLE_NAME,
name = name
) {
init {
assertArraySize(kernelSize, 2, "kernelSize")
assertArraySize(strides, 4, "strides")
assertArraySize(dilations, 4, "dilations")
}

override fun toString(): String {
return "Conv2D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
"dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
"biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding)"
}
}

public abstract class Conv2DImpl(
private val filtersInternal: Long,
private val kernelSizeInternal: LongArray,
private val stridesInternal: LongArray,
private val dilationsInternal: LongArray,
private val activationInternal: Activations,
private val kernelInitializerInternal: Initializer,
private val biasInitializerInternal: Initializer,
private val paddingInternal: ConvPadding,
private val useBiasInternal: Boolean,
private val kernelVariableName: String,
private val biasVariableName: String,
name: String = ""
) : Layer(name) {
// weight tensors
private lateinit var kernel: Variable<Float>
private var bias: Variable<Float>? = null

// weight tensor shapes
private lateinit var biasShape: Shape
private lateinit var kernelShape: Shape
protected lateinit var kernelShape: Shape
protected lateinit var biasShape: Shape

override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
// Amount of channels should be the last value in the inputShape (make warning here)
val lastElement = inputShape.size(inputShape.numDimensions() - 1)

// Compute shapes of kernel and bias matrices
kernelShape = shapeFromDims(*kernelSize, lastElement, filters)
biasShape = Shape.make(filters)
kernelShape = shapeFromDims(*kernelSizeInternal, lastElement, filtersInternal)
biasShape = Shape.make(filtersInternal)

// should be calculated before addWeight because it's used in calculation, need to rewrite addWEight to avoid strange behaviour
// calculate fanIn, fanOut
// should be calculated before addWeight because it's used in calculation,
// need to rewrite addWeight to avoid strange behaviour calculate fanIn, fanOut
val inputDepth = lastElement // amount of channels
val outputDepth = filters // amount of channels for the next layer
val outputDepth = filtersInternal // amount of channels for the next layer

fanIn = (inputDepth * kernelSize[0] * kernelSize[1]).toInt()
fanOut = ((outputDepth * kernelSize[0] * kernelSize[1] / (strides[0].toDouble() * strides[1])).roundToInt())
fanIn = (inputDepth * kernelSizeInternal[0] * kernelSizeInternal[1]).toInt()
fanOut = ((outputDepth * kernelSizeInternal[0] * kernelSizeInternal[1] /
(stridesInternal[0].toDouble() * stridesInternal[1])).roundToInt())

val (kernelVariableName, biasVariableName) = defineVariableNames()
createConv2DVariables(tf, kernelVariableName, biasVariableName, kGraph)
}

private fun defineVariableNames(): Pair<String, String> {
return if (name.isNotEmpty()) {
Pair(conv2dKernelVarName(name), conv2dBiasVarName(name))
} else {
Pair(KERNEL_VARIABLE_NAME, BIAS_VARIABLE_NAME)
}
}

private fun createConv2DVariables(
tf: Ops,
kernelVariableName: String,
biasVariableName: String,
kGraph: KGraph
) {
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializer)
if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer)
}

override fun computeOutputShape(inputShape: Shape): Shape {
var rows = inputShape.size(1)
var cols = inputShape.size(2)
rows = convOutputLength(
rows, kernelSize[0].toInt(), padding,
strides[1].toInt(), dilations[1].toInt()
rows, kernelSizeInternal[0].toInt(), paddingInternal,
stridesInternal[1].toInt(), dilationsInternal[1].toInt()
)
cols = convOutputLength(
cols, kernelSize[1].toInt(), padding,
strides[2].toInt(), dilations[2].toInt()
cols, kernelSizeInternal[1].toInt(), paddingInternal,
stridesInternal[2].toInt(), dilationsInternal[2].toInt()
)

val shape = Shape.make(inputShape.size(0), rows, cols, filters)
val shape = Shape.make(inputShape.size(0), rows, cols, filtersInternal)
outputShape = TensorShape(shape)
return shape
}
Expand All @@ -130,26 +160,15 @@ public class Conv2D(
isTraining: Operand<Boolean>,
numberOfLosses: Operand<Float>?
): Operand<Float> {
val tfPadding = when (padding) {
ConvPadding.SAME -> "SAME"
ConvPadding.VALID -> "VALID"
ConvPadding.FULL -> "FULL"
}
val paddingName = paddingInternal.paddingName
val options: Conv2d.Options = dilations(dilationsInternal.toList()).dataFormat("NHWC")
var output: Operand<Float> = tf.nn.conv2d(input, kernel, stridesInternal.toMutableList(), paddingName, options)

val options: Conv2d.Options = dilations(dilations.toList()).dataFormat("NHWC")
var output: Operand<Float> = tf.nn.conv2d(input, kernel, strides.toMutableList(), tfPadding, options)

if (useBias) {
if (useBiasInternal) {
output = tf.nn.biasAdd(output, bias)
}

return Activations.convert(activation).apply(tf, output, name)
}

override val weights: Map<String, Array<*>> get() = extractConv2DWeights()

private fun extractConv2DWeights(): Map<String, Array<*>> {
return extractWeights(defineVariableNames().toList())
return Activations.convert(activationInternal).apply(tf, output, name)
}

/** Returns the shape of kernel weights. */
Expand All @@ -158,12 +177,41 @@ public class Conv2D(
/** Returns the shape of bias weights. */
public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()

override val weights: Map<String, Array<*>> get() = extractConv2DWeights()

override val hasActivation: Boolean get() = true

override val paramCount: Int
get() = (numElementsInShape(shapeToLongArray(kernelShape)) + numElementsInShape(shapeToLongArray(biasShape))).toInt()
get() = (kernelShape.numElements() + biasShape.numElements()).toInt()

override fun toString(): String {
return "Conv2D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, biasInitializer=$biasInitializer, kernelShape=$kernelShape, padding=$padding)"
private fun extractConv2DWeights(): Map<String, Array<*>> {
return extractWeights(defineVariableNames().toList())
}

private fun defineVariableNames(): Pair<String, String> {
return if (name.isNotEmpty()) {
Pair(conv2dKernelVarName(name), conv2dBiasVarName(name))
} else {
Pair(kernelVariableName, biasVariableName)
}
}

private fun createConv2DVariables(
tf: Ops,
kernelVariableName: String,
biasVariableName: String,
kGraph: KGraph
) {
kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())

kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal)
if (useBiasInternal) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal)
}
}

private fun assertArraySize(array: LongArray, size: Int, name: String) {
if (array.size != size) {
throw IllegalArgumentException("$name is expected to have size equal $size")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ package org.jetbrains.kotlinx.dl.api.core.layer.convolutional
/**
* Type of padding.
*/
public enum class ConvPadding {
public enum class ConvPadding(internal val paddingName: String) {
/**
* Results in padding evenly to the left/right or up/down of the input such that output has the same
* height/width dimension as the input.
*/
SAME,
SAME("SAME"),

/** No padding. */
VALID,
/** No padding. Results in smaller output size for `kernelSize > 1` */
VALID("VALID"),

/** Full padding. For Keras compatibility goals. */
FULL
FULL("FULL");
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,14 @@ public class DepthwiseConv2D(
isTraining: Operand<Boolean>,
numberOfLosses: Operand<Float>?
): Operand<Float> {
val tfPadding = when (padding) {
ConvPadding.SAME -> "SAME"
ConvPadding.VALID -> "VALID"
ConvPadding.FULL -> "FULL"
}

val paddingName = padding.paddingName
val options: DepthwiseConv2dNative.Options = dilations(dilations.toList()).dataFormat("NHWC")
var output: Operand<Float> =
tf.nn.depthwiseConv2dNative(
input,
depthwiseKernel,
strides.toMutableList(),
tfPadding,
paddingName,
options
)

Expand All @@ -175,11 +170,7 @@ public class DepthwiseConv2D(
override val hasActivation: Boolean get() = true

override val paramCount: Int
get() = (numElementsInShape(shapeToLongArray(depthwiseKernelShape)) + numElementsInShape(
shapeToLongArray(
biasShape
)
)).toInt()
get() = (depthwiseKernelShape.numElements() + biasShape.numElements()).toInt()

override fun toString(): String {
return "DepthwiseConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, dilations=${dilations.contentToString()}, activation=$activation, depthMultiplier=$depthMultiplier, depthwiseInitializer=$depthwiseInitializer, biasInitializer=$biasInitializer, padding=$padding, useBias=$useBias, depthwiseKernel=$depthwiseKernel, bias=$bias, biasShape=$biasShape, depthwiseKernelShape=$depthwiseKernelShape)"
Expand Down
Loading

0 comments on commit 54cd45c

Please sign in to comment.