Add transposed convolution layers
juliabeliaeva committed Jan 18, 2022
1 parent 6b1c592 commit 1d5492e
Showing 22 changed files with 1,386 additions and 74 deletions.
@@ -159,4 +159,12 @@ internal fun IntArray.toLongArray(): LongArray {
1 -> longArrayOf(this[0].toLong())
else -> LongArray(size) { this[it].toLong() }
}
}

internal fun LongArray.toIntArray(): IntArray {
return when (size) {
0 -> intArrayOf()
1 -> intArrayOf(this[0].toInt())
else -> IntArray(size) { this[it].toInt() }
}
}
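
The two converters above are mirror images: they shuttle shape and stride arrays between the Int-based layer API and the Long-based TensorFlow Java ops. A hypothetical standalone round trip (not part of the commit; redeclared without `internal` so the snippet is self-contained):

```kotlin
// Hypothetical usage sketch of the converters defined in the diff above.
fun IntArray.toLongArray(): LongArray = LongArray(size) { this[it].toLong() }
fun LongArray.toIntArray(): IntArray = IntArray(size) { this[it].toInt() }

fun main() {
    val strides = intArrayOf(1, 2, 2, 1)
    val longStrides = strides.toLongArray()                 // [1, 2, 2, 1] as Long values
    check(longStrides.toIntArray().contentEquals(strides))  // round trip is lossless for Int-range values
}
```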
@@ -59,7 +59,7 @@ public abstract class AbstractConv(
protected abstract val biasRegularizer: Regularizer?
protected abstract val activityRegularizer: Regularizer?
protected abstract val padding: ConvPadding
-protected abstract val useBias: Boolean
+internal abstract val useBias: Boolean

/** Tensor with kernel weights */
protected lateinit var kernel: KVariable
@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
import org.jetbrains.kotlinx.dl.api.core.layer.toLongList
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.jetbrains.kotlinx.dl.api.core.util.convBiasVarName
import org.jetbrains.kotlinx.dl.api.core.util.convKernelVarName
@@ -21,8 +22,6 @@ import org.tensorflow.op.Ops
import org.tensorflow.op.core.Squeeze
import org.tensorflow.op.nn.Conv2d

-private const val EXTRA_DIM = 1L

/**
* 1D convolution layer (e.g. convolution over audio data).
*
@@ -79,10 +78,6 @@ public class Conv1D(

override val kernelSize: IntArray = intArrayOf(kernelLength)

-/** Axis of height for which the extra dimension is added (unsqueezed) before actual
- * convolution operation and the output from actual implementation are squeezed. */
-private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM))

override fun kernelVarName(name: String): String = convKernelVarName(name, dim = 1)

override fun biasVarName(name: String): String = convBiasVarName(name, dim = 1)
@@ -91,15 +86,13 @@
tf: Ops,
input: Operand<Float>
): Operand<Float> {
-val expandedInput = tf.expandDims(input, tf.constant(EXTRA_DIM))
-val expandedKernel = tf.expandDims(kernel.variable, tf.constant(EXTRA_DIM - 1))
-val expandedStrides = intArrayOf(strides[0], 1, strides[1], strides[2])
-val expandedDilations = intArrayOf(dilations[0], 1, dilations[1], dilations[2])
-val options = Conv2d.dilations(expandedDilations.toLongList()).dataFormat("NHWC")
-val result = tf.nn.conv2d(
-expandedInput, expandedKernel, expandedStrides.toLongList(), padding.paddingName, options
-)
-return tf.squeeze(result, squeezeAxis)
+return tf.withExpandedDimensions(input) { expandedInput ->
+val options = Conv2d.dilations(expand(dilations).toLongList()).dataFormat("NHWC")
+return@withExpandedDimensions tf.nn.conv2d(
+expandedInput, tf.expandKernel(kernel.variable), expand(strides).toLongList(),
+padding.paddingName, options
+)
+}
}

protected override fun defineOutputShape(inputShape: Shape): Shape {
@@ -122,4 +115,53 @@ public class Conv1D(
"dilation=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
"biasInitializer=$biasInitializer, kernelShape=${kernel.shape}, biasShape=${bias?.shape}, padding=$padding, " +
"biasRegularizer=$biasRegularizer, kernelRegularizer=$kernelRegularizer, activityRegularizer=$activityRegularizer)"

internal companion object {
internal const val EXTRA_DIM = 1

/** Height axis at which the extra dimension is added (unsqueezed) before the actual
* convolution operation, and at which the output of that operation is squeezed back. */
private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM.toLong()))

internal fun expandKernel(kernel: IntArray): IntArray {
return kernel.withAdded(EXTRA_DIM - 1, 1)
}

internal fun Ops.expandKernel(kernel: Operand<Float>): Operand<Float> {
return expandDims(kernel, constant(EXTRA_DIM - 1))
}

internal fun TensorShape.expand(): TensorShape {
return TensorShape(dims().withAdded(EXTRA_DIM, 1))
}

internal fun expand(array: IntArray): IntArray {
return array.withAdded(EXTRA_DIM, 1)
}

/**
* Adds an extra dimension to the input, performs the provided operation,
* and squeezes the result by removing the previously added dimension.
* This makes it possible to perform 2D operations on 1D inputs.
*/
internal fun Ops.withExpandedDimensions(input: Operand<Float>,
operation: (Operand<Float>) -> Operand<Float>
): Operand<Float> {
val expandedInput = expandDims(input, constant(EXTRA_DIM))
val expandedOutput = operation(expandedInput)
return squeeze(expandedOutput, squeezeAxis)
}

internal fun LongArray.withAdded(position: Int, element: Long): LongArray {
return toMutableList().apply { add(position, element) }.toLongArray()
}

internal fun IntArray.withAdded(position: Int, element: Int): IntArray {
return toMutableList().apply { add(position, element) }.toIntArray()
}

internal fun IntArray.withAdded(position: Int, elements: List<Int>): IntArray {
return toMutableList().apply { addAll(position, elements) }.toIntArray()
}
}
}
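
The companion now centralizes the expansion trick that the removed inline code performed: a 1D input of shape (N, L, C) is unsqueezed at the height axis (EXTRA_DIM = 1) into (N, 1, L, C), run through a 2D op, and squeezed back. A self-contained sketch (not part of the commit) of the array-level part of that trick:

```kotlin
// Standalone sketch of the expansion helpers above. A 1D layer's operands gain
// a unit "height" entry so that the 2D TensorFlow ops can be reused:
//   input   (N, L, C)      -> expandDims(axis = 1) -> (N, 1, L, C)
//   kernel  (k, Cin, Cout) -> expandDims(axis = 0) -> (1, k, Cin, Cout)
//   strides [s0, s1, s2]   -> withAdded(1, 1)      -> [s0, 1, s1, s2]
fun IntArray.withAdded(position: Int, element: Int): IntArray =
    toMutableList().apply { add(position, element) }.toIntArray()

fun main() {
    val strides = intArrayOf(1, 3, 1)        // (batch, length, channels)
    val expanded = strides.withAdded(1, 1)   // unit stride for the inserted height axis
    check(expanded.contentEquals(intArrayOf(1, 1, 3, 1)))
}
```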
@@ -0,0 +1,103 @@
/*
* Copyright 2021-2022 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.EXTRA_DIM
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.expand
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.expandKernel
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.withAdded
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D.Companion.withExpandedDimensions
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv2DTranspose.Companion.withStandardPadding
import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
import org.jetbrains.kotlinx.dl.api.core.layer.toLongList
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.tensorflow.Operand
import org.tensorflow.op.Ops

/**
* 1D convolution transpose layer.
*
* This is an operation going in the opposite direction of a normal convolution:
* it transforms a tensor shaped like the output of some convolution into a tensor that has the shape of that convolution's input.
*
* This layer expects input data of size `(N, L, C)` where
* ```
* N - batch size
* L - length of signal sequence
* C - number of channels
* ```
*
* Note: dilation values greater than 1 are not supported on CPU
* (see https://github.com/tensorflow/tensorflow/issues/28264).
*
* @property [filters] dimensionality of the output space (i.e. the number of filters in the convolution)
* @property [kernelLength] size of the convolutional kernel (one number)
* @property [strides] strides of the convolution for each dimension of the input tensor (three numbers)
* @property [dilations] dilations of the convolution for each dimension of the input tensor (three numbers).
* Currently, dilation values greater than 1 are not supported on CPU.
* @property [activation] activation function
* @property [kernelInitializer] initializer for the kernel
* @property [biasInitializer] initializer for the bias
* @property [kernelRegularizer] regularizer for the kernel
* @property [biasRegularizer] regularizer for the bias
* @property [activityRegularizer] regularizer function applied to the output of the layer
* @property [padding] type of padding to use
* @property [outputPadding] the amount of explicit padding to use (six numbers: two for each dimension).
* @property [useBias] a flag that specifies if the bias should be used
* @param [name] custom layer name
*/
public class Conv1DTranspose(
public override val filters: Int = 3,
public val kernelLength: Int = 3,
public override val strides: IntArray = intArrayOf(1, 1, 1),
public override val dilations: IntArray = intArrayOf(1, 1, 1),
public override val activation: Activations = Activations.Relu,
public override val kernelInitializer: Initializer = HeNormal(),
public override val biasInitializer: Initializer = HeUniform(),
public override val kernelRegularizer: Regularizer? = null,
public override val biasRegularizer: Regularizer? = null,
public override val activityRegularizer: Regularizer? = null,
public override val padding: ConvPadding = ConvPadding.SAME,
public override val outputPadding: IntArray? = null,
public override val useBias: Boolean = true,
name: String = ""
) : ConvTranspose(dimensions = 1, name), NoGradients {

init {
requireArraySize(strides, dimensions + 2, "strides")
requireArraySize(dilations, dimensions + 2, "dilations")
if (outputPadding != null) requireArraySize(outputPadding, 2 * (dimensions + 2), "outputPadding")
isTrainable = false
}

override val kernelSize: IntArray = intArrayOf(kernelLength)

override fun convImplementation(tf: Ops, input: Operand<Float>): Operand<Float> {
return tf.withExpandedDimensions(input) { expandedInput ->
val expandedOutputPadding = outputPadding?.withAdded(EXTRA_DIM * 2, listOf(0, 0))
return@withExpandedDimensions tf.nn.conv2dBackpropInput(
tf.shapeWithDynamicBatchSize(outputShape.expand(), input),
tf.expandKernel(kernel.variable),
expandedInput,
expand(strides).toLongList(),
if (outputPadding != null) Conv2DTranspose.EXPLICIT else padding.paddingName,
*Conv2DTranspose.buildOptions(
expand(dilations),
expandedOutputPadding?.withStandardPadding(
padding,
expandKernel(kernelSize),
expand(dilations)
)
)
)
}
}
}
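
For context, a minimal usage sketch of the new layer. The `Sequential` and `Input` import paths are assumptions about the surrounding KotlinDL API, not part of this commit; the layer's parameters come from the constructor above:

```kotlin
import org.jetbrains.kotlinx.dl.api.core.Sequential          // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input    // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1DTranspose

fun main() {
    val model = Sequential.of(
        Input(100, 8),                     // (L = 100, C = 8); the batch dimension is implicit
        Conv1DTranspose(
            filters = 16,
            kernelLength = 3,
            strides = intArrayOf(1, 2, 1)  // stride 2 on the length axis
        )
    )
    // With the default SAME padding, stride 2 doubles the length axis: (100, 8) -> (200, 16).
}
```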
@@ -0,0 +1,126 @@
/*
* Copyright 2021-2022 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
import org.jetbrains.kotlinx.dl.api.core.layer.toLongList
import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
import org.jetbrains.kotlinx.dl.api.core.shape.convTransposeSingleSidePadding
import org.tensorflow.Operand
import org.tensorflow.op.Ops
import org.tensorflow.op.nn.Conv2dBackpropInput

/**
* 2D convolution transpose layer.
*
* This is an operation going in the opposite direction of a normal convolution:
* it transforms a tensor shaped like the output of some convolution into a tensor that has the shape of that convolution's input.
*
* This layer expects input data of size `(N, H, W, C)` where
* ```
* N - batch size
* H - height
* W - width
* C - number of channels
* ```
*
* Note: dilation values greater than 1 are not supported on CPU
* (see https://github.com/tensorflow/tensorflow/issues/28264).
*
* @property [filters] dimensionality of the output space (i.e. the number of filters in the convolution)
* @property [kernelSize] size of the convolutional kernel (two numbers)
* @property [strides] strides of the convolution for each dimension of the input tensor (four numbers)
* @property [dilations] dilations of the convolution for each dimension of the input tensor (four numbers).
* Currently, dilation values greater than 1 are not supported on CPU.
* @property [activation] activation function
* @property [kernelInitializer] initializer for the kernel
* @property [biasInitializer] initializer for the bias
* @property [kernelRegularizer] regularizer for the kernel
* @property [biasRegularizer] regularizer for the bias
* @property [activityRegularizer] regularizer function applied to the output of the layer
* @property [padding] type of padding to use
* @property [outputPadding] the amount of explicit padding to use (eight numbers: two for each dimension).
* @property [useBias] a flag that specifies if the bias should be used
* @param [name] custom layer name
*/
public class Conv2DTranspose(
public override val filters: Int = 3,
public override val kernelSize: IntArray = intArrayOf(3, 3),
public override val strides: IntArray = intArrayOf(1, 1, 1, 1),
public override val dilations: IntArray = intArrayOf(1, 1, 1, 1),
public override val activation: Activations = Activations.Relu,
public override val kernelInitializer: Initializer = HeNormal(),
public override val biasInitializer: Initializer = HeUniform(),
public override val kernelRegularizer: Regularizer? = null,
public override val biasRegularizer: Regularizer? = null,
public override val activityRegularizer: Regularizer? = null,
public override val padding: ConvPadding = ConvPadding.SAME,
public override val outputPadding: IntArray? = null,
public override val useBias: Boolean = true,
name: String = ""
) : ConvTranspose(dimensions = 2, name), NoGradients {

init {
requireArraySize(kernelSize, dimensions, "kernelSize")
requireArraySize(strides, dimensions + 2, "strides")
requireArraySize(dilations, dimensions + 2, "dilations")
if (outputPadding != null) requireArraySize(outputPadding, 2 * (dimensions + 2), "outputPadding")
isTrainable = false
}

override fun convImplementation(tf: Ops, input: Operand<Float>): Operand<Float> {
return tf.nn.conv2dBackpropInput(
tf.shapeWithDynamicBatchSize(outputShape, input),
kernel.variable,
input,
strides.toLongList(),
if (outputPadding != null) EXPLICIT else padding.paddingName,
*buildOptions(
dilations,
outputPadding?.withStandardPadding(
padding,
kernelSize,
dilations
)
)
)
}

internal companion object {
internal const val EXPLICIT = "EXPLICIT"

/**
* Combines the explicitly provided padding values with the standard padding implied by the given padding method.
* This is needed since the [org.tensorflow.op.NnOps.conv2dBackpropInput] function does not support specifying
* both a padding method and explicit output padding at the same time.
*/
internal fun IntArray.withStandardPadding(padding: ConvPadding,
kernelSize: IntArray,
dilations: IntArray
): IntArray {
val withStandardPadding = kernelSize.indices.flatMap { dim ->
listOf(
convTransposeSingleSidePadding(padding, this[2 * dim], kernelSize[dim], dilations[dim + 1]),
convTransposeSingleSidePadding(padding, this[2 * dim + 1], kernelSize[dim], dilations[dim + 1])
)
}
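// The leading and trailing (0, 0) pairs keep the batch and channel dimensions of the NHWC layout unpadded.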
return intArrayOf(0, 0, *(withStandardPadding.toIntArray()), 0, 0)
}

internal fun buildOptions(dilations: IntArray, outputPadding: IntArray?): Array<Conv2dBackpropInput.Options> {
val options = mutableListOf(Conv2dBackpropInput.dilations(dilations.toLongList()))
if (outputPadding != null) {
options.add(Conv2dBackpropInput.explicitPaddings(outputPadding.toLongList()))
}
return options.map { it.dataFormat("NHWC") }.toTypedArray()
}
}
}
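
An analogous sketch for the 2D layer, e.g. as the upsampling half of a small decoder. Same assumptions as before about the `Sequential` and `Input` APIs; the shapes follow the standard SAME-padding rule for transposed convolutions, out = in * stride:

```kotlin
import org.jetbrains.kotlinx.dl.api.core.Sequential          // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input    // assumed KotlinDL API
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv2DTranspose

fun main() {
    // A toy decoder: each stride-2 transposed convolution doubles both spatial axes.
    val decoder = Sequential.of(
        Input(7, 7, 64),
        Conv2DTranspose(filters = 32, kernelSize = intArrayOf(3, 3), strides = intArrayOf(1, 2, 2, 1)),
        Conv2DTranspose(filters = 1, kernelSize = intArrayOf(3, 3), strides = intArrayOf(1, 2, 2, 1))
    )
    // (7, 7, 64) -> (14, 14, 32) -> (28, 28, 1) with the default SAME padding.
}
```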