diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Layer.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Layer.kt
index bb9b0d133..d429fae4d 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Layer.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Layer.kt
@@ -15,6 +15,7 @@ import org.tensorflow.Operand
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Variable
+import java.lang.IllegalArgumentException

 /**
  * Base abstract class for all layers.
@@ -171,3 +172,8 @@ public abstract class Layer(public var name: String) {
     /** Returns the number of neurons. */
     public abstract val paramCount: Int
 }
+
+internal fun requireArraySize(array: LongArray, size: Int, name: String) =
+    require(array.size == size) {
+        "$name is expected to have size equal to $size, but got ${array.size}"
+    }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractConv.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractConv.kt
new file mode 100644
index 000000000..14fd53f72
--- /dev/null
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractConv.kt
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
+ */
+
+package org.jetbrains.kotlinx.dl.api.core.layer.convolutional
+
+import org.jetbrains.kotlinx.dl.api.core.KGraph
+import org.jetbrains.kotlinx.dl.api.core.activation.Activations
+import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
+import org.jetbrains.kotlinx.dl.api.core.layer.Layer
+import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
+import org.jetbrains.kotlinx.dl.api.core.shape.*
+import org.jetbrains.kotlinx.dl.api.core.util.getDType
+import org.tensorflow.Operand
+import org.tensorflow.Shape
+import org.tensorflow.op.Ops
+import org.tensorflow.op.core.Variable
+import java.lang.IllegalArgumentException
+import kotlin.math.roundToInt
+
+/**
+ * Abstract base class for convolutional layers: it owns the weight variables and the
+ * shared build/forward logic, and delegates the dimension-specific parts (variable
+ * naming, the TF convolution op itself, and the output shape) to subclasses.
+ */
+public abstract class AbstractConv(
+    protected val filtersInternal: Long,
+    protected val kernelSizeInternal: LongArray,
+    protected val stridesInternal: LongArray,
+    protected val dilationsInternal: LongArray,
+    protected val activationInternal: Activations,
+    protected val kernelInitializerInternal: Initializer,
+    protected val biasInitializerInternal: Initializer,
+    protected val kernelRegularizerInternal: Regularizer?,
+    protected val biasRegularizerInternal: Regularizer?,
+    protected val activityRegularizerInternal: Regularizer?,
+    protected val paddingInternal: ConvPadding,
+    protected val useBiasInternal: Boolean,
+    protected val kernelVariableName: String,
+    protected val biasVariableName: String,
+    name: String
+) : Layer(name) {
+    // weight tensors
+    protected lateinit var kernel: Variable<Float>
+    protected var bias: Variable<Float>? = null
+
+    // weight tensor shapes
+    protected lateinit var kernelShape: Shape
+    protected lateinit var biasShape: Shape
+
+    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
+        // The number of channels is expected to be the last dimension of the inputShape
+        val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1)
+
+        // Compute shapes of kernel and bias matrices
+        computeMatricesShapes(numberOfChannels)
+
+        // fanIn and fanOut must be calculated before addWeight is called, because they are
+        // used there; addWeight should be reworked to avoid relying on this implicit ordering
+        val inputDepth = getInputDepth(numberOfChannels) // number of input channels
+        val outputDepth = getOutputDepth(numberOfChannels) // number of output channels
+
+        fanIn = (inputDepth * multiply(*kernelSizeInternal)).toInt()
+        fanOut = ((outputDepth * multiply(*kernelSizeInternal)).toDouble() /
+                multiply(*stridesInternal).toDouble()).roundToInt()
+
+        val (kernelVariableName, biasVariableName) = defineVariableNames()
+        createConvVariables(tf, kernelVariableName, biasVariableName, kGraph)
+    }
+
+    override fun computeOutputShape(inputShape: Shape): Shape {
+        val shape = defineOutputShape(inputShape)
+        outputShape = TensorShape(shape)
+        return shape
+    }
+
+    override fun forward(
+        tf: Ops,
+        input: Operand<Float>,
+        isTraining: Operand<Boolean>,
+        numberOfLosses: Operand<Float>?
+    ): Operand<Float> {
+        var output = convImplementation(tf, input)
+
+        if (useBiasInternal) {
+            output = tf.nn.biasAdd(output, bias)
+        }
+
+        return Activations.convert(activationInternal).apply(tf, output, name)
+    }
+
+    /** Returns the shape of kernel weights. */
+    public val kernelShapeArray: LongArray get() = TensorShape(kernelShape).dims()
+
+    /** Returns the shape of bias weights. */
+    public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()
+
+    override var weights: Map<String, Array<*>>
+        get() = extractConvWeights()
+        set(value) = assignWeights(value)
+
+    override val hasActivation: Boolean get() = true
+
+    override val paramCount: Int
+        get() = (kernelShape.numElements() + biasShape.numElements()).toInt()
+
+    private fun extractConvWeights(): Map<String, Array<*>> = extractWeights(defineVariableNames().toList())
+
+    private fun defineVariableNames(): Pair<String, String> = if (name.isNotEmpty()) {
+        Pair(kernelVarName(name), biasVarName(name))
+    } else {
+        Pair(kernelVariableName, biasVariableName)
+    }
+
+    private fun createConvVariables(
+        tf: Ops,
+        kernelVariableName: String,
+        biasVariableName: String,
+        kGraph: KGraph
+    ) {
+        kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
+        if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())
+
+        kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal, kernelRegularizerInternal)
+        if (useBiasInternal) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal, biasRegularizerInternal)
+    }
+
+    /** Returns the number of input channels, used to compute fanIn. */
+    protected open fun getInputDepth(numberOfChannels: Long): Long = numberOfChannels
+
+    /** Returns the number of output channels, used to compute fanOut. */
+    protected open fun getOutputDepth(numberOfChannels: Long): Long = filtersInternal
+
+    /** Computes [kernelShape] and [biasShape] from the number of input channels. */
+    protected open fun computeMatricesShapes(numberOfChannels: Long) {
+        kernelShape = shapeFromDims(*kernelSizeInternal, numberOfChannels, filtersInternal)
+        biasShape = Shape.make(filtersInternal)
+    }
+
+    protected abstract fun kernelVarName(name: String): String
+
+    protected abstract fun biasVarName(name: String): String
+
+    protected abstract fun convImplementation(tf: Ops, input: Operand<Float>): Operand<Float>
+
+    protected abstract fun defineOutputShape(inputShape: Shape): Shape
+}
+
+private fun multiply(vararg values: Long) = values.fold(1L, Long::times)
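To make the new contract concrete, here is a minimal editorial sketch (not part of the patch) of what a dimension-specific subclass has to supply; the class name `MiniConv` and its variable names are hypothetical, and the real subclasses below (Conv1D, Conv2D, Conv3D) follow exactly this pattern with the imports used in their files.

// Editorial sketch, not part of the patch; assumes the imports used by the Conv files in this patch.
public class MiniConv(filters: Long = 4) : AbstractConv(
    filtersInternal = filters,
    kernelSizeInternal = longArrayOf(3, 3),
    stridesInternal = longArrayOf(1, 1, 1, 1),
    dilationsInternal = longArrayOf(1, 1, 1, 1),
    activationInternal = Activations.Relu,
    kernelInitializerInternal = HeNormal(),
    biasInitializerInternal = HeUniform(),
    kernelRegularizerInternal = null,
    biasRegularizerInternal = null,
    activityRegularizerInternal = null,
    paddingInternal = ConvPadding.SAME,
    useBiasInternal = true,
    kernelVariableName = "mini_conv_kernel", // hypothetical variable names
    biasVariableName = "mini_conv_bias",
    name = ""
) {
    // Only the dimension-specific pieces are left to the subclass:
    override fun kernelVarName(name: String): String = name + "_mini_conv_kernel"
    override fun biasVarName(name: String): String = name + "_mini_conv_bias"

    // The actual TF op; build()/forward() in the base class handle variables, bias and activation.
    override fun convImplementation(tf: Ops, input: Operand<Float>): Operand<Float> =
        tf.nn.conv2d(input, kernel, stridesInternal.toMutableList(), paddingInternal.paddingName)

    // A real layer derives this via convOutputLength, as Conv1D/Conv2D/Conv3D do below.
    override fun defineOutputShape(inputShape: Shape): Shape = inputShape
}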
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv1D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv1D.kt
index 383dde48f..fbe16dbed 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv1D.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv1D.kt
@@ -9,10 +9,16 @@ import org.jetbrains.kotlinx.dl.api.core.activation.Activations
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
 import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
+import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
+import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
+import org.jetbrains.kotlinx.dl.api.core.util.convBiasVarName
+import org.jetbrains.kotlinx.dl.api.core.util.convKernelVarName
 import org.tensorflow.Operand
+import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Squeeze
+import org.tensorflow.op.nn.Conv2d

 private const val KERNEL_VARIABLE_NAME = "conv1d_kernel"
@@ -36,8 +42,7 @@ private const val EXTRA_DIM = 1L
  *
  * @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution).
  * @property [kernelSize] Long number, specifying the width of the 1D convolution window.
- * @property [strides] Three numbers specifying stride of the pooling
- * operation for each dimension of input tensor.
+ * @property [strides] Three numbers specifying the strides of the convolution for each dimension of input tensor.
  * NOTE: Specifying stride value != 1 is incompatible with specifying `dilation` value != 1.
  * @property [dilations] Three numbers specifying the dilation rate to use for
  * dilated convolution sequence dimensions of input tensor.
@@ -68,7 +73,7 @@ public class Conv1D(
     public val padding: ConvPadding = ConvPadding.SAME,
     public val useBias: Boolean = true,
     name: String = "",
-) : Conv2DImpl(
+) : AbstractConv(
     filtersInternal = filters,
     kernelSizeInternal = longArrayOf(1, kernelSize),
     stridesInternal = longArrayOf(strides[0], 1, strides[1], strides[2]),
@@ -85,22 +90,46 @@ public class Conv1D(
     biasVariableName = BIAS_VARIABLE_NAME,
     name = name
 ) {
+    init {
+        requireArraySize(strides, 3, "strides")
+        requireArraySize(dilations, 3, "dilations")
+    }
+
     private val squeezeAxis = Squeeze.axis(listOf(EXTRA_DIM))

-    override fun forward(
+    override fun kernelVarName(name: String): String = convKernelVarName(name, dim = 1)
+
+    override fun biasVarName(name: String): String = convBiasVarName(name, dim = 1)
+
+    override fun convImplementation(
         tf: Ops,
-        input: Operand<Float>,
-        isTraining: Operand<Boolean>,
-        numberOfLosses: Operand<Float>?
+        input: Operand<Float>
     ): Operand<Float> {
+        val options = Conv2d.dilations(dilationsInternal.toList()).dataFormat("NHWC")
         val reshapedInput = tf.expandDims(input, tf.constant(EXTRA_DIM))
-        val result = super.forward(tf, reshapedInput, isTraining, numberOfLosses)
+        val result =
+            tf.nn.conv2d(reshapedInput, kernel, stridesInternal.toMutableList(), paddingInternal.paddingName, options)
         return tf.squeeze(result, squeezeAxis)
     }

-    override fun toString(): String {
-        return "Conv2D(filters=$filters, kernelSize=$kernelSize, strides=$strides, " +
-                "dilation=$dilations, activation=$activation, kernelInitializer=$kernelInitializer, " +
-                "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding)"
+    override fun defineOutputShape(inputShape: Shape): Shape {
+        val batchSize = inputShape.size(0)
+        val colsCount = inputShape.size(1)
+
+        val cols = convOutputLength(
+            colsCount,
+            kernelSize.toInt(),
+            paddingInternal,
+            strides[1].toInt(),
+            dilations[1].toInt()
+        )
+
+        return Shape.make(batchSize, cols, filtersInternal)
     }
+
+    override fun toString(): String =
+        "Conv1D(filters=$filters, kernelSize=$kernelSize, strides=${strides.contentToString()}, " +
+                "dilation=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
+                "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding, " +
+                "biasRegularizer=$biasRegularizer, kernelRegularizer=$kernelRegularizer, activityRegularizer=$activityRegularizer)"
 }
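Worth spelling out why the 1D case can reuse `conv2d`: the layer inserts a dummy height axis, runs a 2D convolution with a `1 x kernelSize` window, and drops the axis again. A shape walk-through with assumed concrete sizes (editorial sketch, not part of the patch):

// input:                              (batch=32, steps=100, channels=8)
// tf.expandDims at axis EXTRA_DIM=1:  (32, 1, 100, 8)
// tf.nn.conv2d, kernel (1, kernelSize, 8, filters), strides (s0, 1, s1, s2):
//                                     (32, 1, steps', filters)
// tf.squeeze at axis 1:               (32, steps', filters)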
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv2D.kt
index 02dad5863..321463512 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv2D.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/Conv2D.kt
@@ -5,27 +5,19 @@

 package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

-import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.activation.Activations
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
 import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
-import org.jetbrains.kotlinx.dl.api.core.layer.Layer
+import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
+import org.jetbrains.kotlinx.dl.api.core.util.convBiasVarName
+import org.jetbrains.kotlinx.dl.api.core.util.convKernelVarName
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
-import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
 import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
-import org.jetbrains.kotlinx.dl.api.core.shape.numElements
-import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
-import org.jetbrains.kotlinx.dl.api.core.util.conv2dBiasVarName
-import org.jetbrains.kotlinx.dl.api.core.util.conv2dKernelVarName
-import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Variable
-import org.tensorflow.op.nn.Conv2d
 import org.tensorflow.op.nn.Conv2d.dilations
-import kotlin.math.roundToInt

 private const val KERNEL_VARIABLE_NAME = "conv2d_kernel"
@@ -48,7 +40,7 @@ private const val BIAS_VARIABLE_NAME = "conv2d_bias"
  *
  * @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution).
  * @property [kernelSize] Two long numbers, specifying the height and width of the 2D convolution window.
- * @property [strides] Strides of the pooling operation for each dimension of input tensor.
+ * @property [strides] Four numbers, specifying the strides of the convolution for each dimension of input tensor.
  * NOTE: Specifying any stride value != 1 is incompatible with specifying any `dilations` value != 1.
  * @property [dilations] Four numbers, specifying the dilation rate to use for dilated convolution for each dimension of input tensor.
  * @property [activation] Activation function.
@@ -76,7 +68,7 @@ public class Conv2D(
     public val padding: ConvPadding = ConvPadding.SAME,
     public val useBias: Boolean = true,
     name: String = ""
-) : Conv2DImpl(
+) : AbstractConv(
     filtersInternal = filters,
     kernelSizeInternal = kernelSize,
     stridesInternal = strides,
@@ -94,142 +86,49 @@ public class Conv2D(
     name = name
 ) {
     init {
-        assertArraySize(kernelSize, 2, "kernelSize")
-        assertArraySize(strides, 4, "strides")
-        assertArraySize(dilations, 4, "dilations")
+        requireArraySize(kernelSize, 2, "kernelSize")
+        requireArraySize(strides, 4, "strides")
+        requireArraySize(dilations, 4, "dilations")
     }

-    override fun toString(): String {
-        return "Conv2D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
-                "dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
-                "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding)"
-    }
-}
-
-public abstract class Conv2DImpl(
-    private val filtersInternal: Long,
-    private val kernelSizeInternal: LongArray,
-    private val stridesInternal: LongArray,
-    private val dilationsInternal: LongArray,
-    private val activationInternal: Activations,
-    private val kernelInitializerInternal: Initializer,
-    private val biasInitializerInternal: Initializer,
-    private val kernelRegularizerInternal: Regularizer? = null,
-    private val biasRegularizerInternal: Regularizer? = null,
-    private val activityRegularizerInternal: Regularizer? = null,
-    private val paddingInternal: ConvPadding,
-    private val useBiasInternal: Boolean,
-    private val kernelVariableName: String,
-    private val biasVariableName: String,
-    name: String = ""
-) : Layer(name) {
-    // weight tensors
-    private lateinit var kernel: Variable<Float>
-    private var bias: Variable<Float>? = null
-
-    // weight tensor shapes
-    protected lateinit var kernelShape: Shape
-    protected lateinit var biasShape: Shape
-
-    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
-        // Amount of channels should be the last value in the inputShape (make warning here)
-        val lastElement = inputShape.size(inputShape.numDimensions() - 1)
-
-        // Compute shapes of kernel and bias matrices
-        kernelShape = shapeFromDims(*kernelSizeInternal, lastElement, filtersInternal)
-        biasShape = Shape.make(filtersInternal)
-
-        // should be calculated before addWeight because it's used in calculation,
-        // need to rewrite addWeight to avoid strange behaviour calculate fanIn, fanOut
-        val inputDepth = lastElement // amount of channels
-        val outputDepth = filtersInternal // amount of channels for the next layer
-
-        fanIn = (inputDepth * kernelSizeInternal[0] * kernelSizeInternal[1]).toInt()
-        fanOut = ((outputDepth * kernelSizeInternal[0] * kernelSizeInternal[1] /
-                (stridesInternal[0].toDouble() * stridesInternal[1])).roundToInt())
-
-        val (kernelVariableName, biasVariableName) = defineVariableNames()
-        createConv2DVariables(tf, kernelVariableName, biasVariableName, kGraph)
-    }
-
-    override fun computeOutputShape(inputShape: Shape): Shape {
-        var rows = inputShape.size(1)
-        var cols = inputShape.size(2)
-        rows = convOutputLength(
-            rows, kernelSizeInternal[0].toInt(), paddingInternal,
-            stridesInternal[1].toInt(), dilationsInternal[1].toInt()
-        )
-        cols = convOutputLength(
-            cols, kernelSizeInternal[1].toInt(), paddingInternal,
-            stridesInternal[2].toInt(), dilationsInternal[2].toInt()
-        )
-
-        val shape = Shape.make(inputShape.size(0), rows, cols, filtersInternal)
-        outputShape = TensorShape(shape)
-        return shape
-    }
-
-    override fun forward(
+    override fun convImplementation(
         tf: Ops,
-        input: Operand<Float>,
-        isTraining: Operand<Boolean>,
-        numberOfLosses: Operand<Float>?
+        input: Operand<Float>
     ): Operand<Float> {
-        val paddingName = paddingInternal.paddingName
-        val options: Conv2d.Options = dilations(dilationsInternal.toList()).dataFormat("NHWC")
-        var output: Operand<Float> = tf.nn.conv2d(input, kernel, stridesInternal.toMutableList(), paddingName, options)
-
-        if (useBiasInternal) {
-            output = tf.nn.biasAdd(output, bias)
-        }
-
-        return Activations.convert(activationInternal).apply(tf, output, name)
+        val options = dilations(dilationsInternal.toList()).dataFormat("NHWC")
+        return tf.nn.conv2d(input, kernel, stridesInternal.toMutableList(), paddingInternal.paddingName, options)
     }

-    /** Returns the shape of kernel weights. */
-    public val kernelShapeArray: LongArray get() = TensorShape(kernelShape).dims()
-
-    /** Returns the shape of bias weights. */
-    public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()
-
-    override var weights: Map<String, Array<*>>
-        get() = extractConv2DWeights()
-        set(value) = assignWeights(value)
-
-    override val hasActivation: Boolean get() = true
-
-    override val paramCount: Int
-        get() = (kernelShape.numElements() + biasShape.numElements()).toInt()
-
-    private fun extractConv2DWeights(): Map<String, Array<*>> {
-        return extractWeights(defineVariableNames().toList())
-    }
+    override fun defineOutputShape(inputShape: Shape): Shape {
+        val batchSize = inputShape.size(0)
+        val rowsCount = inputShape.size(1)
+        val colsCount = inputShape.size(2)
+
+        val rows = convOutputLength(
+            rowsCount,
+            kernelSizeInternal[0].toInt(),
+            paddingInternal,
+            stridesInternal[1].toInt(),
+            dilationsInternal[1].toInt()
+        )
+        val cols = convOutputLength(
+            colsCount,
+            kernelSizeInternal[1].toInt(),
+            paddingInternal,
+            stridesInternal[2].toInt(),
+            dilationsInternal[2].toInt()
+        )

-    private fun defineVariableNames(): Pair<String, String> {
-        return if (name.isNotEmpty()) {
-            Pair(conv2dKernelVarName(name), conv2dBiasVarName(name))
-        } else {
-            Pair(kernelVariableName, biasVariableName)
-        }
+        return Shape.make(batchSize, rows, cols, filtersInternal)
     }

-    private fun createConv2DVariables(
-        tf: Ops,
-        kernelVariableName: String,
-        biasVariableName: String,
-        kGraph: KGraph
-    ) {
-        kernel = tf.withName(kernelVariableName).variable(kernelShape, getDType())
-        if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())
+    override fun toString(): String =
+        "Conv2D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
+                "dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " +
+                "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding, " +
+                "biasRegularizer=$biasRegularizer, kernelRegularizer=$kernelRegularizer, activityRegularizer=$activityRegularizer)"

-        kernel = addWeight(tf, kGraph, kernelVariableName, kernel, kernelInitializerInternal, kernelRegularizerInternal)
-        if (useBiasInternal) bias =
-            addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal, biasRegularizerInternal)
-    }
-}
+    override fun kernelVarName(name: String): String = convKernelVarName(name, dim = 2)

-private fun assertArraySize(array: LongArray, size: Int, name: String) {
-    if (array.size != size) {
-        throw IllegalArgumentException("$name is expected to have size equal $size")
-    }
+    override fun biasVarName(name: String): String = convBiasVarName(name, dim = 2)
 }
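A brief usage sketch of the refactored class (editorial, not part of the patch; the argument values are illustrative):

// Editorial usage sketch; values are illustrative.
val conv = Conv2D(
    filters = 64,
    kernelSize = longArrayOf(3, 3),
    strides = longArrayOf(1, 2, 2, 1), // NHWC order: batch, rows, cols, channels
    padding = ConvPadding.SAME
)
// Validation is now eager and happens at construction time:
// Conv2D(strides = longArrayOf(2, 2)) throws IllegalArgumentException
// ("strides is expected to have size equal to 4, but got 2").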
+ */ + +package org.jetbrains.kotlinx.dl.api.core.layer.convolutional + +import org.jetbrains.kotlinx.dl.api.core.activation.Activations +import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal +import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform +import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer +import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients +import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize +import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer +import org.jetbrains.kotlinx.dl.api.core.shape.* +import org.jetbrains.kotlinx.dl.api.core.util.convBiasVarName +import org.jetbrains.kotlinx.dl.api.core.util.convKernelVarName +import org.tensorflow.Operand +import org.tensorflow.Shape +import org.tensorflow.op.Ops +import org.tensorflow.op.nn.Conv3d.dilations + +private const val KERNEL_VARIABLE_NAME = "conv3d_kernel" + +private const val BIAS_VARIABLE_NAME = "conv3d_bias" + +/** + * 3D convolution layer (e.g. spatial convolution over video frames or 3D images). + * + * This layer creates a convolution kernel that is convolved (actually cross-correlated) + * with the layer input to produce a tensor of outputs. + * Finally, the `activation` is applied to the outputs as well. + * + * It expects input data of size `(N, D, H, W, C)` where + * ``` + * N - batch size + * D - depth + * H - height + * W - width + * C - number of channels + * ``` + * + * @property [filters] The dimensionality of the output space (i.e. the number of filters in the convolution). + * @property [kernelSize] Three long numbers, specifying the height and width of the 3D convolution cube. + * @property [strides] Five numbers, specifying the strides of the pooling operation for each dimension of input tensor. + * NOTE: Specifying any stride value != 1 is incompatible with specifying any `dilations` value != 1. + * @property [dilations] Five numbers, specifying the dilation rate to use for dilated convolution for each dimension of input tensor. + * @property [activation] Activation function. + * @property [kernelInitializer] An initializer for the convolution kernel + * @property [biasInitializer] An initializer for the bias vector. + * @property [kernelRegularizer] Regularizer function applied to the `kernel` weights matrix. + * @property [biasRegularizer] Regularizer function applied to the `bias` vector. + * @property [activityRegularizer] Regularizer function applied to the output of the layer (its "activation"). + * @property [padding] The padding method, either 'valid' or 'same' or 'full'. + * @property [name] Custom layer name. + * @property [useBias] If true the layer uses a bias vector. + * @constructor Creates [Conv3D] object. + * + * @since 0.3 + */ +public class Conv3D( + public val filters: Long = 32, + public val kernelSize: LongArray = longArrayOf(3, 3, 3), + public val strides: LongArray = longArrayOf(1, 1, 1, 1, 1), + public val dilations: LongArray = longArrayOf(1, 1, 1, 1, 1), + public val activation: Activations = Activations.Relu, + public val kernelInitializer: Initializer = HeNormal(), + public val biasInitializer: Initializer = HeUniform(), + public val kernelRegularizer: Regularizer? = null, + public val biasRegularizer: Regularizer? = null, + public val activityRegularizer: Regularizer? 
= null, + public val padding: ConvPadding = ConvPadding.SAME, + public val useBias: Boolean = true, + name: String = "" +) : AbstractConv( + filtersInternal = filters, + kernelSizeInternal = kernelSize, + stridesInternal = strides, + dilationsInternal = dilations, + activationInternal = activation, + kernelInitializerInternal = kernelInitializer, + biasInitializerInternal = biasInitializer, + kernelRegularizerInternal = kernelRegularizer, + biasRegularizerInternal = biasRegularizer, + activityRegularizerInternal = activityRegularizer, + paddingInternal = padding, + useBiasInternal = useBias, + kernelVariableName = KERNEL_VARIABLE_NAME, + biasVariableName = BIAS_VARIABLE_NAME, + name = name +) { + init { + requireArraySize(kernelSize, 3, "kernelSize") + requireArraySize(strides, 5, "strides") + requireArraySize(dilations, 5, "dilations") + isTrainable = false + } + + override fun kernelVarName(name: String): String = convKernelVarName(name, dim = 3) + + override fun biasVarName(name: String): String = convBiasVarName(name, dim = 3) + + override fun convImplementation( + tf: Ops, + input: Operand + ): Operand { + val options = dilations(dilationsInternal.toList()).dataFormat("NDHWC") + return tf.nn.conv3d(input, kernel, stridesInternal.toMutableList(), paddingInternal.paddingName, options) + } + + protected override fun defineOutputShape(inputShape: Shape): Shape { + val batchSize = inputShape.size(0) + val depthsCount = inputShape.size(1) + val rowsCount = inputShape.size(2) + val colsCount = inputShape.size(3) + + val depths = convOutputLength( + depthsCount, + kernelSizeInternal[0].toInt(), + paddingInternal, + stridesInternal[1].toInt(), + dilationsInternal[1].toInt() + ) + val rows = convOutputLength( + rowsCount, + kernelSizeInternal[1].toInt(), + paddingInternal, + stridesInternal[2].toInt(), + dilationsInternal[2].toInt() + ) + val cols = convOutputLength( + colsCount, + kernelSizeInternal[2].toInt(), + paddingInternal, + stridesInternal[3].toInt(), + dilationsInternal[3].toInt() + ) + + return Shape.make(batchSize, depths, rows, cols, filtersInternal) + } + + override fun toString(): String = + "Conv3D(filters=$filters, kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " + + "dilations=${dilations.contentToString()}, activation=$activation, kernelInitializer=$kernelInitializer, " + + "biasInitializer=$biasInitializer, kernelShape=$kernelShape, biasShape=$biasShape, padding=$padding, " + + "biasRegularizer=$biasRegularizer, kernelRegularizer=$kernelRegularizer, activityRegularizer=$activityRegularizer)" +} diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt index df2120bb9..df9819425 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt @@ -5,28 +5,21 @@ package org.jetbrains.kotlinx.dl.api.core.layer.convolutional -import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer -import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients +import 
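All three `defineOutputShape` overrides reduce to the same per-axis arithmetic. A worked example, assuming `convOutputLength` implements the usual Keras-style formulas (editorial note, not part of the patch):

// Assumed per-axis output length, in the usual Keras conventions:
//   SAME:  outLen = ceil(inLen / stride)
//   VALID: outLen = ceil((inLen - (kernelSize - 1) * dilation) / stride)
// e.g. inLen = 28, kernelSize = 3, stride = 2, dilation = 1:
//   SAME  -> ceil(28 / 2) = 14;  VALID -> ceil((28 - 2) / 2) = 13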
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt
index df2120bb9..df9819425 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/DepthwiseConv2D.kt
@@ -5,28 +5,21 @@

 package org.jetbrains.kotlinx.dl.api.core.layer.convolutional

-import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.activation.Activations
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
 import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
-import org.jetbrains.kotlinx.dl.api.core.layer.Layer
 import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
+import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
-import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
 import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
-import org.jetbrains.kotlinx.dl.api.core.shape.numElements
 import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
 import org.jetbrains.kotlinx.dl.api.core.util.depthwiseConv2dBiasVarName
 import org.jetbrains.kotlinx.dl.api.core.util.depthwiseConv2dKernelVarName
-import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Variable
 import org.tensorflow.op.nn.DepthwiseConv2dNative
-import org.tensorflow.op.nn.DepthwiseConv2dNative.dilations
-import kotlin.math.roundToInt

 private const val KERNEL_VARIABLE_NAME = "depthwise_conv2d_kernel"
 private const val BIAS_VARIABLE_NAME = "depthwise_conv2d_bias"
@@ -71,127 +64,82 @@ public class DepthwiseConv2D(
     public val padding: ConvPadding = ConvPadding.SAME,
     public val useBias: Boolean = true,
     name: String = ""
-) : Layer(name), NoGradients {
-    // weight tensors
-    private lateinit var depthwiseKernel: Variable<Float>
-    private var bias: Variable<Float>? = null
-
-    // weight tensor shapes
-    private lateinit var biasShape: Shape
-    private lateinit var depthwiseKernelShape: Shape
+) : AbstractConv(
+    // filtersInternal is unused in this subclass: every place where the base class
+    // would read it is overridden below to derive the value from depthMultiplier
+    // and from the number of channels in the input shape
+    filtersInternal = -1,
+    kernelSizeInternal = kernelSize,
+    stridesInternal = strides,
+    dilationsInternal = dilations,
+    activationInternal = activation,
+    kernelInitializerInternal = depthwiseInitializer,
+    biasInitializerInternal = biasInitializer,
+    kernelRegularizerInternal = depthwiseRegularizer,
+    biasRegularizerInternal = biasRegularizer,
+    activityRegularizerInternal = activityRegularizer,
+    paddingInternal = padding,
+    useBiasInternal = useBias,
+    kernelVariableName = KERNEL_VARIABLE_NAME,
+    biasVariableName = BIAS_VARIABLE_NAME,
+    name = name
+), NoGradients {
     init {
+        requireArraySize(kernelSize, 2, "kernelSize")
+        requireArraySize(strides, 4, "strides")
+        requireArraySize(dilations, 4, "dilations")
         isTrainable = false
     }

-    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
-        // Amount of channels should be the last value in the inputShape (make warning here)
-        val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1)
-
-        // Compute shapes of kernel and bias matrices
-        depthwiseKernelShape = shapeFromDims(*kernelSize, numberOfChannels, this.depthMultiplier.toLong())
-        biasShape = Shape.make(numberOfChannels * this.depthMultiplier)
-
-        // should be calculated before addWeight because it's used in calculation, need to rewrite addWEight to avoid strange behaviour
-        // calculate fanIn, fanOut
-        val inputDepth = numberOfChannels // amount of channels
-        val outputDepth = numberOfChannels * this.depthMultiplier // amount of channels for the next layer
-
-        fanIn = (inputDepth * kernelSize[0] * kernelSize[1]).toInt()
-        fanOut = ((outputDepth * kernelSize[0] * kernelSize[1] / (strides[0].toDouble() * strides[1])).roundToInt())
-
-        val (kernelVariableName, biasVariableName) = defineVariableNames()
-        createDepthwiseConv2DVariables(tf, kernelVariableName, biasVariableName, kGraph)
-    }
-
-    private fun defineVariableNames(): Pair<String, String> {
-        return if (name.isNotEmpty()) {
-            Pair(depthwiseConv2dKernelVarName(name), depthwiseConv2dBiasVarName(name))
-        } else {
-            Pair(KERNEL_VARIABLE_NAME, BIAS_VARIABLE_NAME)
-        }
+    override fun computeMatricesShapes(numberOfChannels: Long) {
+        kernelShape = shapeFromDims(*kernelSize, numberOfChannels, depthMultiplier.toLong())
+        biasShape = Shape.make(numberOfChannels * depthMultiplier)
     }

-    private fun createDepthwiseConv2DVariables(
-        tf: Ops,
-        kernelVariableName: String,
-        biasVariableName: String,
-        kGraph: KGraph
-    ) {
-        depthwiseKernel = tf.withName(kernelVariableName).variable(depthwiseKernelShape, getDType())
-        if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())
-
-        depthwiseKernel = addWeight(
-            tf,
-            kGraph,
-            kernelVariableName,
-            depthwiseKernel,
-            depthwiseInitializer,
-            depthwiseRegularizer
-        )
-        if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer)
-    }
+    override fun getOutputDepth(numberOfChannels: Long): Long = numberOfChannels * depthMultiplier
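+    // Editorial note (not in the original patch): for a depthwise convolution the kernel
+    // shape is (kernelH, kernelW, channels, depthMultiplier) and the output channel count
+    // is channels * depthMultiplier (e.g. 32 input channels with depthMultiplier = 2 yield
+    // 64 output channels); computeMatricesShapes and getOutputDepth above encode exactly that.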
+    override fun kernelVarName(name: String): String = depthwiseConv2dKernelVarName(name)

+    override fun biasVarName(name: String): String = depthwiseConv2dBiasVarName(name)

-    override fun computeOutputShape(inputShape: Shape): Shape {
-        var rows = inputShape.size(1)
-        var cols = inputShape.size(2)
-        rows = convOutputLength(
-            rows, kernelSize[0].toInt(), padding,
-            strides[1].toInt(), dilations[1].toInt()
-        )
-        cols = convOutputLength(
-            cols, kernelSize[1].toInt(), padding,
-            strides[2].toInt(), dilations[2].toInt()
-        )
-
-        val outFilters = inputShape.size(3) * this.depthMultiplier
-        return Shape.make(inputShape.size(0), rows, cols, outFilters)
-    }
-
-    override fun forward(
+    override fun convImplementation(
         tf: Ops,
-        input: Operand<Float>,
-        isTraining: Operand<Boolean>,
-        numberOfLosses: Operand<Float>?
+        input: Operand<Float>
     ): Operand<Float> {
-        val paddingName = padding.paddingName
-        val options: DepthwiseConv2dNative.Options = dilations(dilations.toList()).dataFormat("NHWC")
-        var output: Operand<Float> =
-            tf.nn.depthwiseConv2dNative(
-                input,
-                depthwiseKernel,
-                strides.toMutableList(),
-                paddingName,
-                options
-            )
-
-        if (useBias) {
-            output = tf.nn.biasAdd(output, bias)
-        }
-
-        return Activations.convert(activation).apply(tf, output, name)
+        val options = DepthwiseConv2dNative.dilations(dilations.toList()).dataFormat("NHWC")
+        return tf.nn.depthwiseConv2dNative(input, kernel, strides.toMutableList(), padding.paddingName, options)
     }

-    override var weights: Map<String, Array<*>>
-        get() = extractDepthConv2DWeights()
-        set(value) = assignWeights(value)
+    override fun defineOutputShape(inputShape: Shape): Shape {
+        val batchSize = inputShape.size(0)
+        val rowsCount = inputShape.size(1)
+        val colsCount = inputShape.size(2)
+        val channelsCount = inputShape.size(3)
+
+        val rows = convOutputLength(
+            rowsCount,
+            kernelSizeInternal[0].toInt(),
+            paddingInternal,
+            stridesInternal[1].toInt(),
+            dilationsInternal[1].toInt()
+        )
+        val cols = convOutputLength(
+            colsCount,
+            kernelSizeInternal[1].toInt(),
+            paddingInternal,
+            stridesInternal[2].toInt(),
+            dilationsInternal[2].toInt()
+        )
+        val filters = channelsCount * depthMultiplier

-    private fun extractDepthConv2DWeights(): Map<String, Array<*>> {
-        return extractWeights(defineVariableNames().toList())
+        return Shape.make(batchSize, rows, cols, filters)
     }

-    /** Returns the shape of kernel weights. */
-    public val kernelShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims()
-
-    /** Returns the shape of bias weights. */
-    public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()
-
-    override val hasActivation: Boolean get() = true
-
-    override val paramCount: Int
-        get() = (depthwiseKernelShape.numElements() + biasShape.numElements()).toInt()
-
-    override fun toString(): String {
-        return "DepthwiseConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, dilations=${dilations.contentToString()}, activation=$activation, depthMultiplier=$depthMultiplier, depthwiseInitializer=$depthwiseInitializer, biasInitializer=$biasInitializer, padding=$padding, useBias=$useBias, depthwiseKernel=$depthwiseKernel, bias=$bias, biasShape=$biasShape, depthwiseKernelShape=$depthwiseKernelShape)"
-    }
+    override fun toString(): String =
+        "DepthwiseConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
+                "dilations=${dilations.contentToString()}, activation=$activation, depthMultiplier=$depthMultiplier, " +
+                "depthwiseInitializer=$depthwiseInitializer, biasInitializer=$biasInitializer, padding=$padding, " +
+                "useBias=$useBias, depthwiseKernel=$kernel, bias=$bias, biasShape=$biasShape, " +
+                "depthwiseKernelShape=$kernelShape)"
 }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt
index 1a2e4f9e7..c0872e09d 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt
@@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
 import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
 import org.jetbrains.kotlinx.dl.api.core.layer.Layer
 import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
+import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
 import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
 import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
@@ -95,6 +96,9 @@ public class SeparableConv2D(
     private lateinit var biasShape: Shape

     init {
+        requireArraySize(kernelSize, 2, "kernelSize")
+        requireArraySize(strides, 4, "strides")
+        requireArraySize(dilations, 4, "dilations")
         isTrainable = false
     }

@@ -238,7 +242,8 @@ public class SeparableConv2D(
     override val paramCount: Int
         get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt()

-    override fun toString(): String {
-        return "SeparableConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, dilations=${dilations.contentToString()}, activation=$activation, depthwiseInitializer=$depthwiseInitializer, biasInitializer=$biasInitializer, kernelShape=$depthwiseKernelShape, padding=$padding)"
-    }
+    override fun toString(): String =
+        "SeparableConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
+                "dilations=${dilations.contentToString()}, activation=$activation, depthwiseInitializer=$depthwiseInitializer, " +
+                "biasInitializer=$biasInitializer, kernelShape=$depthwiseKernelShape, padding=$padding)"
 }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool1D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool1D.kt
index 1d921be9b..58fd16faa 100644
---
a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool1D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool1D.kt @@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer.pooling import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding +import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength import org.tensorflow.Operand import org.tensorflow.Shape @@ -41,20 +42,14 @@ public class AvgPool1D( set(value) = assignWeights(value) init { - require(poolSize.size == 3) { - "The poolSize should be an array of size 3." - } - - require(strides.size == 3) { - "The strides should be an array of size 3." - } - + requireArraySize(poolSize, 3, "poolSize") + requireArraySize(strides, 3, "strides") require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { "The padding should be either of ${ConvPadding.VALID} or ${ConvPadding.SAME}." } } - override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {} + override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape): Unit = Unit override fun computeOutputShape(inputShape: Shape): Shape { var steps = inputShape.size(1) @@ -86,5 +81,5 @@ public class AvgPool1D( } override fun toString(): String = - "AvgPool1D(poolSize=$poolSize, strides=$strides, padding=$padding)" + "AvgPool1D(poolSize=${poolSize.contentToString()}, strides=${strides.contentToString()}, padding=$padding)" } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool2D.kt index 946dffc94..8b5135f8d 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool2D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool2D.kt @@ -31,7 +31,7 @@ public class AvgPool2D( public val padding: ConvPadding = ConvPadding.VALID, name: String = "" ) : Layer(name) { - override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {} + override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape): Unit = Unit override fun computeOutputShape(inputShape: Shape): Shape { var rows = inputShape.size(1) @@ -82,6 +82,4 @@ public class AvgPool2D( override val hasActivation: Boolean get() = false override val paramCount: Int get() = 0 - - } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt index 5107db18f..1482bde8b 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt @@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer.pooling import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding +import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength import org.tensorflow.Operand import org.tensorflow.Shape @@ -40,20 +41,14 @@ public class AvgPool3D( set(value) = assignWeights(value) init { - require(poolSize.size == 5) { - "The poolSize should be an array of size 5." - } - - require(strides.size == 5) { - "The strides should be an array of size 5." 
- } - + requireArraySize(poolSize, 5, "poolSize") + requireArraySize(strides, 5, "strides") require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}." } } - override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {} + override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape): Unit = Unit override fun computeOutputShape(inputShape: Shape): Shape { var dim1 = inputShape.size(1) @@ -82,5 +77,5 @@ public class AvgPool3D( } override fun toString(): String = - "AvgPool3D(poolSize=$poolSize, strides=$strides, padding=$padding)" + "AvgPool3D(poolSize=${poolSize.contentToString()}, strides=${strides.contentToString()}, padding=$padding)" } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool1D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool1D.kt index befeb0343..99f78be7c 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool1D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool1D.kt @@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer.pooling import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding +import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength import org.tensorflow.Operand import org.tensorflow.Shape @@ -41,14 +42,8 @@ public class MaxPool1D( set(value) = assignWeights(value) init { - require(poolSize.size == 3) { - "The poolSize should be an array of size 3." - } - - require(strides.size == 3) { - "The strides should be an array of size 3." - } - + requireArraySize(poolSize, 3, "poolSize") + requireArraySize(strides, 3, "strides") require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}." } @@ -88,19 +83,18 @@ public class MaxPool1D( * However, it seems it does not work for the case of "channels_first". So, instead * we are choosing to set the value of pool size and strides based on the data format. 
         */
-        tfPoolSize[expandAxis-1] = poolSize[1].toInt()
-        tfStrides[expandAxis-1] = strides[1].toInt()
-        val tfPadding = padding.paddingName
+        tfPoolSize[expandAxis - 1] = poolSize[1].toInt()
+        tfStrides[expandAxis - 1] = strides[1].toInt()

         val maxPool = tf.nn.maxPool(
             tfInput,
             tf.constant(tfPoolSize),
             tf.constant(tfStrides),
-            tfPadding
+            padding.paddingName
         )
         return tf.squeeze(maxPool, Squeeze.axis(listOf(expandAxis.toLong())))
     }

     override fun toString(): String =
-        "MaxPool1D(poolSize=$poolSize, strides=$strides, padding=$padding)"
+        "MaxPool1D(poolSize=${poolSize.contentToString()}, strides=${strides.contentToString()}, padding=$padding)"
 }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool2D.kt
index 0e1561b94..b7c6ce110 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool2D.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/MaxPool2D.kt
@@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer.pooling
 import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.layer.Layer
 import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
+import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
 import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
 import org.tensorflow.Operand
 import org.tensorflow.Shape
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/shape/ShapeFunctions.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/shape/ShapeFunctions.kt
index 7455812ac..11a48f783 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/shape/ShapeFunctions.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/shape/ShapeFunctions.kt
@@ -167,6 +167,9 @@ internal fun getShapeOfArray(data: Array<*>): Shape {
     return shapeFromDims(*collectDims(data, mutableListOf()))
 }

+/** Shape property of a standard JVM array, for better readability of the code. */
+internal val Array<*>.shape: Shape get() = getShapeOfArray(this)
+
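A small editorial sketch of how the reworked cast helpers below compose with the new `shape` property (the `raw` value is assumed; these declarations are `internal`, so this only works inside the module):

// Editorial sketch; `raw` stands for a value such as an extracted weight tensor.
val raw: Array<*> = arrayOf(floatArrayOf(1f, 2f), floatArrayOf(3f, 4f))
val matrix: Array<FloatArray> = raw.cast2D<FloatArray>() // safe only if raw really is 2D
val shape: Shape = matrix.shape                          // presumably Shape (2, 2) here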
 /**
  * Create an array of arrays (of arrays...) of Floats with specified [shape] and
  * initialized with given [initValue]. When the number of dimensions in result tensor
@@ -186,15 +189,20 @@ internal fun getFloatArrayOfShape(shape: Shape, initValue: Float = 0.0f): Array<*> {
     }
 }

-internal fun Any?.castArrayDim(): Array<*> = this as Array<*>
+internal fun Any?.castArray(): Array<*> = this as Array<*>

-/** Cast Array<*> to Array<FloatArray> when sure about its dimensions */
-internal fun Array<*>.cast2DArray(): Array<FloatArray> = this.map { it as FloatArray }.toTypedArray()
+/** Casts Array<*> to Array<T> when sure about its dimensions; usually T is [FloatArray]. */
+internal inline fun <reified T> Array<*>.cast2D(): Array<T> =
+    this.map { it as T }.toTypedArray()

-/** Cast Array<*> to Array<Array<FloatArray>> when sure about its dimensions */
-internal fun Array<*>.cast3DArray(): Array<Array<FloatArray>> = this.map { it.castArrayDim().cast2DArray() }.toTypedArray()
+/** Casts Array<*> to Array<Array<T>> when sure about its dimensions; usually T is [FloatArray]. */
+internal inline fun <reified T> Array<*>.cast3D(): Array<Array<T>> =
+    this.map { it.castArray().cast2D<T>() }.toTypedArray()

-/** Cast Array<*> to Array<Array<Array<FloatArray>>> when sure about its dimensions */
-internal fun Array<*>.cast4DArray(): Array<Array<Array<FloatArray>>> = this.map { it.castArrayDim().cast3DArray() }.toTypedArray()
+/** Casts Array<*> to Array<Array<Array<T>>> when sure about its dimensions; usually T is [FloatArray]. */
+internal inline fun <reified T> Array<*>.cast4D(): Array<Array<Array<T>>> =
+    this.map { it.castArray().cast3D<T>() }.toTypedArray()

-internal fun Array<*>.cast5DArray(): Array<Array<Array<Array<FloatArray>>>> = this.map { it.castArrayDim().cast4DArray() }.toTypedArray()
+/** Casts Array<*> to Array<Array<Array<Array<T>>>> when sure about its dimensions; usually T is [FloatArray]. */
+internal inline fun <reified T> Array<*>.cast5D(): Array<Array<Array<Array<T>>>> =
+    this.map { it.castArray().cast4D<T>() }.toTypedArray()
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
index e50d168bf..1366f8dae 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
@@ -19,11 +19,11 @@ internal fun defaultInitializerOpName(name: String) = "Init_$name"
 /** Default optimizer variable name in TensorFlow graph, based on variable's name. */
 internal fun defaultOptimizerVariableName(name: String) = "optimizer_$name"

-/** Default Conv2d bias variable name in TensorFlow graph, based on variable's name. */
-internal fun conv2dBiasVarName(name: String) = name + "_" + "conv2d_bias"
+/** Default Conv bias variable name in TensorFlow graph, based on variable's name. */
+internal fun convBiasVarName(name: String, dim: Int) = name + "_" + "conv${dim}d_bias"

-/** Default Conv2d kernel variable name in TensorFlow graph, based on variable's name. */
-internal fun conv2dKernelVarName(name: String) = name + "_" + "conv2d_kernel"
+/** Default Conv kernel variable name in TensorFlow graph, based on variable's name. */
+internal fun convKernelVarName(name: String, dim: Int) = name + "_" + "conv${dim}d_kernel"

 /** Default DepthwiseConv2d bias variable name in TensorFlow graph, based on variable's name.
*/ internal fun depthwiseConv2dBiasVarName(name: String) = name + "_" + "depthwise_conv2d_bias" diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt index 5947fea5d..87999086f 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt @@ -6,34 +6,41 @@ package org.jetbrains.kotlinx.dl.api.inference.keras // Keras layers +// Core layers +internal const val LAYER_DENSE: String = "Dense" +internal const val LAYER_INPUT: String = "InputLayer" +internal const val LAYER_ACTIVATION: String = "Activation" +// Convolution layers internal const val LAYER_CONV1D: String = "Conv1D" internal const val LAYER_CONV2D: String = "Conv2D" +internal const val LAYER_CONV3D: String = "Conv3D" internal const val LAYER_DEPTHWISE_CONV2D: String = "DepthwiseConv2D" internal const val LAYER_SEPARABLE_CONV2D: String = "SeparableConv2D" -internal const val LAYER_DENSE: String = "Dense" -internal const val LAYER_INPUT: String = "InputLayer" +// Pooling layers internal const val LAYER_MAX_POOL_1D: String = "MaxPooling1D" -internal const val LAYER_MAX_POOLING_2D: String = "MaxPooling2D" -internal const val LAYER_MAX_POOLING_3D: String = "MaxPooling3D" +internal const val LAYER_MAX_POOL_2D: String = "MaxPooling2D" +internal const val LAYER_MAX_POOL_3D: String = "MaxPooling3D" internal const val LAYER_AVG_POOL_1D: String = "AveragePooling1D" -internal const val LAYER_AVG_POOLING_2D: String = "AvgPooling2D" -internal const val LAYER_AVERAGE_POOLING_2D: String = "AveragePooling2D" +internal const val LAYER_AVG_POOL_2D: String = "AveragePooling2D" internal const val LAYER_AVG_POOL_3D: String = "AveragePooling3D" -internal const val LAYER_RESCALING: String = "Rescaling" +internal const val LAYER_GLOBAL_MAX_POOL_1D: String = "GlobalMaxPooling1D" +internal const val LAYER_GLOBAL_AVG_POOL_1D: String = "GlobalAveragePooling1D" +internal const val LAYER_GLOBAL_AVG_POOL_2D: String = "GlobalAveragePooling2D" +internal const val LAYER_GLOBAL_AVG_POOL_3D: String = "GlobalAveragePooling3D" +// Recurrent layers +internal const val LAYER_LSTM: String = "LSTM" +// Normalization layers internal const val LAYER_NORMALIZATION: String = "Normalization" +internal const val LAYER_BATCH_NORM: String = "BatchNormalization" +// Regularization layers +internal const val LAYER_DROPOUT: String = "Dropout" +// Attention layers +// Reshaping layers internal const val LAYER_FLATTEN: String = "Flatten" internal const val LAYER_RESHAPE: String = "Reshape" internal const val LAYER_ZERO_PADDING_2D = "ZeroPadding2D" internal const val LAYER_CROPPING_2D = "Cropping2D" -internal const val LAYER_BATCH_NORM: String = "BatchNormalization" -internal const val LAYER_ACTIVATION: String = "Activation" -internal const val LAYER_RELU: String = "ReLU" -internal const val LAYER_ELU: String = "ELU" -internal const val LAYER_PRELU: String = "PReLU" -internal const val LAYER_LEAKY_RELU: String = "LeakyReLU" -internal const val LAYER_THRESHOLDED_RELU = "ThresholdedReLU" -internal const val LAYER_LSTM: String = "LSTM" -internal const val LAYER_DROPOUT: String = "Dropout" +// Merging layers internal const val LAYER_ADD: String = "Add" internal const val LAYER_MULTIPLY: String = "Multiply" internal const val LAYER_SUBTRACT: String = "Subtract" @@ -41,10 +48,13 @@ internal const val LAYER_AVERAGE: String = "Average" internal const 
val LAYER_MAXIMUM: String = "Maximum" internal const val LAYER_MINIMUM: String = "Minimum" internal const val LAYER_CONCATENATE: String = "Concatenate" -internal const val LAYER_GLOBAL_AVG_POOLING_2D: String = "GlobalAveragePooling2D" -internal const val LAYER_GLOBAL_AVG_POOLING_1D: String = "GlobalAveragePooling1D" -internal const val LAYER_GLOBAL_MAX_POOL_1D: String = "GlobalMaxPooling1D" -internal const val LAYER_GLOBAL_AVG_POOLING_3D: String = "GlobalAveragePooling3D" +// Locally-connected layers +// Activation layers +internal const val LAYER_RELU: String = "ReLU" +internal const val LAYER_ELU: String = "ELU" +internal const val LAYER_PRELU: String = "PReLU" +internal const val LAYER_LEAKY_RELU: String = "LeakyReLU" +internal const val LAYER_THRESHOLDED_RELU = "ThresholdedReLU" internal const val LAYER_SOFTMAX: String = "Softmax" // Keras data types diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt index 435c3ad05..7eb509c16 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt @@ -113,65 +113,53 @@ private fun convertToLayer( kerasLayer: KerasLayer ): Layer { return when (kerasLayer.class_name) { - LAYER_CONV1D -> createConv1D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_CONV2D -> createConv2D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_DEPTHWISE_CONV2D -> createDepthwiseConv2D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_SEPARABLE_CONV2D -> createSeparableConv2D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_FLATTEN -> createFlatten(kerasLayer.config!!.name!!) - LAYER_RESHAPE -> createReshape(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_MAX_POOL_1D -> createMaxPool1D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_MAX_POOLING_2D -> createMaxPooling2D( - kerasLayer.config!!, - kerasLayer.config.name!! - ) - LAYER_AVG_POOL_1D -> createAvgPool1D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_MAX_POOLING_3D -> createMaxPooling3D( - kerasLayer.config!!, - kerasLayer.config.name!! - ) - LAYER_AVG_POOLING_2D -> createAvgPooling2D( - kerasLayer.config!!, - kerasLayer.config.name!! - ) - LAYER_AVERAGE_POOLING_2D -> createAvgPooling2D( - kerasLayer.config!!, - kerasLayer.config.name!! - ) - LAYER_AVG_POOL_3D -> createAvgPool3DLayer(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_BATCH_NORM -> createBatchNorm(kerasLayer.config!!, kerasLayer.config.name!!) + // Core layers LAYER_ACTIVATION -> createActivationLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_DENSE -> createDenseLayer(kerasLayer.config!!, kerasLayer.config.name!!) + // Convolution layers + LAYER_CONV1D -> createConv1DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_CONV2D -> createConv2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_CONV3D -> createConv3DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_DEPTHWISE_CONV2D -> createDepthwiseConv2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_SEPARABLE_CONV2D -> createSeparableConv2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) 
+ // Pooling layers + LAYER_MAX_POOL_1D -> createMaxPool1DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_MAX_POOL_2D -> createMaxPool2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_MAX_POOL_3D -> createMaxPool3DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_AVG_POOL_1D -> createAvgPool1DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_AVG_POOL_2D -> createAvgPool2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_AVG_POOL_3D -> createAvgPool3DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_GLOBAL_MAX_POOL_1D -> createGlobalMaxPool1DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_GLOBAL_AVG_POOL_1D -> createGlobalAvgPool1DLayer(kerasLayer.config!!.name!!) + LAYER_GLOBAL_AVG_POOL_2D -> createGlobalAvgPool2DLayer(kerasLayer.config!!.name!!) + LAYER_GLOBAL_AVG_POOL_3D -> createGlobalAvgPool3DLayer(kerasLayer.config!!.name!!) + // Recurrent layers + // Normalization layers + LAYER_BATCH_NORM -> createBatchNormLayer(kerasLayer.config!!, kerasLayer.config.name!!) + // Regularization layers + LAYER_DROPOUT -> createDropoutLayer(kerasLayer.config!!, kerasLayer.config.name!!) + // Attention layers + // Reshaping layers + LAYER_FLATTEN -> createFlattenLayer(kerasLayer.config!!.name!!) + LAYER_RESHAPE -> createReshapeLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_CROPPING_2D -> createCropping2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_ZERO_PADDING_2D -> createZeroPadding2DLayer(kerasLayer.config!!, kerasLayer.config.name!!) + // Merging layers + LAYER_ADD -> createAddLayer(kerasLayer.config!!.name!!) + LAYER_AVERAGE -> createAverageLayer(kerasLayer.config!!.name!!) + LAYER_SUBTRACT -> createSubtractLayer(kerasLayer.config!!.name!!) + LAYER_MAXIMUM -> createMaximumLayer(kerasLayer.config!!.name!!) + LAYER_MINIMUM -> createMinimumLayer(kerasLayer.config!!.name!!) + LAYER_MULTIPLY -> createMultiplyLayer(kerasLayer.config!!.name!!) + LAYER_CONCATENATE -> createConcatenateLayer(kerasLayer.config!!, kerasLayer.config.name!!) + // Locally-connected layers + // Activation layers LAYER_RELU -> createReLULayer(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_ELU -> createELULayer(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_PRELU -> createPReLULayer(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_LEAKY_RELU -> createLeakyReLULayer(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_THRESHOLDED_RELU -> createThresholdedReLULayer(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_SOFTMAX -> createSoftmaxLayer(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_DROPOUT -> createDropoutLayer(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_ADD -> createAddLayer(kerasLayer.config!!.name!!) - LAYER_AVERAGE -> createAverageLayer(kerasLayer.config!!.name!!) - LAYER_SUBTRACT -> createSubtractLayer( - kerasLayer.config!!.name!! - ) - LAYER_MAXIMUM -> createMaximumLayer(kerasLayer.config!!.name!!) - LAYER_MINIMUM -> createMinimumLayer(kerasLayer.config!!.name!!) - LAYER_MULTIPLY -> createMultiplyLayer( - kerasLayer.config!!.name!! - ) - LAYER_CONCATENATE -> createConcatenateLayer( - kerasLayer.config!!, - kerasLayer.config.name!! - ) - LAYER_GLOBAL_AVG_POOLING_2D -> createGlobalAvgPooling2D( - kerasLayer.config!!.name!! - ) - LAYER_GLOBAL_MAX_POOL_1D -> createGlobalMaxPool1D(kerasLayer.config!!, kerasLayer.config.name!!) - LAYER_GLOBAL_AVG_POOLING_1D -> createGlobalAvgPooling1D(kerasLayer.config!!.name!!) 
-        LAYER_GLOBAL_AVG_POOLING_3D -> createGlobalAvgPooling3D(
-            kerasLayer.config!!.name!!
-        )
         else -> throw IllegalStateException("${kerasLayer.class_name} is not supported yet!")
     }
 }
@@ -294,88 +282,215 @@ private fun convertToLayer(
     return layer
 }
 
-private fun createGlobalAvgPooling2D(
-    name: String
-): Layer {
+private fun convertToRegularizer(regularizer: KerasRegularizer?): Regularizer? {
+    return if (regularizer != null) {
+        val l1 = regularizer.config!!.l1
+        val l2 = regularizer.config!!.l2
+        if (l1 != 0.0 && l2 != 0.0) {
+            L2L1(l1!!.toFloat(), l2!!.toFloat())
+        } else if (l1 == 0.0 && l2 != 0.0) {
+            L2(l2!!.toFloat())
+        } else if (l1 != 0.0 && l2 == 0.0) {
+            L1(l1!!.toFloat())
+        } else {
+            null
+        }
+    } else {
+        null
+    }
+}
+
+private fun convertToInitializer(initializer: KerasInitializer): Initializer {
+    val seed = if (initializer.config!!.seed != null) {
+        initializer.config.seed!!.toLong()
+    } else 12L
+
+    return when (initializer.class_name!!) {
+        INITIALIZER_GLOROT_UNIFORM -> GlorotUniform(seed = seed)
+        INITIALIZER_GLOROT_NORMAL -> GlorotNormal(seed = seed)
+        INITIALIZER_HE_NORMAL -> HeNormal(seed = seed)
+        INITIALIZER_HE_UNIFORM -> HeUniform(seed = seed)
+        INITIALIZER_LECUN_NORMAL -> LeCunNormal(seed = seed)
+        INITIALIZER_LECUN_UNIFORM -> LeCunUniform(seed = seed)
+        INITIALIZER_ZEROS -> RandomUniform(
+            seed = seed,
+            minVal = 0.0f,
+            maxVal = 0.0f
+        ) // used in place of the real initializer, since it has no effect when the weights are loaded
+        INITIALIZER_CONSTANT -> RandomUniform(
+            seed = seed,
+            minVal = 0.0f,
+            maxVal = 0.0f
+        ) // used in place of the real initializer, since it has no effect when the weights are loaded
+        INITIALIZER_ONES -> RandomUniform(
+            seed = seed,
+            minVal = 1.0f,
+            maxVal = 1.0f
+        ) // used in place of the real initializer, since it has no effect when the weights are loaded
+        INITIALIZER_RANDOM_NORMAL -> RandomNormal(
+            seed = seed,
+            mean = initializer.config.mean!!.toFloat(),
+            stdev = initializer.config.stddev!!.toFloat()
+        )
+        INITIALIZER_RANDOM_UNIFORM -> RandomUniform(
+            seed = seed,
+            minVal = initializer.config.minval!!.toFloat(),
+            maxVal = initializer.config.maxval!!.toFloat()
+        )
+        INITIALIZER_TRUNCATED_NORMAL -> TruncatedNormal(seed = seed)
+        INITIALIZER_VARIANCE_SCALING -> convertVarianceScalingInitializer(initializer)
+        INITIALIZER_ORTHOGONAL -> Orthogonal(seed = seed, gain = initializer.config.gain!!.toFloat())
+        /*INITIALIZER_CONSTANT -> Constant(initializer.config.value!!.toFloat())*/
+        INITIALIZER_IDENTITY -> Identity(initializer.config.gain?.toFloat() ?: 1f)
+        else -> throw IllegalStateException("${initializer.class_name} is not supported yet!")
+    }
+}
+
+private fun convertVarianceScalingInitializer(initializer: KerasInitializer): Initializer {
+    val seed = if (initializer.config!!.seed != null) {
+        initializer.config.seed!!.toLong()
+    } else 12L
+
+    val config = initializer.config
+    val scale = config.scale!!
+    val mode: Mode = convertMode(config.mode!!)
+    val distribution: Distribution = convertDistribution(config.distribution!!)
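+    // Dispatch note: scale == 2.0 with fan_in selects the He initializers; any other
+    // combination selects LeCun for fan_in and Glorot for fan_avg (scale is not re-checked
+    // there), falling back to the generic VarianceScaling in all remaining cases.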
+ return if (scale == 2.0 && mode == Mode.FAN_IN) { + when (distribution) { + Distribution.UNIFORM -> HeUniform(seed) + Distribution.TRUNCATED_NORMAL -> { + HeNormal(seed) + } + else -> VarianceScaling(scale, mode, distribution, seed) + } + } else { + when (mode) { + Mode.FAN_IN -> { + when (distribution) { + Distribution.UNIFORM -> LeCunUniform(seed) + Distribution.TRUNCATED_NORMAL -> { + LeCunNormal(seed) + } + else -> VarianceScaling(scale, mode, distribution, seed) + } + } + Mode.FAN_AVG -> { + when (distribution) { + Distribution.UNIFORM -> GlorotUniform(seed) + Distribution.TRUNCATED_NORMAL -> { + GlorotNormal(seed) + } + else -> VarianceScaling(scale, mode, distribution, seed) + } + } + else -> VarianceScaling(scale, mode, distribution, seed) + } + } +} + +private fun convertDistribution(distribution: String): Distribution { + return when (distribution) { + "truncated_normal" -> Distribution.TRUNCATED_NORMAL + "uniform" -> Distribution.UNIFORM + "untruncated_normal" -> Distribution.UNTRUNCATED_NORMAL + else -> Distribution.TRUNCATED_NORMAL + } +} + +private fun convertMode(mode: String): Mode { + return when (mode) { + "fan_in" -> Mode.FAN_IN + "fan_out" -> Mode.FAN_OUT + "fan_avg" -> Mode.FAN_AVG + else -> Mode.FAN_AVG + } +} + +private fun convertToActivation(activation: String): Activations { + return when (activation) { + ACTIVATION_RELU -> Activations.Relu + ACTIVATION_SIGMOID -> Activations.Sigmoid + ACTIVATION_SOFTMAX -> Activations.Softmax + ACTIVATION_LINEAR -> Activations.Linear + ACTIVATION_TANH -> Activations.Tanh + ACTIVATION_RELU6 -> Activations.Relu6 + ACTIVATION_ELU -> Activations.Elu + ACTIVATION_SELU -> Activations.Selu + ACTIVATION_LOG_SOFTMAX -> Activations.LogSoftmax + ACTIVATION_EXP -> Activations.Exponential + ACTIVATION_SOFTPLUS -> Activations.SoftPlus + ACTIVATION_SOFTSIGN -> Activations.SoftSign + ACTIVATION_HARD_SIGMOID -> Activations.HardSigmoid + ACTIVATION_SWISH -> Activations.Swish + else -> throw IllegalStateException("$activation is not supported yet!") + } +} + +/** + * The layer creator functions should be put below. 
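+ * Each creator receives the parsed [LayerConfig] (and the layer name) and returns the
+ * corresponding KotlinDL [Layer] instance.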
+ */
+
+private fun createGlobalAvgPool2DLayer(name: String): Layer {
     return GlobalAvgPool2D(
         name = name
     )
 }
 
-private fun createGlobalAvgPooling1D(
-    name: String
-): Layer {
+private fun createGlobalAvgPool1DLayer(name: String): Layer {
     return GlobalAvgPool1D(
         name = name
     )
 }
 
-private fun createGlobalAvgPooling3D(
-    name: String
-): Layer {
+private fun createGlobalAvgPool3DLayer(name: String): Layer {
     return GlobalAvgPool3D(
         name = name
     )
 }
 
-private fun createGlobalMaxPool1D(config: LayerConfig, name: String): Layer {
+private fun createGlobalMaxPool1DLayer(config: LayerConfig, name: String): Layer {
     return GlobalMaxPool1D(
         name = name
     )
 }
 
-private fun createAddLayer(
-    name: String
-): Layer {
+private fun createAddLayer(name: String): Layer {
     return Add(
         name = name
     )
 }
 
-private fun createSubtractLayer(
-    name: String
-): Layer {
+private fun createSubtractLayer(name: String): Layer {
     return Subtract(
         name = name
     )
 }
 
-private fun createAverageLayer(
-    name: String
-): Layer {
+private fun createAverageLayer(name: String): Layer {
     return Average(
         name = name
     )
 }
 
-private fun createMaximumLayer(
-    name: String
-): Layer {
+private fun createMaximumLayer(name: String): Layer {
     return Maximum(
         name = name
     )
 }
 
-private fun createMinimumLayer(
-    name: String
-): Layer {
+private fun createMinimumLayer(name: String): Layer {
     return Minimum(
         name = name
     )
 }
 
-private fun createMultiplyLayer(
-    name: String
-): Layer {
+private fun createMultiplyLayer(name: String): Layer {
     return Multiply(
         name = name
     )
 }
 
-private fun createConcatenateLayer(
-    config: LayerConfig,
-    name: String
-): Layer {
+private fun createConcatenateLayer(config: LayerConfig, name: String): Layer {
     return Concatenate(
         axis = config.axis!! as Int,
         name = name
@@ -447,7 +562,7 @@ private fun createSoftmaxLayer(config: LayerConfig, name: String): Layer {
     )
 }
 
-private fun createBatchNorm(config: LayerConfig, name: String): Layer {
+private fun createBatchNormLayer(config: LayerConfig, name: String): Layer {
     return BatchNorm(
         axis = config.axis!! as List<Int>,
         momentum = config.momentum!!,
@@ -464,7 +579,7 @@ private fun createBatchNorm(config: LayerConfig, name: String): Layer {
     )
 }
 
-private fun createDense(config: LayerConfig, name: String): Dense {
+private fun createDenseLayer(config: LayerConfig, name: String): Layer {
     return Dense(
         outputSize = config.units!!,
         activation = convertToActivation(config.activation!!),
@@ -477,151 +592,7 @@ private fun createDense(config: LayerConfig, name: String): Dense {
     )
 }
 
-private fun convertToRegularizer(regularizer: KerasRegularizer?): Regularizer? {
-    return if (regularizer != null) {
-        val l1 = regularizer.config!!.l1
-        val l2 = regularizer.config!!.l2
-        if (l1 != 0.0 && l2 != 0.0) {
-            L2L1(l1!!.toFloat(), l2!!.toFloat())
-        } else if (l1 == 0.0 && l2 != 0.0) {
-            L2(l2!!.toFloat())
-        } else if (l1 != 0.0 && l2 == 0.0) {
-            L1(l1!!.toFloat())
-        } else {
-            null
-        }
-    } else {
-        null
-    }
-}
-
-private fun convertToInitializer(initializer: KerasInitializer): Initializer {
-    val seed = if (initializer.config!!.seed != null) {
-        initializer.config.seed!!.toLong()
-    } else 12L
-
-    return when (initializer.class_name!!)
{ - INITIALIZER_GLOROT_UNIFORM -> GlorotUniform(seed = seed) - INITIALIZER_GLOROT_NORMAL -> GlorotNormal(seed = seed) - INITIALIZER_HE_NORMAL -> HeNormal(seed = seed) - INITIALIZER_HE_UNIFORM -> HeUniform(seed = seed) - INITIALIZER_LECUN_NORMAL -> LeCunNormal(seed = seed) - INITIALIZER_LECUN_UNIFORM -> LeCunUniform(seed = seed) - INITIALIZER_ZEROS -> RandomUniform( - seed = seed, - minVal = 0.0f, - maxVal = 0.0f - ) // instead of real initializers, because it doesn't influence on nothing - INITIALIZER_CONSTANT -> RandomUniform( - seed = seed, - minVal = 0.0f, - maxVal = 0.0f - ) // instead of real initializers, because it doesn't influence on nothing - INITIALIZER_ONES -> RandomUniform( - seed = seed, - minVal = 1.0f, - maxVal = 1.0f - ) // instead of real initializers, because it doesn't influence on nothing*/ - INITIALIZER_RANDOM_NORMAL -> RandomNormal( - seed = seed, - mean = initializer.config.mean!!.toFloat(), - stdev = initializer.config.stddev!!.toFloat() - ) - INITIALIZER_RANDOM_UNIFORM -> RandomUniform( - seed = seed, - minVal = initializer.config.minval!!.toFloat(), - maxVal = initializer.config.maxval!!.toFloat() - ) - INITIALIZER_TRUNCATED_NORMAL -> TruncatedNormal(seed = seed) - INITIALIZER_VARIANCE_SCALING -> convertVarianceScaling(initializer) - INITIALIZER_ORTHOGONAL -> Orthogonal( seed = seed, gain = initializer.config.gain!!.toFloat() ) - /*INITIALIZER_CONSTANT -> Constant(initializer.config.value!!.toFloat())*/ - INITIALIZER_IDENTITY -> Identity(initializer.config.gain?.toFloat() ?: 1f) - else -> throw IllegalStateException("${initializer.class_name} is not supported yet!") - } -} - -private fun convertVarianceScaling(initializer: KerasInitializer): Initializer { - val seed = if (initializer.config!!.seed != null) { - initializer.config.seed!!.toLong() - } else 12L - - val config = initializer.config - val scale = config.scale!! - val mode: Mode = convertMode(config.mode!!) - val distribution: Distribution = convertDistribution(config.distribution!!) 
- return if (scale == 2.0 && mode == Mode.FAN_IN) { - when (distribution) { - Distribution.UNIFORM -> HeUniform(seed) - Distribution.TRUNCATED_NORMAL -> { - HeNormal(seed) - } - else -> VarianceScaling(scale, mode, distribution, seed) - } - } else { - when (mode) { - Mode.FAN_IN -> { - when (distribution) { - Distribution.UNIFORM -> LeCunUniform(seed) - Distribution.TRUNCATED_NORMAL -> { - LeCunNormal(seed) - } - else -> VarianceScaling(scale, mode, distribution, seed) - } - } - Mode.FAN_AVG -> { - when (distribution) { - Distribution.UNIFORM -> GlorotUniform(seed) - Distribution.TRUNCATED_NORMAL -> { - GlorotNormal(seed) - } - else -> VarianceScaling(scale, mode, distribution, seed) - } - } - else -> VarianceScaling(scale, mode, distribution, seed) - } - } -} - -private fun convertDistribution(distribution: String): Distribution { - return when (distribution) { - "truncated_normal" -> Distribution.TRUNCATED_NORMAL - "uniform" -> Distribution.UNIFORM - "untruncated_normal" -> Distribution.UNTRUNCATED_NORMAL - else -> Distribution.TRUNCATED_NORMAL - } -} - -private fun convertMode(mode: String): Mode { - return when (mode) { - "fan_in" -> Mode.FAN_IN - "fan_out" -> Mode.FAN_OUT - "fan_avg" -> Mode.FAN_AVG - else -> Mode.FAN_AVG - } -} - -private fun convertToActivation(activation: String): Activations { - return when (activation) { - ACTIVATION_RELU -> Activations.Relu - ACTIVATION_SIGMOID -> Activations.Sigmoid - ACTIVATION_SOFTMAX -> Activations.Softmax - ACTIVATION_LINEAR -> Activations.Linear - ACTIVATION_TANH -> Activations.Tanh - ACTIVATION_RELU6 -> Activations.Relu6 - ACTIVATION_ELU -> Activations.Elu - ACTIVATION_SELU -> Activations.Selu - ACTIVATION_LOG_SOFTMAX -> Activations.LogSoftmax - ACTIVATION_EXP -> Activations.Exponential - ACTIVATION_SOFTPLUS -> Activations.SoftPlus - ACTIVATION_SOFTSIGN -> Activations.SoftSign - ACTIVATION_HARD_SIGMOID -> Activations.HardSigmoid - ACTIVATION_SWISH -> Activations.Swish - else -> throw IllegalStateException("$activation is not supported yet!") - } -} - -private fun createMaxPool1D(config: LayerConfig, name: String): Layer { +private fun createMaxPool1DLayer(config: LayerConfig, name: String): Layer { val poolSize = config.pool_size!! val addedOnesPoolSize = longArrayOf(1, poolSize[0].toLong(), 1) val strides = config.strides!! @@ -634,7 +605,7 @@ private fun createMaxPool1D(config: LayerConfig, name: String): Layer { ) } -private fun createMaxPooling2D(config: LayerConfig, name: String): MaxPool2D { +private fun createMaxPool2DLayer(config: LayerConfig, name: String): Layer { val poolSize = config.pool_size!!.toIntArray() val addedOnesPoolSize = IntArray(4) addedOnesPoolSize[0] = 1 @@ -649,10 +620,15 @@ private fun createMaxPooling2D(config: LayerConfig, name: String): MaxPool2D { addedOnesStrides[2] = strides[1] addedOnesStrides[3] = 1 - return MaxPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name) + return MaxPool2D( + poolSize = addedOnesPoolSize, + strides = addedOnesStrides, + padding = convertPadding(config.padding!!), + name = name + ) } -private fun createAvgPool1D(config: LayerConfig, name: String): Layer { +private fun createAvgPool1DLayer(config: LayerConfig, name: String): Layer { val poolSize = config.pool_size!! val addedOnesPoolSize = longArrayOf(1, poolSize[0].toLong(), 1) val strides = config.strides!! 
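+    // Keras serializes pool_size and strides per spatial dimension only, so the batch and
+    // channel dimensions are padded with 1s (see addedOnesPoolSize above) before the layer is built.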
@@ -665,7 +641,7 @@ private fun createAvgPool1D(config: LayerConfig, name: String): Layer { ) } -private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D { +private fun createAvgPool2DLayer(config: LayerConfig, name: String): Layer { val poolSize = config.pool_size!!.toIntArray() val addedOnesPoolSize = IntArray(4) addedOnesPoolSize[0] = 1 @@ -680,7 +656,12 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D { addedOnesStrides[2] = strides[1] addedOnesStrides[3] = 1 - return AvgPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name) + return AvgPool2D( + poolSize = addedOnesPoolSize, + strides = addedOnesStrides, + padding = convertPadding(config.padding!!), + name = name + ) } private fun createAvgPool3DLayer(config: LayerConfig, name: String): Layer { @@ -696,7 +677,7 @@ private fun createAvgPool3DLayer(config: LayerConfig, name: String): Layer { ) } -private fun createMaxPooling3D(config: LayerConfig, name: String): MaxPool3D { +private fun createMaxPool3DLayer(config: LayerConfig, name: String): Layer { val poolSize = config.pool_size!!.toIntArray() val addedOnesPoolSize = IntArray(5) addedOnesPoolSize[0] = 1 @@ -713,7 +694,12 @@ private fun createMaxPooling3D(config: LayerConfig, name: String): MaxPool3D { addedOnesStrides[3] = strides[2] addedOnesStrides[4] = 1 - return MaxPool3D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name) + return MaxPool3D( + poolSize = addedOnesPoolSize, + strides = addedOnesStrides, + padding = convertPadding(config.padding!!), + name = name + ) } private fun convertPadding(padding: KerasPadding): ConvPadding { @@ -725,15 +711,15 @@ private fun convertPadding(padding: KerasPadding): ConvPadding { } } -private fun createFlatten(name: String): Flatten { +private fun createFlattenLayer(name: String): Layer { return Flatten(name = name) } -private fun createReshape(config: LayerConfig, name: String): Reshape { +private fun createReshapeLayer(config: LayerConfig, name: String): Layer { return Reshape(name = name, targetShape = config.target_shape!!) 
} -private fun createConv1D(config: LayerConfig, name: String): Conv1D { +private fun createConv1DLayer(config: LayerConfig, name: String): Layer { val kernelSize = config.kernel_size!!.map { it.toLong() }[0] val strides = config.strides!!.map { it.toLong() }.toLongArray() @@ -765,7 +751,7 @@ private fun createConv1D(config: LayerConfig, name: String): Conv1D { ) } -private fun createConv2D(config: LayerConfig, name: String): Conv2D { +private fun createConv2DLayer(config: LayerConfig, name: String): Layer { val kernelSize = config.kernel_size!!.map { it.toLong() }.toLongArray() val strides = config.strides!!.map { it.toLong() }.toLongArray() @@ -799,10 +785,43 @@ private fun createConv2D(config: LayerConfig, name: String): Conv2D { ) } -private fun createDepthwiseConv2D( - config: LayerConfig, - name: String -): DepthwiseConv2D { +private fun createConv3DLayer(config: LayerConfig, name: String): Layer { + val kernelSize = config.kernel_size!!.map { it.toLong() }.toLongArray() + val strides = config.strides!!.map { it.toLong() }.toLongArray() + + val addedOnesStrides = LongArray(5) + addedOnesStrides[0] = 1 + addedOnesStrides[1] = strides[0] + addedOnesStrides[2] = strides[1] + addedOnesStrides[3] = strides[2] + addedOnesStrides[4] = 1 + + val dilation = config.dilation_rate!!.map { it.toLong() }.toLongArray() + val addedOnesDilation = LongArray(5) + addedOnesDilation[0] = 1 + addedOnesDilation[1] = dilation[0] + addedOnesDilation[2] = dilation[1] + addedOnesDilation[3] = dilation[2] + addedOnesDilation[4] = 1 + + return Conv3D( + filters = config.filters!!.toLong(), + kernelSize = kernelSize, + strides = addedOnesStrides, + dilations = addedOnesDilation, + activation = convertToActivation(config.activation!!), + kernelInitializer = convertToInitializer(config.kernel_initializer!!), + biasInitializer = convertToInitializer(config.bias_initializer!!), + kernelRegularizer = convertToRegularizer(config.kernel_regularizer), + biasRegularizer = convertToRegularizer(config.bias_regularizer), + activityRegularizer = convertToRegularizer(config.activity_regularizer), + padding = convertPadding(config.padding!!), + useBias = config.use_bias!!, + name = name + ) +} + +private fun createDepthwiseConv2DLayer(config: LayerConfig, name: String): Layer { val kernelSize = config.kernel_size!!.map { it.toLong() }.toLongArray() val strides = config.strides!!.map { it.toLong() }.toLongArray() @@ -836,10 +855,7 @@ private fun createDepthwiseConv2D( ) } -private fun createSeparableConv2D( - config: LayerConfig, - name: String -): SeparableConv2D { +private fun createSeparableConv2DLayer(config: LayerConfig, name: String): Layer { val kernelSize = config.kernel_size!!.map { it.toLong() }.toLongArray() val strides = config.strides!!.map { it.toLong() }.toLongArray() @@ -876,25 +892,19 @@ private fun createSeparableConv2D( ) } -private fun createZeroPadding2D( - config: LayerConfig, - name: String -): ZeroPadding2D { +private fun createZeroPadding2DLayer(config: LayerConfig, name: String): Layer { assert(config.padding is KerasPadding.ZeroPadding2D) return ZeroPadding2D( - (config.padding as KerasPadding.ZeroPadding2D).padding, - config.data_format, - name + padding = (config.padding as KerasPadding.ZeroPadding2D).padding, + dataFormat = config.data_format, + name = name ) } -private fun createCropping2D( - config: LayerConfig, - name: String -): Cropping2D { +private fun createCropping2DLayer(config: LayerConfig, name: String): Layer { val cropping = config.cropping!!.map { it.toIntArray() }.toTypedArray() 
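+    // As in the other convolution creators, the per-spatial-dimension strides coming from
+    // Keras are expanded with leading and trailing 1s before being passed to the layer.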
return Cropping2D( - cropping, - name + cropping = cropping, + name = name ) } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index 9e9adcd7f..d7f4e937d 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -12,8 +12,6 @@ import org.jetbrains.kotlinx.dl.api.core.Sequential import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.* import org.jetbrains.kotlinx.dl.api.core.layer.Layer -import org.jetbrains.kotlinx.dl.api.core.layer.activation.ELU -import org.jetbrains.kotlinx.dl.api.core.layer.activation.ReLU import org.jetbrains.kotlinx.dl.api.core.layer.activation.PReLU import org.jetbrains.kotlinx.dl.api.core.layer.activation.LeakyReLU import org.jetbrains.kotlinx.dl.api.core.layer.activation.Softmax @@ -74,39 +72,49 @@ internal fun GraphTrainableModel.serializeModel(isKerasFullyCompatible: Boolean) private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, isFunctional: Boolean): KerasLayer { val kerasLayer = when (layer) { - is Conv1D -> createKerasConv1D(layer, isKerasFullyCompatible) - is Conv2D -> createKerasConv2D(layer, isKerasFullyCompatible) - is Flatten -> createKerasFlatten(layer) - is MaxPool1D -> createKerasMaxPool1D(layer) - is MaxPool2D -> createKerasMaxPooling2D(layer) - is MaxPool3D -> createKerasMaxPooling3D(layer) - is AvgPool1D -> createKerasAvgPool1D(layer) - is AvgPool2D -> createKerasAvgPooling2D(layer) - is AvgPool3D -> createKerasAvgPool3DLayer(layer) - is Dense -> createKerasDense(layer, isKerasFullyCompatible) - is ZeroPadding2D -> createKerasZeroPadding2D(layer) - is Input -> createKerasInput(layer) - is BatchNorm -> createKerasBatchNorm(layer, isKerasFullyCompatible) + // Core layers + is Input -> createKerasInputLayer(layer) + is Dense -> createKerasDenseLayer(layer, isKerasFullyCompatible) is ActivationLayer -> createKerasActivationLayer(layer) - is ELU -> createKerasELU(layer) - is ReLU -> createKerasReLU(layer) - is PReLU -> createKerasPReLULayer(layer, isKerasFullyCompatible) - is LeakyReLU -> createKerasLeakyReLU(layer) - is ThresholdedReLU -> createKerasThresholdedReLULayer(layer) + // Convolution layers + is Conv1D -> createKerasConv1DLayer(layer, isKerasFullyCompatible) + is Conv2D -> createKerasConv2DLayer(layer, isKerasFullyCompatible) + is Conv3D -> createKerasConv3DLayer(layer, isKerasFullyCompatible) + is DepthwiseConv2D -> createKerasDepthwiseConv2DLayer(layer, isKerasFullyCompatible) + is SeparableConv2D -> createKerasSeparableConv2DLayer(layer, isKerasFullyCompatible) + // Pooling layers + is MaxPool1D -> createKerasMaxPool1DLayer(layer) + is MaxPool2D -> createKerasMaxPool2DLayer(layer) + is MaxPool3D -> createKerasMaxPool3DLayer(layer) + is AvgPool1D -> createKerasAvgPool1DLayer(layer) + is AvgPool2D -> createKerasAvgPool2DLayer(layer) + is AvgPool3D -> createKerasAvgPool3DLayer(layer) + is GlobalMaxPool1D -> createKerasGlobalMaxPool1DLayer(layer) + is GlobalAvgPool1D -> createKerasGlobalAvgPool1DLayer(layer) + is GlobalAvgPool2D -> createKerasGlobalAvgPool2DLayer(layer) + is GlobalAvgPool3D -> createKerasGlobalAvgPool3DLayer(layer) + // Recurrent layers (e.g. LSTM) + // Normalization layers + is BatchNorm -> createKerasBatchNormLayer(layer, isKerasFullyCompatible) + // Regularization layers (e.g. 
Dropout) + // Attention layers + // Reshaping layers + is Flatten -> createKerasFlattenLayer(layer) + is ZeroPadding2D -> createKerasZeroPadding2DLayer(layer) + // Merging layers is Add -> createKerasAddLayer(layer) - is Maximum -> createKerasMaximumLayer(layer as Maximum) - is Minimum -> createKerasMinimumLayer(layer as Minimum) - is Subtract -> createKerasSubtractLayer(layer as Subtract) - is Multiply -> createKerasMultiplyLayer(layer as Multiply) - is Average -> createKerasAverageLayer(layer as Average) - is GlobalMaxPool1D -> createKerasGlobalMaxPool1D(layer) - is GlobalAvgPool2D -> createKerasGlobalAveragePooling2DLayer(layer) - is GlobalAvgPool3D -> createKerasGlobalAveragePooling3DLayer(layer) - is DepthwiseConv2D -> createKerasDepthwiseConv2D(layer, isKerasFullyCompatible) - is SeparableConv2D -> createSeparableConv2D(layer, isKerasFullyCompatible) - is Concatenate -> createKerasConcatenate(layer) - is GlobalAvgPool1D -> createKerasGlobalAveragePooling1DLayer(layer) + is Maximum -> createKerasMaximumLayer(layer) + is Minimum -> createKerasMinimumLayer(layer) + is Subtract -> createKerasSubtractLayer(layer) + is Multiply -> createKerasMultiplyLayer(layer) + is Average -> createKerasAverageLayer(layer) + is Concatenate -> createKerasConcatenateLayer(layer) + // Locally-connected layers + // Activation layers is Softmax -> createKerasSoftmaxLayer(layer) + is PReLU -> createKerasPReLULayer(layer, isKerasFullyCompatible) + is LeakyReLU -> createKerasLeakyReLULayer(layer) + is ThresholdedReLU -> createKerasThresholdedReLULayer(layer) else -> throw IllegalStateException("${layer.name} with type ${layer::class.simpleName} is not supported yet!") } @@ -136,23 +144,146 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i return kerasLayer } -private fun createKerasGlobalAveragePooling2DLayer(layer: GlobalAvgPool2D): KerasLayer { +private fun convertToKerasRegularizer(regularizer: Regularizer?): KerasRegularizer? { + return if (regularizer != null) { + val className = "L1L2" + regularizer as L2L1 + val config = KerasRegularizerConfig(l1 = regularizer.l1.toDouble(), l2 = regularizer.l2.toDouble()) + KerasRegularizer(class_name = className, config = config) + } else { + null + } +} + +private fun convertToKerasInitializer(initializer: Initializer, isKerasFullyCompatible: Boolean): KerasInitializer? 
{
+    val className: String
+    val config: KerasInitializerConfig
+    if (isKerasFullyCompatible) {
+        val (_className, _config) = when (initializer) {
+            is GlorotUniform -> convertToVarianceScalingInitializer(initializer as VarianceScaling)
+            is GlorotNormal -> convertToVarianceScalingInitializer(initializer as VarianceScaling)
+            is HeNormal -> convertToVarianceScalingInitializer(initializer as VarianceScaling)
+            is HeUniform -> convertToVarianceScalingInitializer(initializer as VarianceScaling)
+            is LeCunNormal -> convertToVarianceScalingInitializer(initializer as VarianceScaling)
+            is LeCunUniform -> convertToVarianceScalingInitializer(initializer as VarianceScaling)
+            is RandomUniform -> convertToRandomUniformInitializer(initializer)
+            is Identity -> convertToIdentityInitializer(initializer)
+            else -> throw IllegalStateException("${initializer::class.simpleName} is not supported yet!")
+        }
+
+        className = _className
+        config = _config
+    } else {
+        className = when (initializer) {
+            is GlorotUniform -> INITIALIZER_GLOROT_UNIFORM
+            is GlorotNormal -> INITIALIZER_GLOROT_NORMAL
+            is HeNormal -> INITIALIZER_HE_NORMAL
+            is HeUniform -> INITIALIZER_HE_UNIFORM
+            is LeCunNormal -> INITIALIZER_LECUN_NORMAL
+            is LeCunUniform -> INITIALIZER_LECUN_UNIFORM
+            is Identity -> INITIALIZER_IDENTITY
+            else -> throw IllegalStateException("${initializer::class.simpleName} is not supported yet!")
+        }
+        config = KerasInitializerConfig(seed = 12)
+    }
+
+    return KerasInitializer(class_name = className, config = config)
+}
+
+private fun convertToRandomUniformInitializer(initializer: RandomUniform): Pair<String, KerasInitializerConfig> {
+    return Pair(
+        INITIALIZER_RANDOM_UNIFORM, KerasInitializerConfig(
+            minval = initializer.minVal.toDouble(),
+            maxval = initializer.maxVal.toDouble(),
+            seed = initializer.seed.toInt()
+        )
+    )
+}
+
+private fun convertToVarianceScalingInitializer(initializer: VarianceScaling): Pair<String, KerasInitializerConfig> {
+    return Pair(
+        INITIALIZER_VARIANCE_SCALING, KerasInitializerConfig(
+            seed = initializer.seed.toInt(),
+            scale = initializer.scale,
+            mode = convertMode(initializer.mode),
+            distribution = convertDistribution(initializer.distribution)
+        )
+    )
+}
+
+private fun convertToIdentityInitializer(initializer: Identity): Pair<String, KerasInitializerConfig> {
+    return Pair(
+        INITIALIZER_IDENTITY,
+        KerasInitializerConfig(
+            gain = initializer.gain.toDouble()
+        )
+    )
+}
+
+private fun convertDistribution(distribution: Distribution): String {
+    return when (distribution) {
+        Distribution.TRUNCATED_NORMAL -> "truncated_normal"
+        Distribution.UNIFORM -> "uniform"
+        Distribution.UNTRUNCATED_NORMAL -> "untruncated_normal"
+    }
+}
+
+private fun convertMode(mode: Mode): String {
+    return when (mode) {
+        Mode.FAN_IN -> "fan_in"
+        Mode.FAN_OUT -> "fan_out"
+        Mode.FAN_AVG -> "fan_avg"
+    }
+}
+
+private fun convertToKerasPadding(padding: ConvPadding): KerasPadding {
+    return when (padding) {
+        ConvPadding.SAME -> KerasPadding.Same
+        ConvPadding.VALID -> KerasPadding.Valid
+        ConvPadding.FULL -> KerasPadding.Full
+    }
+}
+
+private fun convertToKerasActivation(activation: Activations): String?
{ + return when (activation) { + Activations.Relu -> ACTIVATION_RELU + Activations.Sigmoid -> ACTIVATION_SIGMOID + Activations.Softmax -> ACTIVATION_SOFTMAX + Activations.Linear -> ACTIVATION_LINEAR + Activations.Tanh -> ACTIVATION_TANH + Activations.Relu6 -> ACTIVATION_RELU6 + Activations.Elu -> ACTIVATION_ELU + Activations.Selu -> ACTIVATION_SELU + Activations.LogSoftmax -> ACTIVATION_LOG_SOFTMAX + Activations.Exponential -> ACTIVATION_EXP + Activations.SoftPlus -> ACTIVATION_SOFTPLUS + Activations.SoftSign -> ACTIVATION_SOFTSIGN + Activations.HardSigmoid -> ACTIVATION_HARD_SIGMOID + Activations.Swish -> ACTIVATION_SWISH + } +} + +/** + * The layer creator functions for Keras should be put below. + */ + +private fun createKerasGlobalAvgPool2DLayer(layer: GlobalAvgPool2D): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, name = layer.name ) - return KerasLayer(class_name = LAYER_GLOBAL_AVG_POOLING_2D, config = configX) + return KerasLayer(class_name = LAYER_GLOBAL_AVG_POOL_2D, config = configX) } -private fun createKerasGlobalAveragePooling1DLayer(layer: GlobalAvgPool1D): KerasLayer { +private fun createKerasGlobalAvgPool1DLayer(layer: GlobalAvgPool1D): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, name = layer.name ) - return KerasLayer(class_name = LAYER_GLOBAL_AVG_POOLING_1D, config = configX) + return KerasLayer(class_name = LAYER_GLOBAL_AVG_POOL_1D, config = configX) } -private fun createKerasGlobalMaxPool1D(layer: GlobalMaxPool1D): KerasLayer { +private fun createKerasGlobalMaxPool1DLayer(layer: GlobalMaxPool1D): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, name = layer.name @@ -160,12 +291,12 @@ private fun createKerasGlobalMaxPool1D(layer: GlobalMaxPool1D): KerasLayer { return KerasLayer(class_name = LAYER_GLOBAL_MAX_POOL_1D, config = configX) } -private fun createKerasGlobalAveragePooling3DLayer(layer: GlobalAvgPool3D): KerasLayer { +private fun createKerasGlobalAvgPool3DLayer(layer: GlobalAvgPool3D): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, name = layer.name ) - return KerasLayer(class_name = LAYER_GLOBAL_AVG_POOLING_3D, config = configX) + return KerasLayer(class_name = LAYER_GLOBAL_AVG_POOL_3D, config = configX) } private fun createKerasAddLayer(layer: Add): KerasLayer { @@ -225,26 +356,6 @@ private fun createKerasActivationLayer(layer: ActivationLayer): KerasLayer { return KerasLayer(class_name = LAYER_ACTIVATION, config = configX) } -private fun createKerasReLU(layer: ReLU): KerasLayer { - val configX = LayerConfig( - dtype = DATATYPE_FLOAT32, - max_value = layer.maxValue?.toDouble(), - negative_slope = layer.negativeSlope.toDouble(), - threshold = layer.threshold.toDouble(), - name = layer.name - ) - return KerasLayer(class_name = LAYER_RELU, config = configX) -} - -private fun createKerasELU(layer: ELU): KerasLayer { - val configX = LayerConfig( - dtype = DATATYPE_FLOAT32, - alpha = layer.alpha.toDouble(), - name = layer.name - ) - return KerasLayer(class_name = LAYER_ELU, config = configX) -} - private fun createKerasPReLULayer(layer: PReLU, isKerasFullyCompatible: Boolean): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, @@ -265,7 +376,7 @@ private fun createKerasSoftmaxLayer(layer: Softmax): KerasLayer { return KerasLayer(class_name = LAYER_SOFTMAX, config = configX) } -private fun createKerasLeakyReLU(layer: LeakyReLU): KerasLayer { +private fun createKerasLeakyReLULayer(layer: LeakyReLU): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, alpha = 
layer.alpha.toDouble(),
@@ -283,7 +394,7 @@ private fun createKerasThresholdedReLULayer(layer: ThresholdedReLU): KerasLayer
     return KerasLayer(class_name = LAYER_THRESHOLDED_RELU, config = configX)
 }
 
-private fun createKerasBatchNorm(layer: BatchNorm, isKerasFullyCompatible: Boolean): KerasLayer {
+private fun createKerasBatchNormLayer(layer: BatchNorm, isKerasFullyCompatible: Boolean): KerasLayer {
     val configX = LayerConfig(
         dtype = DATATYPE_FLOAT32,
         name = layer.name,
@@ -306,7 +417,7 @@ private fun createKerasBatchNorm(layer: BatchNorm, isKerasFullyCompatible: Boole
     return KerasLayer(class_name = LAYER_BATCH_NORM, config = configX)
 }
 
-private fun createKerasInput(layer: Input): KerasLayer {
+private fun createKerasInputLayer(layer: Input): KerasLayer {
     val shape = mutableListOf<Int?>()
     shape.add(null)
     layer.packedDims.map { it.toInt() }.forEach { shape.add(it) }
@@ -320,7 +431,7 @@ private fun createKerasInput(layer: Input): KerasLayer {
     return KerasLayer(class_name = LAYER_INPUT, config = config)
 }
 
-private fun createKerasDense(layer: Dense, isKerasFullyCompatible: Boolean): KerasLayer {
+private fun createKerasDenseLayer(layer: Dense, isKerasFullyCompatible: Boolean): KerasLayer {
     val configX = LayerConfig(
         dtype = DATATYPE_FLOAT32,
         units = layer.outputSize,
@@ -336,186 +447,67 @@ private fun createKerasDense(layer: Dense, isKerasFullyCompatible: Boolean): Ker
     return KerasLayer(class_name = LAYER_DENSE, config = configX)
 }
 
-private fun convertToKerasRegularizer(regularizer: Regularizer?): KerasRegularizer? {
-    return if (regularizer != null) {
-        val className = "L1L2"
-        regularizer as L2L1
-        val config = KerasRegularizerConfig(l1 = regularizer.l1.toDouble(), l2 = regularizer.l2.toDouble())
-        KerasRegularizer(class_name = className, config = config)
-    } else {
-        null
-    }
-}
-
-private fun convertToKerasInitializer(initializer: Initializer, isKerasFullyCompatible: Boolean): KerasInitializer?
{
-    val className: String
-    val config: KerasInitializerConfig
-    if (isKerasFullyCompatible) {
-        val (_className, _config) = when (initializer) {
-            is GlorotUniform -> convertToVarianceScaling(initializer as VarianceScaling)
-            is GlorotNormal -> convertToVarianceScaling(initializer as VarianceScaling)
-            is HeNormal -> convertToVarianceScaling(initializer as VarianceScaling)
-            is HeUniform -> convertToVarianceScaling(initializer as VarianceScaling)
-            is LeCunNormal -> convertToVarianceScaling(initializer as VarianceScaling)
-            is LeCunUniform -> convertToVarianceScaling(initializer as VarianceScaling)
-            is RandomUniform -> convertToRandomUniform(initializer)
-            is Identity -> convertToIdentity(initializer)
-            else -> throw IllegalStateException("${initializer::class.simpleName} is not supported yet!")
-        }
-
-        className = _className
-        config = _config
-    } else {
-        className = when (initializer) {
-            is GlorotUniform -> INITIALIZER_GLOROT_UNIFORM
-            is GlorotNormal -> INITIALIZER_GLOROT_NORMAL
-            is HeNormal -> INITIALIZER_HE_NORMAL
-            is HeUniform -> INITIALIZER_HE_UNIFORM
-            is LeCunNormal -> INITIALIZER_LECUN_NORMAL
-            is LeCunUniform -> INITIALIZER_LECUN_UNIFORM
-            is Identity -> INITIALIZER_IDENTITY
-            else -> throw IllegalStateException("${initializer::class.simpleName} is not supported yet!")
-        }
-        config = KerasInitializerConfig(seed = 12)
-    }
-
-    return KerasInitializer(class_name = className, config = config)
-}
-
-private fun convertToRandomUniform(initializer: RandomUniform): Pair<String, KerasInitializerConfig> {
-    return Pair(
-        INITIALIZER_RANDOM_UNIFORM, KerasInitializerConfig(
-            minval = initializer.minVal.toDouble(),
-            maxval = initializer.maxVal.toDouble(),
-            seed = initializer.seed.toInt()
-        )
-    )
-}
-
-private fun convertToVarianceScaling(initializer: VarianceScaling): Pair<String, KerasInitializerConfig> {
-    return Pair(
-        INITIALIZER_VARIANCE_SCALING, KerasInitializerConfig(
-            seed = initializer.seed.toInt(),
-            scale = initializer.scale,
-            mode = convertMode(initializer.mode),
-            distribution = convertDistribution(initializer.distribution)
-        )
-    )
-}
-
-private fun convertToIdentity(initializer: Identity): Pair<String, KerasInitializerConfig> {
-    return Pair(
-        INITIALIZER_IDENTITY,
-        KerasInitializerConfig(
-            gain = initializer.gain.toDouble()
-        )
-    )
-}
-
-private fun convertDistribution(distribution: Distribution): String {
-    return when (distribution) {
-        Distribution.TRUNCATED_NORMAL -> "truncated_normal"
-        Distribution.UNIFORM -> "uniform"
-        Distribution.UNTRUNCATED_NORMAL -> "untruncated_normal"
-    }
-}
-
-private fun convertMode(mode: Mode): String {
-    return when (mode) {
-        Mode.FAN_IN -> "fan_in"
-        Mode.FAN_OUT -> "fan_out"
-        Mode.FAN_AVG -> "fan_avg"
-    }
-}
-
-private fun convertPadding(padding: ConvPadding): KerasPadding {
-    return when (padding) {
-        ConvPadding.SAME -> KerasPadding.Same
-        ConvPadding.VALID -> KerasPadding.Valid
-        ConvPadding.FULL -> KerasPadding.Full
-    }
-}
-
-private fun convertToKerasActivation(activation: Activations): String?
{ - return when (activation) { - Activations.Relu -> ACTIVATION_RELU - Activations.Sigmoid -> ACTIVATION_SIGMOID - Activations.Softmax -> ACTIVATION_SOFTMAX - Activations.Linear -> ACTIVATION_LINEAR - Activations.Tanh -> ACTIVATION_TANH - Activations.Relu6 -> ACTIVATION_RELU6 - Activations.Elu -> ACTIVATION_ELU - Activations.Selu -> ACTIVATION_SELU - Activations.LogSoftmax -> ACTIVATION_LOG_SOFTMAX - Activations.Exponential -> ACTIVATION_EXP - Activations.SoftPlus -> ACTIVATION_SOFTPLUS - Activations.SoftSign -> ACTIVATION_SOFTSIGN - Activations.HardSigmoid -> ACTIVATION_HARD_SIGMOID - Activations.Swish -> ACTIVATION_SWISH - } -} - -private fun createKerasMaxPool1D(layer: MaxPool1D): KerasLayer { +private fun createKerasMaxPool1DLayer(layer: MaxPool1D): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, pool_size = listOf(layer.poolSize[1].toInt()), strides = listOf(layer.strides[1].toInt()), - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), name = layer.name ) return KerasLayer(class_name = LAYER_MAX_POOL_1D, config = configX) } -private fun createKerasMaxPooling2D(layer: MaxPool2D): KerasLayer { +private fun createKerasMaxPool2DLayer(layer: MaxPool2D): KerasLayer { val poolSize = mutableListOf(layer.poolSize[1], layer.poolSize[2]) val strides = mutableListOf(layer.strides[1], layer.strides[2]) val configX = LayerConfig( data_format = CHANNELS_LAST, dtype = DATATYPE_FLOAT32, name = layer.name, - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), pool_size = poolSize, strides = strides ) - return KerasLayer(class_name = LAYER_MAX_POOLING_2D, config = configX) + return KerasLayer(class_name = LAYER_MAX_POOL_2D, config = configX) } -private fun createKerasAvgPool1D(layer: AvgPool1D): KerasLayer { +private fun createKerasAvgPool1DLayer(layer: AvgPool1D): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, pool_size = listOf(layer.poolSize[1].toInt()), strides = listOf(layer.strides[1].toInt()), - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), name = layer.name ) return KerasLayer(class_name = LAYER_AVG_POOL_1D, config = configX) } -private fun createKerasMaxPooling3D(layer: MaxPool3D): KerasLayer { +private fun createKerasMaxPool3DLayer(layer: MaxPool3D): KerasLayer { val poolSize = mutableListOf(layer.poolSize[1], layer.poolSize[3]) val strides = mutableListOf(layer.strides[1] , layer.strides[3]) val configX = LayerConfig( dtype = DATATYPE_FLOAT32, name = layer.name, - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), pool_size = poolSize, strides = strides ) - return KerasLayer(class_name = LAYER_MAX_POOLING_3D, config = configX) + return KerasLayer(class_name = LAYER_MAX_POOL_3D, config = configX) } -private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer { +private fun createKerasAvgPool2DLayer(layer: AvgPool2D): KerasLayer { val poolSize = mutableListOf(layer.poolSize[1], layer.poolSize[2]) val strides = mutableListOf(layer.strides[1], layer.strides[2]) val configX = LayerConfig( data_format = CHANNELS_LAST, dtype = DATATYPE_FLOAT32, name = layer.name, - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), pool_size = poolSize, strides = strides ) - return KerasLayer(class_name = LAYER_AVG_POOLING_2D, config = configX) + return KerasLayer(class_name = LAYER_AVG_POOL_2D, config = configX) } private fun createKerasAvgPool3DLayer(layer: AvgPool3D): 
KerasLayer { @@ -523,13 +515,13 @@ private fun createKerasAvgPool3DLayer(layer: AvgPool3D): KerasLayer { dtype = DATATYPE_FLOAT32, pool_size = layer.poolSize.slice(1..3).map { it.toInt() }, strides = layer.strides.slice(1..3).map { it.toInt() }, - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), name = layer.name ) return KerasLayer(class_name = LAYER_AVG_POOL_3D, config = configX) } -private fun createKerasFlatten(layer: Flatten): KerasLayer { +private fun createKerasFlattenLayer(layer: Flatten): KerasLayer { val configX = LayerConfig( data_format = CHANNELS_LAST, dtype = DATATYPE_FLOAT32, @@ -538,7 +530,7 @@ private fun createKerasFlatten(layer: Flatten): KerasLayer { return KerasLayer(class_name = LAYER_FLATTEN, config = configX) } -private fun createKerasConcatenate(layer: Concatenate): KerasLayer { +private fun createKerasConcatenateLayer(layer: Concatenate): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, axis = layer.axis, @@ -547,7 +539,7 @@ private fun createKerasConcatenate(layer: Concatenate): KerasLayer { return KerasLayer(class_name = LAYER_CONCATENATE, config = configX) } -private fun createKerasConv1D(layer: Conv1D, isKerasFullyCompatible: Boolean): KerasLayer { +private fun createKerasConv1DLayer(layer: Conv1D, isKerasFullyCompatible: Boolean): KerasLayer { val configX = LayerConfig( filters = layer.filters.toInt(), kernel_size = listOf(layer.kernelSize.toInt()), @@ -559,14 +551,14 @@ private fun createKerasConv1D(layer: Conv1D, isKerasFullyCompatible: Boolean): K kernel_regularizer = convertToKerasRegularizer(layer.kernelRegularizer), bias_regularizer = convertToKerasRegularizer(layer.biasRegularizer), activity_regularizer = convertToKerasRegularizer(layer.activityRegularizer), - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), name = layer.name, use_bias = layer.useBias ) return KerasLayer(class_name = LAYER_CONV1D, config = configX) } -private fun createKerasConv2D(layer: Conv2D, isKerasFullyCompatible: Boolean): KerasLayer { +private fun createKerasConv2DLayer(layer: Conv2D, isKerasFullyCompatible: Boolean): KerasLayer { val kernelSize = layer.kernelSize.map { it.toInt() }.toList() val configX = LayerConfig( filters = layer.filters.toInt(), @@ -579,14 +571,34 @@ private fun createKerasConv2D(layer: Conv2D, isKerasFullyCompatible: Boolean): K kernel_regularizer = convertToKerasRegularizer(layer.kernelRegularizer), bias_regularizer = convertToKerasRegularizer(layer.biasRegularizer), activity_regularizer = convertToKerasRegularizer(layer.activityRegularizer), - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), name = layer.name, use_bias = layer.useBias ) return KerasLayer(class_name = LAYER_CONV2D, config = configX) } -private fun createKerasDepthwiseConv2D(layer: DepthwiseConv2D, isKerasFullyCompatible: Boolean): KerasLayer { +private fun createKerasConv3DLayer(layer: Conv3D, isKerasFullyCompatible: Boolean): KerasLayer { + val kernelSize = layer.kernelSize.map { it.toInt() }.toList() + val configX = LayerConfig( + filters = layer.filters.toInt(), + kernel_size = kernelSize, + strides = listOf(layer.strides[1].toInt(), layer.strides[2].toInt(), layer.strides[3].toInt()), + dilation_rate = listOf(layer.dilations[1].toInt(), layer.dilations[2].toInt(), layer.dilations[3].toInt()), + activation = convertToKerasActivation(layer.activation), + kernel_initializer = convertToKerasInitializer(layer.kernelInitializer, 
isKerasFullyCompatible), + bias_initializer = convertToKerasInitializer(layer.biasInitializer, isKerasFullyCompatible), + kernel_regularizer = convertToKerasRegularizer(layer.kernelRegularizer), + bias_regularizer = convertToKerasRegularizer(layer.biasRegularizer), + activity_regularizer = convertToKerasRegularizer(layer.activityRegularizer), + padding = convertToKerasPadding(layer.padding), + name = layer.name, + use_bias = layer.useBias + ) + return KerasLayer(class_name = LAYER_CONV3D, config = configX) +} + +private fun createKerasDepthwiseConv2DLayer(layer: DepthwiseConv2D, isKerasFullyCompatible: Boolean): KerasLayer { val configX = LayerConfig( kernel_size = layer.kernelSize.map { it.toInt() }, strides = listOf(layer.strides[1].toInt(), layer.strides[2].toInt()), @@ -595,14 +607,14 @@ private fun createKerasDepthwiseConv2D(layer: DepthwiseConv2D, isKerasFullyCompa depthwise_initializer = convertToKerasInitializer(layer.depthwiseInitializer, isKerasFullyCompatible), depth_multiplier = layer.depthMultiplier, bias_initializer = convertToKerasInitializer(layer.biasInitializer, isKerasFullyCompatible), - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), use_bias = layer.useBias, name = layer.name ) return KerasLayer(class_name = LAYER_DEPTHWISE_CONV2D, configX) } -private fun createSeparableConv2D(layer: SeparableConv2D, isKerasFullyCompatible: Boolean): KerasLayer { +private fun createKerasSeparableConv2DLayer(layer: SeparableConv2D, isKerasFullyCompatible: Boolean): KerasLayer { val configX = LayerConfig( filters = layer.filters.toInt(), kernel_size = layer.kernelSize.map { it.toInt() }, @@ -613,14 +625,14 @@ private fun createSeparableConv2D(layer: SeparableConv2D, isKerasFullyCompatible pointwise_initializer = convertToKerasInitializer(layer.pointwiseInitializer, isKerasFullyCompatible), depth_multiplier = layer.depthMultiplier, bias_initializer = convertToKerasInitializer(layer.biasInitializer, isKerasFullyCompatible), - padding = convertPadding(layer.padding), + padding = convertToKerasPadding(layer.padding), use_bias = layer.useBias, name = layer.name ) return KerasLayer(class_name = LAYER_SEPARABLE_CONV2D, config = configX) } -private fun createKerasZeroPadding2D(layer: ZeroPadding2D): KerasLayer { +private fun createKerasZeroPadding2DLayer(layer: ZeroPadding2D): KerasLayer { val configX = LayerConfig( data_format = CHANNELS_LAST, dtype = DATATYPE_FLOAT32, @@ -628,4 +640,4 @@ private fun createKerasZeroPadding2D(layer: ZeroPadding2D): KerasLayer { padding = KerasPadding.ZeroPadding2D(layer.padding) ) return KerasLayer(class_name = LAYER_ZERO_PADDING_2D, config = configX) -} \ No newline at end of file +} diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt index ff63a74ca..7ab6ed91e 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt @@ -188,7 +188,7 @@ private fun fillConv2DVariablesFromKeras( val data = it.data when (it.name) { "kernel:0" -> { - val kernelVariableName = conv2dKernelVarName(layerName) + val kernelVariableName = convKernelVarName(layerName, dim = 2) val kernelShape = (model.getLayer(layerName) as Conv2D).kernelShapeArray require( kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -196,7 +196,7 @@ private fun fillConv2DVariablesFromKeras( 
            model.fillVariable(kernelVariableName, data)
         }
         "bias:0" -> {
-            val biasVariableName = conv2dBiasVarName(layerName)
+            val biasVariableName = convBiasVarName(layerName, dim = 2)
             val biasShape = (model.getLayer(layerName) as Conv2D).biasShapeArray
             require(
                 biasShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims)
@@ -576,8 +576,8 @@ public fun Functional.loadWeightsForFrozenLayersByPathTemplates(
 }
 
 private fun initConv2DVariablesByDefaultInitializer(name: String, model: GraphTrainableModel) {
-    val kernelVariableName = conv2dKernelVarName(name)
-    val biasVariableName = conv2dBiasVarName(name)
+    val kernelVariableName = convKernelVarName(name, dim = 2)
+    val biasVariableName = convBiasVarName(name, dim = 2)
     model.runAssignOpByVarName(kernelVariableName)
     model.runAssignOpByVarName(biasVariableName)
 }
@@ -721,12 +721,12 @@ private fun fillConv2DVariables(
     }
 
     val kernelData = hdfFile.getDatasetByPath(kernelDataPathTemplate.format(name, name)).data
-    val kernelVariableName = conv2dKernelVarName(name)
+    val kernelVariableName = convKernelVarName(name, dim = 2)
     model.fillVariable(kernelVariableName, kernelData)
 
     if (useBias) {
         val biasData = hdfFile.getDatasetByPath(biasDataPathTemplate.format(name, name)).data
-        val biasVariableName = conv2dBiasVarName(name)
+        val biasVariableName = convBiasVarName(name, dim = 2)
         model.fillVariable(biasVariableName, biasData)
     }
 }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/OnHeapDataset.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/OnHeapDataset.kt
index 0cee2756c..7aca321d7 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/OnHeapDataset.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/OnHeapDataset.kt
@@ -5,11 +5,13 @@
 
 package org.jetbrains.kotlinx.dl.dataset
 
+import org.jetbrains.kotlinx.dl.api.core.shape.shape
 import org.jetbrains.kotlinx.dl.dataset.preprocessor.Preprocessing
 import org.jetbrains.kotlinx.dl.dataset.preprocessor.generator.FromFolders
 import java.io.File
 import java.io.IOException
 import java.nio.FloatBuffer
+import kotlin.math.roundToInt
 import kotlin.math.truncate
 import kotlin.random.Random
 
@@ -275,4 +277,75 @@ public class OnHeapDataset internal constructor(private val x: Array<FloatArray>,
             batchLength
         )
     }
+
+    override fun toString(): String = buildStringRepr(x.partialToString(), y.partialToString())
+
+    public fun fullToString(): String = buildStringRepr(x.contentDeepToString(), y.contentToString())
+
+    private fun buildStringRepr(xString: String, yString: String): String =
+        "OnHeapDataset(\nx ${x.shape} =\n${xString},\ny [${y.size}] =\n${yString}\n)"
+}
+
+/**
+ * Creates a string representation of a `FloatArray` where only a part of the data is printed.
+ *
+ * @param maxSize maximum number of array elements present in the string representation
+ * @param lowPercent fraction of [maxSize] elements taken from the beginning of the array data;
+ * the rest is taken from the tail of the array, preserving the original element order
+ * @return string representation of the [FloatArray] in a format like
+ * `[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, ..., 9.0, 10.0]`
+ */
+private fun FloatArray.partialToString(maxSize: Int = 10, lowPercent: Double = 0.8): String {
+    if (size <= maxSize) {
+        return contentToString()
+    }
+
+    val lowCount = (lowPercent * maxSize).roundToInt()
+    val upStart = size - maxSize - 1
+
+    return generateSequence(0, Int::inc).map {
+        when {
+            it < lowCount -> this[it]
+            it > lowCount -> this[upStart + it]
+            else -> "..."
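+            // exactly one position (it == lowCount) renders the "..." separating head and tail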
+        }
+    }.take(maxSize + 1).joinToString(prefix = "[", postfix = "]", separator = ", ")
+}
+
+/**
+ * Creates a string representation of an `Array<FloatArray>` where only a part of the data is printed.
+ *
+ * @param maxSize maximum number of array elements present in the string representation
+ * @param lowPercent fraction of [maxSize] elements taken from the beginning of the array data;
+ * the rest is taken from the tail of the array, preserving the original element order
+ * @return string representation of the `Array<FloatArray>` in a format like
+ * `[[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, ..., 9.0, 10.0],
+ * [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, ..., 20.0, 21.0],
+ * [22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, ..., 31.0, 32.0],
+ * [33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, ..., 42.0, 43.0],
+ * [44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, ..., 53.0, 54.0],
+ * [55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, ..., 64.0, 65.0],
+ * [66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0, ..., 75.0, 76.0],
+ * [77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, ..., 86.0, 87.0],
+ * ...,
+ * [99.0, 100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, ..., 108.0, 109.0],
+ * [110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, ..., 119.0, 120.0]]`
+ */
+private fun Array<FloatArray>.partialToString(maxSize: Int = 10, lowPercent: Double = 0.8): String {
+    if (size <= maxSize) {
+        return joinToString(prefix = "[", postfix = "]", separator = ",\n ") {
+            it.partialToString(maxSize, lowPercent)
+        }
+    }
+
+    val lowCount = (lowPercent * maxSize).roundToInt()
+    val upStart = size - maxSize - 1
+
+    return generateSequence(0, Int::inc).map {
+        when {
+            it < lowCount -> this[it].partialToString(maxSize, lowPercent)
+            it > lowCount -> this[upStart + it].partialToString(maxSize, lowPercent)
+            else -> "..."
+        }
+    }.take(maxSize + 1).joinToString(prefix = "[", postfix = "]", separator = ",\n ")
+}
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/embeddedDatasets.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/embeddedDatasets.kt
index a2c7d4500..7b5c1aab4 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/embeddedDatasets.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/embeddedDatasets.kt
@@ -5,9 +5,14 @@
 
 package org.jetbrains.kotlinx.dl.dataset
 
+import io.jhdf.HdfFile
+import io.jhdf.api.Dataset
+import org.jetbrains.kotlinx.dl.api.core.shape.cast2D
+import org.jetbrains.kotlinx.dl.api.core.shape.castArray
 import org.jetbrains.kotlinx.dl.api.inference.keras.loaders.AWS_S3_URL
 import org.jetbrains.kotlinx.dl.api.inference.keras.loaders.LoadingMode
 import org.jetbrains.kotlinx.dl.dataset.handler.*
+import org.jetbrains.kotlinx.dl.dataset.sound.wav.WavFile
 import java.io.*
 import java.net.URL
 import java.nio.file.Files
@@ -16,6 +21,12 @@ import java.nio.file.Paths
 import java.nio.file.StandardCopyOption
 import java.util.zip.ZipEntry
 import java.util.zip.ZipFile
+import java.io.File
+
+import java.io.IOException
+
+import java.io.FileOutputStream
+import java.lang.IllegalStateException
 
 /**
@@ -35,10 +46,7 @@ import java.util.zip.ZipFile
  * (num_samples, 28, 28). Y data uint8 arrays of digit labels (integers in range 0-9) with shapes (num_samples,).
 */
 public fun mnist(cacheDirectory: File = File("cache")): Pair<OnHeapDataset, OnHeapDataset> {
-    if (!cacheDirectory.exists()) {
-        val created = cacheDirectory.mkdir()
-        if (!created) throw Exception("Directory ${cacheDirectory.absolutePath} could not be created! Create this directory manually.")
-    }
+    cacheDirectory.existsOrMkdirs()
 
     val trainXpath = loadFile(cacheDirectory, TRAIN_IMAGES_ARCHIVE).absolutePath
     val trainYpath = loadFile(cacheDirectory, TRAIN_LABELS_ARCHIVE).absolutePath
@@ -85,10 +93,7 @@ public fun mnist(cacheDirectory: File = File("cache")): Pair<OnHeapDataset, OnHeapDataset> {
-    if (!cacheDirectory.exists()) {
-        val created = cacheDirectory.mkdir()
-        if (!created) throw Exception("Directory ${cacheDirectory.absolutePath} could not be created! Create this directory manually.")
-    }
+    cacheDirectory.existsOrMkdirs()
 
     val trainXpath = loadFile(cacheDirectory, FASHION_TRAIN_IMAGES_ARCHIVE).absolutePath
     val trainYpath = loadFile(cacheDirectory, FASHION_TRAIN_LABELS_ARCHIVE).absolutePath
@@ -106,6 +111,133 @@ public fun fashionMnist(cacheDirectory: File = File("cache")): Pair<OnHeapDataset, OnHeapDataset> {
 
+/** Loads the MNIST 3D dataset. */
+public fun mnist3D(cacheDirectory: File = File("cache")): Pair<OnHeapDataset, OnHeapDataset> {
+    cacheDirectory.existsOrMkdirs()
+
+    return HdfFile(loadFile(cacheDirectory, MNIST_3D_DATASET)).use {
+
+        val (trainData, trainLabels) = it.extractMnist3DDataset("train")
+        val (testData, testLabels) = it.extractMnist3DDataset("test")
+
+        Pair(
+            OnHeapDataset.create(trainData, trainLabels),
+            OnHeapDataset.create(testData, testLabels)
+        )
+    }
+}
+
+/** Extracts mnist3d X data from the HDF5 [dataset]. */
+private fun extractMnist3DData(dataset: Dataset) =
+    dataset.data.castArray().cast2D<DoubleArray>()
+        .map { it.map(Double::toFloat).toFloatArray() }.toTypedArray()
+
+/** Extracts mnist3d Y labels from the HDF5 [dataset]. */
+private fun extractMnist3DLabels(dataset: Dataset) =
+    (dataset.data as LongArray).map(Long::toFloat).toFloatArray()
+
+/** Extracts mnist3d data and labels from the HDF5 file for the specified [label]. */
+private fun HdfFile.extractMnist3DDataset(label: String): Pair<Array<FloatArray>, FloatArray> =
+    Pair(
+        extractMnist3DData(getDatasetByPath("X_$label")),
+        extractMnist3DLabels(getDatasetByPath("y_$label"))
+    )
+
+public const val FSDD_SOUND_DATA_SIZE: Long = 20480
+
+/**
+ * Loads the [Free Spoken Digits Dataset](https://github.com/Jakobovski/free-spoken-digit-dataset).
+ * This is a dataset of wav sound files of the 10 digits spoken by different people many times each.
+ * The test set officially consists of the first 10% of the recordings. Recordings numbered 0-4 (inclusive)
+ * are in the test set and 5-49 are in the training set.
+ * Since the input files may contain a different number of channels, every input file is split into
+ * per-channel samples that are treated as separate samples with the same label.
+ *
+ * The Free Spoken Digits Dataset is made available under the terms of the
+ * [Creative Commons Attribution-ShareAlike 4.0 International.](https://creativecommons.org/licenses/by-sa/4.0/)
+ *
+ * @param [cacheDirectory] Cache directory for cached models and datasets.
+ * @param [maxTestIndex] Maximum recording index (exclusive) of the samples selected into the test part of the data.
+ *
+ * @return Train and test datasets. Each dataset includes X and Y data. X data are float arrays of sound data with shapes
+ * (num_samples, FSDD_SOUND_DATA_SIZE) where FSDD_SOUND_DATA_SIZE is at least as long as the longest input sequence and all
+ * sequences are padded with zeros to have equal length. Y data are float arrays of digit labels (integers in range 0-9)
+ * with shapes (num_samples,).
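+ *
+ * A minimal usage sketch (names as defined in this file):
+ * ```
+ * val (train, test) = freeSpokenDigits()
+ * ```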
+
+/** Length to which every FSDD sound sample is zero-padded. */
+public const val FSDD_SOUND_DATA_SIZE: Long = 20480
+
+/**
+ * Loads the [Free Spoken Digits Dataset](https://github.com/Jakobovski/free-spoken-digit-dataset).
+ * This is a dataset of WAV sound files of the 10 digits spoken by different people many times each.
+ * The test set officially consists of the first 10% of the recordings: recordings numbered 0-4 (inclusive)
+ * are in the test set and 5-49 are in the training set.
+ * As the input files may have different numbers of channels, every input file is split into
+ * one sample per channel, and each of them is treated as a separate sample with the same label.
+ *
+ * The Free Spoken Digits Dataset is made available under the terms of the
+ * [Creative Commons Attribution-ShareAlike 4.0 International.](https://creativecommons.org/licenses/by-sa/4.0/)
+ *
+ * @param [cacheDirectory] Cache directory for models and datasets.
+ * @param [maxTestIndex] Recordings with an index below this value are selected into the test part of the data.
+ *
+ * @return Train and test datasets. Each dataset includes X and Y data. X data are float arrays of sound data with shapes
+ * (num_samples, FSDD_SOUND_DATA_SIZE), where FSDD_SOUND_DATA_SIZE is at least as long as the longest input sequence and all
+ * sequences are padded with zeros to have equal length. Y data are float arrays of digit labels (integers in range 0-9)
+ * with shapes (num_samples,).
+ */
+public fun freeSpokenDigits(
+    cacheDirectory: File = File("cache"),
+    maxTestIndex: Int = 5
+): Pair<OnHeapDataset, OnHeapDataset> {
+    cacheDirectory.existsOrMkdirs()
+
+    val path = freeSpokenDigitDatasetPath(cacheDirectory)
+    val dataset = File("$path/free-spoken-digit-dataset-master/recordings")
+        .listFiles()?.flatMap(::extractWavFileSamples)
+        ?: throw IllegalStateException("Cannot find Free Spoken Digits Dataset files in $path")
+    val maxDataSize = dataset.map { it.first.size }.maxOrNull()
+        ?: throw IllegalStateException("Empty Free Spoken Digits Dataset")
+    check(maxDataSize <= FSDD_SOUND_DATA_SIZE) {
+        "Sound data should be limited to $FSDD_SOUND_DATA_SIZE values but has $maxDataSize"
+    }
+    val data = dataset.map(::extractPaddedDataWithIndex)
+    val labels = dataset.map(::extractLabelWithIndex)
+
+    val (trainData, testData) = data.splitToTrainAndTestByIndex(maxTestIndex)
+    val (trainLabels, testLabels) = labels.splitToTrainAndTestByIndex(maxTestIndex)
+
+    return Pair(
+        OnHeapDataset.create(trainData, trainLabels.toFloatArray()),
+        OnHeapDataset.create(testData, testLabels.toFloatArray())
+    )
+}
+
+/**
+ * Extracts samples from the given WAV [file] and returns the data of all its
+ * channels as a list of (channel_data, label, sample_index) triples.
+ *
+ * @param file WAV file to read the sound data from
+ * @return list of (channel_data, label, sample_index) triples, one per channel of the file
+ */
+private fun extractWavFileSamples(file: File): List<Triple<FloatArray, Float, Int>> =
+    WavFile(file).use {
+        val data = it.readRemainingFrames()
+        val parts = file.name.split("_")
+        val label = parts[0].toFloat()
+        val index = parts[2].split(".")[0].toInt()
+        data.map { channel -> Triple(channel, label, index) }
+    }
+
+private fun extractPaddedDataWithIndex(dataLabelIndex: Triple<FloatArray, Float, Int>): Pair<FloatArray, Int> =
+    Pair(dataLabelIndex.first.copyInto(FloatArray(FSDD_SOUND_DATA_SIZE.toInt())), dataLabelIndex.third)
+
+private fun extractLabelWithIndex(dataLabelIndex: Triple<FloatArray, Float, Int>): Pair<Float, Int> =
+    Pair(dataLabelIndex.second, dataLabelIndex.third)
+
+private inline fun <reified T> List<Pair<T, Int>>.splitToTrainAndTestByIndex(maxTestIndex: Int): Pair<Array<T>, Array<T>> {
+    val test = filter { it.second < maxTestIndex }.map { it.first }.toTypedArray()
+    val train = filter { it.second >= maxTestIndex }.map { it.first }.toTypedArray()
+    return Pair(train, test)
+}
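A toy sketch of the per-channel padding and index-based train/test split performed above. The sizes and samples here are made up; the real code pads to FSDD_SOUND_DATA_SIZE and splits at maxTestIndex = 5:

fun main() {
    val soundDataSize = 8
    // (channelData, label, recordingIndex) triples, as produced per WAV channel.
    val samples = listOf(
        Triple(floatArrayOf(0.1f, 0.2f), 0.0f, 3),        // index 3 -> test (index < 5)
        Triple(floatArrayOf(0.3f, 0.4f, 0.5f), 0.0f, 7)   // index 7 -> train
    )

    // Zero-pad every sample to a common length, as copyInto() does above.
    val padded = samples.map { (data, label, index) ->
        Triple(data.copyInto(FloatArray(soundDataSize)), label, index)
    }

    val (test, train) = padded.partition { it.third < 5 }
    println("train=${train.size}, test=${test.size}")        // train=1, test=1
    println(padded[0].first.contentToString())               // [0.1, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}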
 
 /** Path to train images archive of Mnist Dataset. */
 private const val CIFAR_10_IMAGES_ARCHIVE: String = "datasets/cifar10/images.zip"
@@ -115,10 +247,7 @@ private const val CIFAR_10_LABELS_ARCHIVE: String = "datasets/cifar10/trainLabel
 /** Returns paths to images and its labels for the Cifar'10 dataset. */
 public fun cifar10Paths(cacheDirectory: File = File("cache")): Pair<String, String> {
-    if (!cacheDirectory.exists()) {
-        val created = cacheDirectory.mkdir()
-        if (!created) throw Exception("Directory ${cacheDirectory.absolutePath} could not be created! Create this directory manually.")
-    }
+    cacheDirectory.existsOrMkdirs()
 
     val pathToLabel = loadFile(cacheDirectory, CIFAR_10_LABELS_ARCHIVE).absolutePath
 
@@ -130,7 +259,7 @@ public fun cifar10Paths(cacheDirectory: File = File("cache")): Pair<String, String> {
 internal fun loadFile(
     cacheDirectory: File,
     relativePathToFile: String,
+    downloadURLFromRelativePath: (String) -> String = { "$AWS_S3_URL/$it" },
     loadingMode: LoadingMode = LoadingMode.SKIP_LOADING_IF_EXISTS
 ): File {
     val fileName = cacheDirectory.absolutePath + "/" + relativePathToFile
-    val urlString = "$AWS_S3_URL/$relativePathToFile"
     val file = File(fileName)
-    file.parentFile.mkdirs() // Will create parent directories if not exists
     if (!file.exists() || loadingMode == LoadingMode.OVERRIDE_IF_EXISTS) {
+        val urlString = downloadURLFromRelativePath(relativePathToFile)
         val inputStream = URL(urlString).openStream()
         Files.copy(inputStream, Paths.get(fileName), StandardCopyOption.REPLACE_EXISTING)
     }
-    return File(fileName)
+    return file
 }
 
 /** Creates file structure archived in zip file with all directories and sub-directories */
 @Throws(IOException::class)
-internal fun extractImagesFromZipArchiveToFolder(zipArchivePath: Path, toFolder: Path) {
-    val bufferSize = 4096
+internal fun extractFromZipArchiveToFolder(zipArchivePath: Path, toFolder: Path, bufferSize: Int = 4096) {
     val zipFile = ZipFile(zipArchivePath.toFile())
     val entries = zipFile.entries()
 
@@ -252,3 +402,11 @@ internal fun extractImagesFromZipArchiveToFolder(zipArchivePath: Path, toFolder:
     zipFile.close()
 }
 
+internal fun File.existsOrMkdirs() {
+    if (!exists()) {
+        val created = mkdirs()
+        if (!created) {
+            throw Exception("Directory $absolutePath could not be created! Create this directory manually.")
+        }
+    }
+}
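The caching contract of loadFile() plus existsOrMkdirs() boils down to: ensure the cache directory exists, then download only when the file is missing (or when overriding). A compact sketch under those assumptions, with a placeholder URL instead of the real S3 bucket and a hypothetical helper name:

import java.io.File
import java.net.URL
import java.nio.file.Files
import java.nio.file.Paths
import java.nio.file.StandardCopyOption

fun fetchIntoCache(cacheDirectory: File, relativePath: String): File {
    // Create the cache directory (and parents) on first use.
    if (!cacheDirectory.exists() && !cacheDirectory.mkdirs()) {
        throw Exception("Directory ${cacheDirectory.absolutePath} could not be created!")
    }
    val target = File(cacheDirectory, relativePath)
    target.parentFile?.mkdirs()
    // Download only when the cached copy is absent.
    if (!target.exists()) {
        URL("https://example.com/$relativePath").openStream().use {
            Files.copy(it, Paths.get(target.absolutePath), StandardCopyOption.REPLACE_EXISTING)
        }
    }
    return target
}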
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/handler/MnistUtil.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/handler/MnistUtil.kt
index 47e271c82..7053efcc3 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/handler/MnistUtil.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/handler/MnistUtil.kt
@@ -29,7 +29,7 @@ public const val TEST_LABELS_ARCHIVE: String = "datasets/mnist/t10k-labels-idx1-
 public const val NUMBER_OF_CLASSES: Int = 10
 
 /**
- * Extracts Fashion Mnist images from [archivePath].
+ * Extracts (Fashion) Mnist images from [archivePath].
  */
 public fun extractImages(archivePath: String): Array<FloatArray> {
     val archiveStream = DataInputStream(
@@ -65,7 +65,7 @@ public fun extractImages(archivePath: String): Array<FloatArray> {
 }
 
 /**
- * Extracts Fashion Mnist labels from [archivePath] with number of classes [numClasses].
+ * Extracts (Fashion) Mnist labels from [archivePath] with number of classes [numClasses].
  */
 public fun extractLabels(archivePath: String, numClasses: Int): FloatArray {
     val archiveStream = DataInputStream(
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFile.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFile.kt
new file mode 100644
index 000000000..f4b04fa47
--- /dev/null
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFile.kt
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
+ */
+
+package org.jetbrains.kotlinx.dl.dataset.sound.wav
+
+import java.io.File
+import java.io.FileInputStream
+import java.io.IOException
+import java.io.InputStream
+
+private const val HEADER_LENGTH = 12
+
+private const val CHUNK_HEADER_LENGTH = 8
+
+private const val FMT_CHUNK_ID: Long = 0x20746D66
+
+private const val DATA_CHUNK_ID: Long = 0x61746164
+
+private const val RIFF_CHUNK_ID: Long = 0x46464952
+
+private const val RIFF_TYPE_ID: Long = 0x45564157
+
+/**
+ * Class for reading WAV audio files. A [WavFile] can be read only once:
+ * after all frames have been consumed, subsequent reads yield an empty buffer.
+ *
+ * Based on code written by [Andrew Greensted](http://www.labbookpages.co.uk/),
+ * rewritten in a more idiomatic Kotlin style and restricted to reading for simplicity.
+ */
+public class WavFile(
+    file: File,
+    private val bufferSize: Int = 4096
+) : AutoCloseable {
+
+    private enum class IOState {
+        READING,
+        CLOSED
+    }
+
+    /** Number of frames that remain to be read from the WAV file into some external buffer. */
+    public val remainingFrames: Long get() = frames - frameCounter
+
+    /** File format specification read from the WAV header, represented by [WavFileFormat]. */
+    public val format: WavFileFormat
+
+    /** Total number of frames present in the WAV file. */
+    public val frames: Long
+
+    private var ioState = IOState.READING
+
+    private val inputStream = FileInputStream(file)
+
+    private val buffer = ByteArray(bufferSize)
+
+    private var bufferPointer = 0
+
+    private var bytesRead = 0
+
+    private var frameCounter = 0L
+
+    init {
+        readWavHeader(file, inputStream, buffer)
+
+        var chunkSize: Long
+        var fileFormatChunk: WavFileFormat? = null
+        var numFrames: Long? = null
+
+        while (true) {
+            bytesRead = inputStream.read(buffer, 0, CHUNK_HEADER_LENGTH)
+            if (bytesRead != CHUNK_HEADER_LENGTH) {
+                throw WavFileException("Could not read chunk header")
+            }
+
+            val chunkID = readLittleEndian(buffer, 0, 4)
+            chunkSize = readLittleEndian(buffer, 4, 4)
+
+            var numChunkBytes = if (chunkSize % 2 == 1L) chunkSize + 1 else chunkSize
+            if (chunkID == FMT_CHUNK_ID) {
+                bytesRead = inputStream.read(buffer, 0, 16)
+
+                val compressionCode = readLittleEndian(buffer, 0, 2).toInt()
+                if (compressionCode != 1) {
+                    throw WavFileException("Compression Code $compressionCode not supported")
+                }
+
+                fileFormatChunk = WavFileFormat(buffer)
+
+                numChunkBytes -= 16
+                if (numChunkBytes > 0) {
+                    inputStream.skip(numChunkBytes)
+                }
+            } else if (chunkID == DATA_CHUNK_ID) {
+                val format = fileFormatChunk ?: throw WavFileException("Data chunk found before Format chunk")
+                if (chunkSize % format.blockAlign != 0L) {
+                    throw WavFileException("Data chunk size is not multiple of Block Align")
+                }
+                numFrames = chunkSize / format.blockAlign
+                break
+            } else {
+                inputStream.skip(numChunkBytes)
+            }
+        }
+
+        this.format = fileFormatChunk ?: throw WavFileException("Did not find a Format chunk")
+        this.frames = numFrames ?: throw WavFileException("Did not find a Data chunk")
+    }
+
+    public override fun close() {
+        ioState = IOState.CLOSED
+        inputStream.close()
+    }
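The magic constants above are just the four ASCII bytes of each RIFF chunk tag interpreted as a little-endian 32-bit value, which a few lines can verify (chunkId is an illustrative helper, not library API):

fun chunkId(tag: String): Long {
    require(tag.length == 4) { "RIFF chunk tags are exactly 4 bytes" }
    // Byte i of the tag lands in bits 8*i .. 8*i+7, i.e. little-endian order.
    return tag.foldIndexed(0L) { i, acc, c -> acc or (c.code.toLong() shl (8 * i)) }
}

fun main() {
    println(chunkId("fmt ").toString(16)) // 20746d66
    println(chunkId("RIFF").toString(16)) // 46464952
    println(chunkId("WAVE").toString(16)) // 45564157
    println(chunkId("data").toString(16)) // 61746164
}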
+
+    /**
+     * Reads all remaining frames from the WAV file and returns them as an array
+     * with one [FloatArray] per channel of the input file.
+     *
+     * @return Array with sound data for each channel
+     */
+    public fun readRemainingFrames(): Array<FloatArray> {
+        val count = remainingFrames
+        if (count > Int.MAX_VALUE) {
+            throw WavFileException("Cannot read more than ${Int.MAX_VALUE} frames at once")
+        }
+        val buffer = Array(format.numChannels) { FloatArray(count.toInt()) }
+        val readCount = readFrames(buffer, count.toInt())
+        check(readCount == count.toInt()) {
+            "Internal error: Should read all remaining data from wav file."
+        }
+        return buffer
+    }
+
+    /**
+     * Reads the given number of frames from the file into a multi-dimensional
+     * float array, starting at the specified offset in the buffer.
+     *
+     * @param returnBuffer the buffer to read samples into
+     * @param count the number of frames to read
+     * @param offset the buffer offset at which to start writing
+     * @return the number of frames actually read
+     */
+    public fun readFrames(returnBuffer: Array<FloatArray>, count: Int, offset: Int = 0): Int {
+        var myOffset = offset
+        if (ioState != IOState.READING) {
+            throw IOException("Cannot read from closed WavFile instance")
+        }
+        for (f in 0 until count) {
+            if (frameCounter == frames) {
+                return f
+            }
+            for (c in 0 until format.numChannels) {
+                returnBuffer[c][myOffset] = format.floatOffset + readSingleSample().toFloat() / format.floatScale
+            }
+            myOffset++
+            frameCounter++
+        }
+        return count
+    }
+
+    /**
+     * Reads a single sample from the internal buffer, refilling the buffer
+     * from the input stream when it is exhausted.
+     *
+     * @return the sample read
+     * @throws IOException Signals that an I/O exception has occurred
+     * @throws WavFileException if not enough data is available
+     */
+    private fun readSingleSample(): Long {
+        var resultSample = 0L
+        for (b in 0 until format.bytesPerSample) {
+            if (bufferPointer == bytesRead) {
+                val read = inputStream.read(buffer, 0, bufferSize)
+                if (read == -1) {
+                    throw WavFileException("Not enough data available")
+                }
+                bytesRead = read
+                bufferPointer = 0
+            }
+            var v = buffer[bufferPointer].toLong()
+            if (b < format.bytesPerSample - 1 || format.bytesPerSample == 1) {
+                v = v and 0xFF.toLong()
+            }
+            resultSample += (v shl b * 8)
+            bufferPointer++
+        }
+        return resultSample
+    }
+}
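To make the byte-twiddling in readSingleSample() and readFrames() concrete, here is the decoding of one signed 16-bit PCM sample by hand; for validBits = 16, floatOffset is 0 and floatScale is 1 shl 15:

fun main() {
    val bytesPerSample = 2
    val buffer = byteArrayOf(0x00, 0x80.toByte()) // -32768, the most negative 16-bit sample

    var sample = 0L
    for (b in 0 until bytesPerSample) {
        var v = buffer[b].toLong()
        // Mask all bytes except the most significant one, which keeps its sign.
        if (b < bytesPerSample - 1) v = v and 0xFFL
        sample += v shl (b * 8)
    }

    val floatScale = (1 shl 15).toFloat()
    println(sample)              // -32768
    println(sample / floatScale) // -1.0
}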
+
+/**
+ * Reads little-endian data from the buffer.
+ *
+ * @param buffer the buffer to read from
+ * @param position the starting position to read from
+ * @param count the number of bytes to read
+ * @return the little-endian value read from the buffer as a [Long]
+ */
+internal fun readLittleEndian(buffer: ByteArray, position: Int, count: Int): Long {
+    var currPosition = position + count - 1
+    var returnValue = (buffer[currPosition].toLong() and 0xFF)
+    for (b in 0 until count - 1) {
+        returnValue = (returnValue shl 8) + (buffer[--currPosition].toLong() and 0xFF)
+    }
+    return returnValue
+}
+
+private fun readWavHeader(file: File, inputStream: InputStream, buffer: ByteArray) {
+    val bytesRead = inputStream.read(buffer, 0, HEADER_LENGTH)
+    if (bytesRead != HEADER_LENGTH) {
+        throw WavFileException("Not enough wav file bytes for header")
+    }
+    val riffChunkID = readLittleEndian(buffer, 0, 4)
+    val chunkSize = readLittleEndian(buffer, 4, 4)
+    val riffTypeID = readLittleEndian(buffer, 8, 4)
+
+    if (riffChunkID != RIFF_CHUNK_ID) {
+        throw WavFileException("Invalid Wav Header data, incorrect riff chunk ID")
+    }
+    if (riffTypeID != RIFF_TYPE_ID) {
+        throw WavFileException("Invalid Wav Header data, incorrect riff type ID")
+    }
+    if (file.length() < chunkSize + 8) {
+        throw WavFileException("Header chunk size ($chunkSize) does not match file size (${file.length()})")
+    }
+}
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFileException.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFileException.kt
new file mode 100644
index 000000000..4aeaea26e
--- /dev/null
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFileException.kt
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
+ */
+
+package org.jetbrains.kotlinx.dl.dataset.sound.wav
+
+import java.lang.Exception
+
+/**
+ * WavFile-specific exception that represents an error in reading a WAV file caused by its format.
+ */
+public class WavFileException(message: String) : Exception(message)
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFileFormat.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFileFormat.kt
new file mode 100644
index 000000000..e8f74f8bc
--- /dev/null
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/dataset/sound/wav/WavFileFormat.kt
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
+ */
+
+package org.jetbrains.kotlinx.dl.dataset.sound.wav
+
+/**
+ * WAV file format representing the specification of the WAV file stored in its metadata header.
+ * + * @param buffer from which the format data is read + */ +public class WavFileFormat(buffer: ByteArray) { + + public val numChannels: Int = readLittleEndian(buffer, 2, 2).toInt() + + public val sampleRate: Long = readLittleEndian(buffer, 4, 4) + + public val blockAlign: Int = readLittleEndian(buffer, 12, 2).toInt() + + private val validBits: Int = readLittleEndian(buffer, 14, 2).toInt() + + public val bytesPerSample: Int = (validBits + 7) / 8 + + public val floatScale: Float + + public val floatOffset: Float + + init { + if (bytesPerSample * numChannels != blockAlign) { + throw WavFileException("Block Align does not agree with bytes required for validBits and number of channels") + } + if (numChannels == 0) { + throw WavFileException("Number of channels specified in header is equal to zero") + } + if (blockAlign == 0) { + throw WavFileException("Block Align specified in header is equal to zero") + } + if (validBits < 2) { + throw WavFileException("Valid Bits specified in header is less than 2") + } + else if (validBits > 64) { + throw WavFileException("Valid Bits specified in header is greater than 64, this is greater than a long can hold") + } + if (validBits > 8) { + this.floatOffset = 0.0f + this.floatScale = (1 shl (validBits - 1)).toFloat() + } else { + this.floatOffset = -1.0f + this.floatScale = 0.5f * ((1 shl validBits) - 1) + } + } + + override fun toString(): String = + "WavFileFormat(numChannels=$numChannels, sampleRate=$sampleRate, blockAlign=$blockAlign, " + + "bytesPerSample=$bytesPerSample, floatScale=$floatScale, floatOffset=$floatOffset)" +} diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool1DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool1DTest.kt index ba532acdb..7dd9723ab 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool1DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool1DTest.kt @@ -16,7 +16,7 @@ import org.tensorflow.Graph import org.tensorflow.Shape import org.tensorflow.op.Ops -const val EPS: Float = 1e-6f +private const val EPS: Float = 1e-6f internal class AvgPool1DTest { @Test diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt index ab540b441..9666a9024 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt @@ -8,6 +8,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool3D +import org.jetbrains.kotlinx.dl.api.core.shape.shape import org.jetbrains.kotlinx.dl.api.core.shape.toIntArray import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test @@ -16,7 +17,7 @@ import org.tensorflow.Graph import org.tensorflow.Shape import org.tensorflow.op.Ops -const val EPS: Float = 1e-6f +private const val EPS: Float = 1e-6f internal class AvgPool3DTest { @@ -53,13 +54,7 @@ internal class AvgPool3DTest { ), ) - private val inputShape: Shape = Shape.make( - input.size.toLong(), - input[0].size.toLong(), - input[0][0].size.toLong(), - input[0][0][0].size.toLong(), - input[0][0][0][0].size.toLong(), - ) + private val inputShape: Shape = input.shape @Test fun default() { diff --git 
a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv1DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv1DTest.kt index 2f1ba7bf5..e0c1bfcc6 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv1DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv1DTest.kt @@ -30,7 +30,7 @@ internal class Conv1DTest : ConvLayerTest() { @Test fun constantInputTensorWithValidPadding() { - val input = createFloatConv1DTensor(batchSize = 1, size = 3,channels = 1, initValue = 1.0f) + val input = createFloatConv1DTensor(batchSize = 1, size = 3, channels = 1, initValue = 1.0f) val expected = createFloatConv1DTensor(batchSize = 1, size = 2, channels = 16, initValue = 2.0f) assertFloatConv1DTensorsEquals( @@ -46,4 +46,29 @@ internal class Conv1DTest : ConvLayerTest() { expected ) } + + @Test + fun randomInputTensorWithOnesWeight() { + val input = arrayOf( + arrayOf( + floatArrayOf(0.5967f, 0.6496f, 0.1336f, 0.0338f), + floatArrayOf(0.7829f, 0.2899f, 0.2759f, 0.0719f), + floatArrayOf(0.0820f, 0.2821f, 0.7951f, 0.3663f) + ) + ) + val expected = arrayOf(arrayOf(floatArrayOf(input.sum()))) + + assertFloatConv1DTensorsEquals( + Conv1D( + name = "TestConv1D_3", + filters = 1, + kernelInitializer = Constant(1.0f), + biasInitializer = Zeros(), + kernelSize = 3, + padding = ConvPadding.VALID + ), + input, + expected + ) + } } diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv2DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv2DTest.kt index 2d16a8f2e..3b3178df0 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv2DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv2DTest.kt @@ -46,4 +46,35 @@ internal class Conv2DTest : ConvLayerTest() { expected ) } + + @Test + fun randomInputTensorWithOnesWeight() { + val input = arrayOf( + arrayOf( + arrayOf( + floatArrayOf(0.8373f, 0.8765f, 0.4692f), + floatArrayOf(0.5244f, 0.6573f, 0.9453f) + ), + + arrayOf( + floatArrayOf(0.6919f, 0.0724f, 0.7274f), + floatArrayOf(0.1452f, 0.9262f, 0.7690f) + ) + ) + ) + val expected = arrayOf(arrayOf(arrayOf(floatArrayOf(input.sum())))) + + assertFloatConv2DTensorsEquals( + Conv2D( + name = "TestConv2D_3", + filters = 1, + kernelInitializer = Constant(1.0f), + biasInitializer = Zeros(), + kernelSize = longArrayOf(2, 2), + padding = ConvPadding.VALID + ), + input, + expected + ) + } } diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv3DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv3DTest.kt new file mode 100644 index 000000000..28997d6b1 --- /dev/null +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/Conv3DTest.kt @@ -0,0 +1,91 @@ +/* + * Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved. + * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file. 
+ */ + +package org.jetbrains.kotlinx.dl.api.core.layer + +import org.jetbrains.kotlinx.dl.api.core.initializer.Constant +import org.jetbrains.kotlinx.dl.api.core.initializer.Zeros +import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv3D +import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding +import org.junit.jupiter.api.Test + +internal class Conv3DTest : ConvLayerTest() { + + @Test + fun zeroedInputTensorWithDefaultValues() { + val input = createFloatConv3DTensor(batchSize = 1, depth = 3, height = 3, width = 3, channels = 1, initValue = 0.0f) + val expected = createFloatConv3DTensor(batchSize = 1, depth = 3, height = 3, width = 3, channels = 32, initValue = 0.0f) + + assertFloatConv3DTensorsEquals( + Conv3D( + name = "TestConv3D_1", + biasInitializer = Zeros() + ), + input, + expected + ) + } + + @Test + fun constantInputTensorWithValidPadding() { + val input = createFloatConv3DTensor(batchSize = 1, depth = 3, height = 3, width = 3, channels = 1, initValue = 1.0f) + val expected = createFloatConv3DTensor(batchSize = 1, depth = 2, height = 2, width = 2, channels = 16, initValue = 8.0f) + + assertFloatConv3DTensorsEquals( + Conv3D( + name = "TestConv3D_2", + filters = 16, + kernelInitializer = Constant(1.0f), + biasInitializer = Zeros(), + kernelSize = longArrayOf(2, 2, 2), + padding = ConvPadding.VALID + ), + input, + expected + ) + } + + @Test + fun randomInputTensorWithOnesWeight() { + val input = arrayOf( + arrayOf( + arrayOf( + arrayOf( + floatArrayOf(0.8373f, 0.8765f, 0.4692f), + floatArrayOf(0.5244f, 0.6573f, 0.9453f) + ), + arrayOf( + floatArrayOf(0.6919f, 0.0724f, 0.7274f), + floatArrayOf(0.1452f, 0.9262f, 0.7690f) + ) + ), + arrayOf( + arrayOf( + floatArrayOf(0.453f, 0.3465f, 0.4342f), + floatArrayOf(0.2344f, 0.9673f, 0.1953f) + ), + arrayOf( + floatArrayOf(0.9222f, 0.8924f, 0.7234f), + floatArrayOf(0.2345f, 0.2622f, 0.9012f) + ) + ) + ) + ) + val expected = arrayOf(arrayOf(arrayOf(arrayOf(floatArrayOf(input.sum()))))) + + assertFloatConv3DTensorsEquals( + Conv3D( + name = "TestConv3D_3", + filters = 1, + kernelInitializer = Constant(1.0f), + biasInitializer = Zeros(), + kernelSize = longArrayOf(2, 2, 2), + padding = ConvPadding.VALID + ), + input, + expected + ) + } +} diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/ConvLayerTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/ConvLayerTest.kt index 338e4c38f..10c34dbdd 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/ConvLayerTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/ConvLayerTest.kt @@ -13,11 +13,14 @@ import org.junit.jupiter.api.Assertions.assertEquals import org.tensorflow.* import org.tensorflow.op.Ops import org.tensorflow.op.core.Constant +import java.lang.IllegalArgumentException internal typealias FloatConv1DTensor = Array> internal typealias FloatConv2DTensor = Array>> +internal typealias FloatConv3DTensor = Array>>> + internal typealias AnyDTensor = Array<*> open class ConvLayerTest { @@ -29,7 +32,7 @@ open class ConvLayerTest { ) { val actual = expected.copyZeroed() assertTensorsEquals(layer, input, expected, actual, - ::assertFloatConv1DTensorsEquals) { tf, tensor -> tf.constant(tensor.cast3DArray()) } + ::assertFloatConv1DTensorsEquals) { tf, tensor -> tf.constant(tensor.cast3D()) } } protected fun assertFloatConv2DTensorsEquals( @@ -39,7 +42,17 @@ open class ConvLayerTest { ) { val actual = expected.copyZeroed() assertTensorsEquals(layer, input, expected, actual, - 
::assertFloatConv2DTensorsEquals) { tf, tensor -> tf.constant(tensor.cast4DArray()) } + ::assertFloatConv2DTensorsEquals) { tf, tensor -> tf.constant(tensor.cast4D()) } + } + + protected fun assertFloatConv3DTensorsEquals( + layer: Layer, + input: FloatConv3DTensor, + expected: FloatConv3DTensor + ) { + val actual = expected.copyZeroed() + assertTensorsEquals(layer, input, expected, actual, + ::assertFloatConv3DTensorsEquals) { tf, tensor -> tf.constant(tensor.cast5D()) } } protected fun createFloatConv1DTensor( @@ -48,7 +61,7 @@ open class ConvLayerTest { channels: Long, initValue: Float ): FloatConv1DTensor = - getFloatArrayOfShape(Shape.make(batchSize, size, channels), initValue).cast3DArray() + getFloatArrayOfShape(Shape.make(batchSize, size, channels), initValue).cast3D() protected fun createFloatConv2DTensor( batchSize: Long, @@ -57,13 +70,26 @@ open class ConvLayerTest { channels: Long, initValue: Float ): FloatConv2DTensor = - getFloatArrayOfShape(Shape.make(batchSize, height, width, channels), initValue).cast4DArray() + getFloatArrayOfShape(Shape.make(batchSize, height, width, channels), initValue).cast4D() + + protected fun createFloatConv3DTensor( + batchSize: Long, + depth: Long, + height: Long, + width: Long, + channels: Long, + initValue: Float + ): FloatConv3DTensor = + getFloatArrayOfShape(Shape.make(batchSize, depth, height, width, channels), initValue).cast5D() private fun FloatConv1DTensor.copyZeroed(): FloatConv1DTensor = - getFloatArrayOfShape(getShapeOfArray(this)).cast3DArray() + getFloatArrayOfShape(this.shape).cast3D() private fun FloatConv2DTensor.copyZeroed(): FloatConv2DTensor = - getFloatArrayOfShape(getShapeOfArray(this)).cast4DArray() + getFloatArrayOfShape(this.shape).cast4D() + + private fun FloatConv3DTensor.copyZeroed(): FloatConv3DTensor = + getFloatArrayOfShape(this.shape).cast5D() private fun assertTensorsEquals( layer: Layer, @@ -82,14 +108,14 @@ open class ConvLayerTest { val isTraining = tf.constant(true) val numberOfLosses = tf.constant(1.0f) - layer.build(tf, kGraph, getShapeOfArray(input)) + layer.build(tf, kGraph, input.shape) val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput() kGraph.initializeGraphVariables(session) val outputTensor = session.runner().fetch(output).run().first() val outputTensorShape = shapeFromDims(*outputTensor.shape()) outputTensor.copyTo(actual) - assertEquals(getShapeOfArray(expected), outputTensorShape) + assertEquals(expected.shape, outputTensorShape) assertEquals(expected, actual) } } @@ -100,8 +126,8 @@ open class ConvLayerTest { expected: AnyDTensor, actual: AnyDTensor ) { - val expectedTensor = expected.cast3DArray() - val actualTensor = actual.cast3DArray() + val expectedTensor = expected.cast3D() + val actualTensor = actual.cast3D() val msg = "Expected ${expectedTensor.contentDeepToString()} " + "to equal ${actualTensor.contentDeepToString()}" for (i in expectedTensor.indices) { @@ -115,8 +141,8 @@ open class ConvLayerTest { expected: AnyDTensor, actual: AnyDTensor ) { - val expectedTensor = expected.cast4DArray() - val actualTensor = actual.cast4DArray() + val expectedTensor = expected.cast4D() + val actualTensor = actual.cast4D() val msg = "Expected ${expectedTensor.contentDeepToString()} " + "to equal ${actualTensor.contentDeepToString()}" for (i in expectedTensor.indices) { @@ -127,4 +153,29 @@ open class ConvLayerTest { } } } + + private fun assertFloatConv3DTensorsEquals( + expected: AnyDTensor, + actual: AnyDTensor + ) { + val expectedTensor = expected.cast5D() + val actualTensor 
= actual.cast5D() + val msg = "Expected ${expectedTensor.contentDeepToString()} " + + "to equal ${actualTensor.contentDeepToString()}" + for (i in expectedTensor.indices) { + for (j in expectedTensor[i].indices) { + for (k in expectedTensor[i][j].indices) { + for (l in expectedTensor[i][j][k].indices) { + assertArrayEquals(expectedTensor[i][j][k][l], actualTensor[i][j][k][l], EPS, msg) + } + } + } + } + } + + protected fun Array<*>.sum(): Float = fold(0.0f) { acc, arr -> when(arr) { + is FloatArray -> arr.sum() + acc + is Array<*> -> arr.sum() + acc + else -> throw IllegalArgumentException("Cannot sum array other than Array of FloatArray") + } } } diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling1DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool1DTest.kt similarity index 87% rename from api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling1DTest.kt rename to api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool1DTest.kt index aa55743a7..259e9aefa 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling1DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool1DTest.kt @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool1D import org.junit.jupiter.api.Test -internal class GlobalAvgPooling1DTest : PoolLayerTest() { +internal class GlobalAvgPool1DTest : PoolLayerTest() { @Test fun globalAvgPool1DTest(){ val input = Array(2, { Array(3, { FloatArray(4) { 0f } } ) } ) diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling2DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool2DTest.kt similarity index 87% rename from api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling2DTest.kt rename to api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool2DTest.kt index 11a5e4ef2..4a837a274 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling2DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool2DTest.kt @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool2D import org.junit.jupiter.api.Test -internal class GlobalAvgPooling2DTest : PoolLayerTest() { +internal class GlobalAvgPool2DTest : PoolLayerTest() { @Test fun globalAvgPool2DTest() { val input = Array(2, { Array(4, { Array(5, { FloatArray(3) { 0f } }) }) }) diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling3DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool3DTest.kt similarity index 88% rename from api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling3DTest.kt rename to api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool3DTest.kt index dd0c61c08..53d849171 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPooling3DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/GlobalAvgPool3DTest.kt @@ -3,7 +3,7 @@ package org.jetbrains.kotlinx.dl.api.core.layer import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool3D import org.junit.jupiter.api.Test -internal class GlobalAvgPooling3DTest : PoolLayerTest() { +internal class GlobalAvgPool3DTest : PoolLayerTest() { @Test 
     fun globalAvgPool3DTest() {
         val input = Array(2, { Array(3, { Array(4, { Array(5, { FloatArray(6) { 0f } }) }) }) })
diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/PoolLayerTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/PoolLayerTest.kt
index 6f4306ea5..2871e30c4 100644
--- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/PoolLayerTest.kt
+++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/PoolLayerTest.kt
@@ -18,9 +18,9 @@ open class PoolLayerTest {
         expected: Array<FloatArray>,
     ) {
         val actual = Array(expected.size) { FloatArray(expected[0].size) { 0f } }
-        assertPoolingLayer(layer, input, expected, actual, ::assertGlobalAvgPoolEquals) { tf, tensor ->
+        assertPoolLayer(layer, input, expected, actual, ::assertGlobalAvgPoolEquals) { tf, tensor ->
             tf.constant(
-                tensor.cast3DArray()
+                tensor.cast3D()
             )
         }
     }
@@ -31,9 +31,9 @@
         expected: Array<FloatArray>,
     ) {
         val actual = Array(expected.size) { FloatArray(expected[0].size) { 0f } }
-        assertPoolingLayer(layer, input, expected, actual, ::assertGlobalAvgPoolEquals) { tf, tensor ->
+        assertPoolLayer(layer, input, expected, actual, ::assertGlobalAvgPoolEquals) { tf, tensor ->
             tf.constant(
-                tensor.cast4DArray()
+                tensor.cast4D()
             )
         }
     }
@@ -44,14 +44,14 @@
         expected: Array<FloatArray>,
    ) {
         val actual = Array(expected.size) { FloatArray(expected[0].size) { 0f } }
-        assertPoolingLayer(layer, input, expected, actual, ::assertGlobalAvgPoolEquals) { tf, tensor ->
+        assertPoolLayer(layer, input, expected, actual, ::assertGlobalAvgPoolEquals) { tf, tensor ->
             tf.constant(
-                tensor.cast5DArray()
+                tensor.cast5D()
             )
         }
     }
 
-    private fun assertPoolingLayer(
+    private fun assertPoolLayer(
         layer: Layer,
         input: Array<*>,
         expected: Array<*>,
@@ -69,11 +69,10 @@
             val numberOfLosses = tf.constant(1.0f)
 
             val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput().tensor()
-            val expectedShape = getShapeOfArray(expected)
             val actualShape = shapeFromDims(*output.shape())
             output.copyTo(actual)
-            assertEquals(expectedShape, actualShape)
+            assertEquals(expected.shape, actualShape)
             assertEqual(expected, actual)
         }
     }
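The tests above switch from getShapeOfArray(...) to a shape extension property on nested arrays. A simplified sketch of what such an extension can look like (the real implementation lives in the library's shape utilities and may differ):

import org.tensorflow.Shape

// Walks the nesting of arrays down to the innermost FloatArray,
// collecting one dimension per level.
val Array<*>.shape: Shape
    get() {
        val dims = mutableListOf(size.toLong())
        var current: Any? = firstOrNull()
        while (current != null) {
            when (current) {
                is FloatArray -> { dims.add(current.size.toLong()); current = null }
                is Array<*> -> { dims.add(current.size.toLong()); current = current.firstOrNull() }
                else -> current = null
            }
        }
        return Shape.make(dims.first(), *dims.drop(1).toLongArray())
    }

With this, an input of type Array<Array<Array<FloatArray>>> built as Array(2) { Array(4) { Array(5) { FloatArray(3) } } } yields Shape.make(2, 4, 5, 3), which is what the assertions compare against the fetched tensor shape.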
@@ -82,8 +80,8 @@
         expected: Array<*>,
         actual: Array<*>
     ) {
-        val expectedTensor = expected.cast2DArray()
-        val actualTensor = actual.cast2DArray()
+        val expectedTensor = expected.cast2D()
+        val actualTensor = actual.cast2D()
         val msg = "Expected ${expectedTensor.contentDeepToString()} " +
                 "to equal ${actualTensor.contentDeepToString()}"
         for (i in expectedTensor.indices) {
diff --git a/examples/src/main/kotlin/examples/cnn/fsdd/SoundNet.kt b/examples/src/main/kotlin/examples/cnn/fsdd/SoundNet.kt
new file mode 100644
index 000000000..45a26c734
--- /dev/null
+++ b/examples/src/main/kotlin/examples/cnn/fsdd/SoundNet.kt
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
+ */
+
+package examples.cnn.fsdd
+
+import org.jetbrains.kotlinx.dl.api.core.Sequential
+import org.jetbrains.kotlinx.dl.api.core.activation.Activations
+import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
+import org.jetbrains.kotlinx.dl.api.core.layer.Layer
+import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.Conv1D
+import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
+import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense
+import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
+import org.jetbrains.kotlinx.dl.api.core.layer.pooling.MaxPool1D
+import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Flatten
+import org.jetbrains.kotlinx.dl.api.core.loss.Losses
+import org.jetbrains.kotlinx.dl.api.core.metric.Metrics
+import org.jetbrains.kotlinx.dl.api.core.optimizer.Adam
+import org.jetbrains.kotlinx.dl.dataset.FSDD_SOUND_DATA_SIZE
+import org.jetbrains.kotlinx.dl.dataset.freeSpokenDigits
+import org.jetbrains.kotlinx.dl.dataset.handler.NUMBER_OF_CLASSES
+
+private const val EPOCHS = 10
+private const val TRAINING_BATCH_SIZE = 500
+private const val TEST_BATCH_SIZE = 500
+private const val NUM_CHANNELS = 1L
+private const val SEED = 12L
+
+/**
+ * Creates a single building block of the SoundNet to simplify its structure.
+ * A single block consists of two identical [Conv1D] layers followed by [MaxPool1D].
+ *
+ * @param filters number of filters in the conv layers
+ * @param kernelSize kernel size of the conv layers
+ * @param poolStride pool size and stride of the max pooling layer
+ * @return array of layers to be registered in [Sequential] as varargs
+ */
+private fun soundBlock(filters: Long, kernelSize: Long, poolStride: Long): Array<Layer> =
+    arrayOf(
+        Conv1D(
+            filters = filters,
+            kernelSize = kernelSize,
+            strides = longArrayOf(1, 1, 1),
+            activation = Activations.Relu,
+            kernelInitializer = HeNormal(SEED),
+            biasInitializer = HeNormal(SEED),
+            padding = ConvPadding.SAME
+        ),
+        Conv1D(
+            filters = filters,
+            kernelSize = kernelSize,
+            strides = longArrayOf(1, 1, 1),
+            activation = Activations.Relu,
+            kernelInitializer = HeNormal(SEED),
+            biasInitializer = HeNormal(SEED),
+            padding = ConvPadding.SAME
+        ),
+        MaxPool1D(
+            poolSize = longArrayOf(1, poolStride, 1),
+            strides = longArrayOf(1, poolStride, 1),
+            padding = ConvPadding.SAME
+        )
+    )
+
+/**
+ * This is a CNN that uses only 1D convolutions and max pooling over the input sound data.
+ * This network should achieve ~55% accuracy on FSDD test data after 10 epochs and ~85% after
+ * 100 epochs.
+ */
+private val soundNet = Sequential.of(
+    Input(
+        FSDD_SOUND_DATA_SIZE,
+        NUM_CHANNELS
+    ),
+    *soundBlock(
+        filters = 4,
+        kernelSize = 8,
+        poolStride = 2
+    ),
+    *soundBlock(
+        filters = 4,
+        kernelSize = 16,
+        poolStride = 4
+    ),
+    *soundBlock(
+        filters = 8,
+        kernelSize = 16,
+        poolStride = 4
+    ),
+    *soundBlock(
+        filters = 8,
+        kernelSize = 16,
+        poolStride = 4
+    ),
+    Flatten(),
+    Dense(
+        outputSize = 1024,
+        activation = Activations.Relu,
+        kernelInitializer = HeNormal(SEED),
+        biasInitializer = HeNormal(SEED)
+    ),
+    Dense(
+        outputSize = NUMBER_OF_CLASSES,
+        activation = Activations.Linear,
+        kernelInitializer = HeNormal(SEED),
+        biasInitializer = HeNormal(SEED)
+    )
+)
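As a sanity check on the architecture above: with SAME padding, each soundBlock shrinks only the temporal dimension, by its pool stride, so the input length of 20480 reaches Flatten as 160 time steps with 8 channels, i.e. 1280 values feeding the first Dense layer:

fun main() {
    var length = 20480L // FSDD_SOUND_DATA_SIZE
    val poolStrides = listOf(2L, 4L, 4L, 4L)
    for (stride in poolStrides) {
        length /= stride
        println("after pool with stride $stride: $length") // 10240, 2560, 640, 160
    }
    println("flattened size: ${length * 8}") // 8 filters in the last block -> 1280
}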
+
+/**
+ * This example shows how to do audio classification from scratch using only Conv1D layers (without Conv2D)
+ * and dense layers, demonstrated on a small toy network.
+ * The workflow is shown on the Free Spoken Digits Dataset.
+ *
+ * It includes:
+ * - dataset loading from S3
+ * - model compilation
+ * - model training
+ * - model evaluation
+ */
+fun soundNet() {
+    val (train, test) = freeSpokenDigits()
+
+    soundNet.use {
+        it.compile(
+            optimizer = Adam(),
+            loss = Losses.SOFT_MAX_CROSS_ENTROPY_WITH_LOGITS,
+            metric = Metrics.ACCURACY
+        )
+
+        it.init()
+
+        var accuracy = it.evaluate(dataset = test, batchSize = TEST_BATCH_SIZE).metrics[Metrics.ACCURACY]
+        println("Accuracy before: $accuracy")
+
+        it.fit(dataset = train, epochs = EPOCHS, batchSize = TRAINING_BATCH_SIZE)
+
+        accuracy = it.evaluate(dataset = test, batchSize = TEST_BATCH_SIZE).metrics[Metrics.ACCURACY]
+        println("Accuracy after: $accuracy")
+    }
+}
+
+fun main(): Unit = soundNet()
diff --git a/examples/src/test/kotlin/examples/cnn/fsdd/FreeSpokenDigitsTestSuite.kt b/examples/src/test/kotlin/examples/cnn/fsdd/FreeSpokenDigitsTestSuite.kt
new file mode 100644
index 000000000..313c73039
--- /dev/null
+++ b/examples/src/test/kotlin/examples/cnn/fsdd/FreeSpokenDigitsTestSuite.kt
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2021 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
+ * Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
+ */
+
+package examples.cnn.fsdd
+
+import org.junit.jupiter.api.Test
+
+class FreeSpokenDigitsTestSuite {
+
+    @Test
+    fun soundNetTest() {
+        soundNet()
+    }
+}
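Finally, a hypothetical smoke check (not part of the change set) that loads the new dataset and prints its sizes before committing to the full soundNet() training run; it assumes the Dataset API exposes xSize():

import org.jetbrains.kotlinx.dl.dataset.freeSpokenDigits

fun main() {
    // Downloads and caches FSDD on first use, then reports the split sizes.
    val (train, test) = freeSpokenDigits()
    println("train samples: ${train.xSize()}")
    println("test samples: ${test.xSize()}")
}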