From a7e343d14609297aed601a8b5dfbb982d3f1e948 Mon Sep 17 00:00:00 2001 From: Masoud Kazemi Date: Thu, 3 Jun 2021 18:46:54 +0430 Subject: [PATCH 1/4] Add AvgPool3D layer --- .../dl/api/core/layer/pooling/AvgPool3D.kt | 113 +++++++ .../dl/api/inference/keras/KerasConstants.kt | 1 + .../dl/api/inference/keras/ModelLoader.kt | 17 +- .../dl/api/inference/keras/ModelSaver.kt | 18 +- .../dl/api/core/layer/AvgPool3DTest.kt | 311 ++++++++++++++++++ 5 files changed, 452 insertions(+), 8 deletions(-) create mode 100644 api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt create mode 100644 api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt new file mode 100644 index 000000000..da0ac6862 --- /dev/null +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt @@ -0,0 +1,113 @@ +/* +* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved. +* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file. +*/ + +package org.jetbrains.kotlinx.dl.api.core.layer.pooling + +import org.jetbrains.kotlinx.dl.api.core.KGraph +import org.jetbrains.kotlinx.dl.api.core.layer.Layer +import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding +import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength +import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_FIRST +import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_LAST +import org.tensorflow.Operand +import org.tensorflow.Shape +import org.tensorflow.op.Ops +import org.tensorflow.op.nn.AvgPool3d + +/** + * Average pooling operation for 3D data (e.g. video, spatio-temporal). + * + * Downsamples the input by taking the average over a window of size [poolSize]. + * + * @property [poolSize] Size of the pooling window. + * @property [strides] The amount of shift for pooling window in each pooling step. If + * `null`, it will default to [poolSize]. + * @property [padding] Padding strategy; can be either of [ConvPadding.VALID] which means no + * padding, or [ConvPadding.SAME] which means padding the input equally such that the output + * has the same dimension as the input. + * @property [dataFormat] Data format of input; can be either of [CHANNELS_LAST] or [CHANNELS_FIRST]. + */ +public class AvgPool3D( + public val poolSize: IntArray = intArrayOf(2, 2, 2), + public val strides: IntArray? = null, + public val padding: ConvPadding = ConvPadding.VALID, + public val dataFormat: String = CHANNELS_LAST, + name: String = "" +) : Layer(name) { + + override val hasActivation: Boolean + get() = false + override val paramCount: Int + get() = 0 + override val weights: Map> + get() = emptyMap() + + init { + require(dataFormat == CHANNELS_LAST || dataFormat == CHANNELS_FIRST) { + "The dataFormat should be either \"$CHANNELS_LAST\" or \"$CHANNELS_FIRST\"." + } + + require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { + "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}." + } + + require(poolSize.size == 3) { + "The length of poolSize array should be 3." + } + + require(strides == null || strides.size == 3) { + "The strides should be either `null` or an array of length 3." + } + } + + override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {} + + override fun computeOutputShape(inputShape: Shape): Shape { + val axis1 = if (dataFormat == CHANNELS_LAST) 1 else 2 + var dim1 = inputShape.size(axis1) + var dim2 = inputShape.size(axis1 + 1) + var dim3 = inputShape.size(axis1 + 2) + val strides1 = strides?.get(0) ?: poolSize[0] + val strides2 = strides?.get(1) ?: poolSize[1] + val strides3 = strides?.get(2) ?: poolSize[2] + dim1 = convOutputLength(dim1, poolSize[0], padding, strides1) + dim2 = convOutputLength(dim2, poolSize[1], padding, strides2) + dim3 = convOutputLength(dim3, poolSize[3], padding, strides3) + + return if (dataFormat == CHANNELS_LAST) { + Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4)) + } else { + Shape.make(inputShape.size(0), inputShape.size(1), dim1, dim2, dim3) + } + } + + override fun forward( + tf: Ops, + input: Operand, + isTraining: Operand, + numberOfLosses: Operand? + ): Operand { + val tfPoolSize = longArrayOf(1, poolSize[0].toLong(), poolSize[1].toLong(), poolSize[2].toLong(), 1) + val tfStrides = longArrayOf( + 1, + (strides?.get(0) ?: poolSize[0]).toLong(), + (strides?.get(1) ?: poolSize[1]).toLong(), + (strides?.get(2) ?: poolSize[2]).toLong(), + 1 + ) + val tfPadding = padding.paddingName + val tfDataFormat = if (dataFormat == CHANNELS_LAST) "NDHWC" else "NCDHW" + return tf.nn.avgPool3d( + input, + tfPoolSize.toList(), + tfStrides.toList(), + tfPadding, + AvgPool3d.dataFormat(tfDataFormat) + ) + } + + override fun toString(): String = + "AvgPool3D(poolSize=$poolSize, strides=$strides, padding=$padding, dataFormat=$dataFormat)" +} diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt index 013946644..a2ca560d2 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/KerasConstants.kt @@ -15,6 +15,7 @@ internal const val LAYER_INPUT: String = "InputLayer" internal const val LAYER_MAX_POOLING_2D: String = "MaxPooling2D" internal const val LAYER_AVG_POOLING_2D: String = "AvgPooling2D" internal const val LAYER_AVERAGE_POOLING_2D: String = "AveragePooling2D" +internal const val LAYER_AVG_POOL_3D: String = "AveragePooling3D" internal const val LAYER_RESCALING: String = "Rescaling" internal const val LAYER_NORMALIZATION: String = "Normalization" internal const val LAYER_FLATTEN: String = "Flatten" diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt index f55418cbe..4a268fd0a 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt @@ -26,10 +26,7 @@ import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense import org.jetbrains.kotlinx.dl.api.core.layer.core.Input import org.jetbrains.kotlinx.dl.api.core.layer.merge.* import org.jetbrains.kotlinx.dl.api.core.layer.normalization.BatchNorm -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool2D -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool1D -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool2D -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.MaxPool2D +import org.jetbrains.kotlinx.dl.api.core.layer.pooling.* import org.jetbrains.kotlinx.dl.api.core.layer.regularization.Dropout import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Cropping2D import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Flatten @@ -150,6 +147,7 @@ private fun convertToSequentialLayer( kerasLayer.config!!, kerasLayer.config.name!! ) + LAYER_AVG_POOL_3D -> createAvgPool3D(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!) @@ -286,6 +284,7 @@ private fun convertToLayer( kerasLayer.config!!, kerasLayer.config.name!! ) + LAYER_AVG_POOL_3D -> createAvgPool3D(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!) @@ -648,6 +647,16 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D { return AvgPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name) } +private fun createAvgPool3D(config: LayerConfig, name: String): Layer { + return AvgPool3D( + poolSize = config.pool_size!!.toIntArray(), + strides = config.strides?.toIntArray() ?: config.pool_size.toIntArray(), + padding = convertPadding(config.padding!!), + dataFormat = config.data_format!!, + name = name + ) +} + private fun convertPadding(padding: KerasPadding): ConvPadding { return when (padding) { is KerasPadding.Same -> ConvPadding.SAME diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index f7f2a7204..297ae1353 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -26,10 +26,7 @@ import org.jetbrains.kotlinx.dl.api.core.layer.core.Input import org.jetbrains.kotlinx.dl.api.core.layer.merge.Add import org.jetbrains.kotlinx.dl.api.core.layer.merge.Concatenate import org.jetbrains.kotlinx.dl.api.core.layer.normalization.BatchNorm -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool2D -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool1D -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool2D -import org.jetbrains.kotlinx.dl.api.core.layer.pooling.MaxPool2D +import org.jetbrains.kotlinx.dl.api.core.layer.pooling.* import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Flatten import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.ZeroPadding2D import org.jetbrains.kotlinx.dl.api.inference.keras.config.* @@ -78,6 +75,7 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i is Flatten -> createKerasFlatten(layer) is MaxPool2D -> createKerasMaxPooling2D(layer) is AvgPool2D -> createKerasAvgPooling2D(layer) + is AvgPool3D -> createAvgPool3D(layer) is Dense -> createKerasDense(layer, isKerasFullyCompatible) is ZeroPadding2D -> createKerasZeroPadding2D(layer) is Input -> createKerasInput(layer) @@ -364,6 +362,18 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer { return KerasLayer(class_name = LAYER_AVG_POOLING_2D, config = configX) } +private fun createAvgPool3D(layer: AvgPool3D): KerasLayer { + val configX = LayerConfig( + dtype = DATATYPE_FLOAT32, + pool_size = layer.poolSize.toList(), + strides = layer.strides?.toList() ?: layer.poolSize.toList(), + padding = convertPadding(layer.padding), + data_format = layer.dataFormat, + name = layer.name + ) + return KerasLayer(class_name = LAYER_AVG_POOL_3D, config = configX) +} + private fun createKerasFlatten(layer: Flatten): KerasLayer { val configX = LayerConfig( data_format = CHANNELS_LAST, diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt new file mode 100644 index 000000000..c2edb63cd --- /dev/null +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt @@ -0,0 +1,311 @@ +/* +* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved. +* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file. +*/ + +package org.jetbrains.kotlinx.dl.api.core.layer + +import org.jetbrains.kotlinx.dl.api.core.KGraph +import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding +import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool3D +import org.jetbrains.kotlinx.dl.api.core.shape.toIntArray +import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_FIRST +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Test +import org.tensorflow.EagerSession +import org.tensorflow.Graph +import org.tensorflow.Shape +import org.tensorflow.op.Ops + +const val EPS: Float = 1e-6f + +internal class AvgPool3DTest { + + private val input = arrayOf( + arrayOf( + arrayOf( + arrayOf( + floatArrayOf(1.0f, -2.0f, 3.0f), + floatArrayOf(0.5f, 2.0f, 5.0f), + floatArrayOf(-1.0f, 3.0f, 2.0f), + floatArrayOf(1.5f, -1.0f, 0.5f) + ), + arrayOf( + floatArrayOf(-1.0f, 2.0f, -2.0f), + floatArrayOf(2.5f, 3.0f, 1.0f), + floatArrayOf(-2.0f, 3.0f, 2.5f), + floatArrayOf(-3.0f, 1.0f, 1.5f) + ), + ), + arrayOf( + arrayOf( + floatArrayOf(1.0f, 3.0f, 1.0f), + floatArrayOf(6.0f, -2.5f, 4.0f), + floatArrayOf(7.0f, 0.0f, 5.0f), + floatArrayOf(1.0f, 2.0f, 4.0f) + ), + arrayOf( + floatArrayOf(7.0f, -3.0f, 2.0f), + floatArrayOf(1.0f, 2.0f, 2.0f), + floatArrayOf(3.0f, 5.0f, -2.0f), + floatArrayOf(3.0f, -1.0f, 0.0f) + ), + ), + ), + ) + + private val inputShape: Shape = Shape.make( + input.size.toLong(), + input[0].size.toLong(), + input[0][0].size.toLong(), + input[0][0][0].size.toLong(), + input[0][0][0][0].size.toLong(), + ) + + @Test + fun default() { + val layer = AvgPool3D() + val expected = arrayOf( + arrayOf( + arrayOf( + arrayOf( + floatArrayOf(18.0f/8, 4.5f/8, 16.0f/8), + floatArrayOf(9.5f/8, 12.0f/8, 13.5f/8), + ), + ), + ), + ) + + EagerSession.create().use { + val tf = Ops.create() + layer.build(tf, KGraph(Graph().toGraphDef()), inputShape) + + val inputOp = tf.constant(input) + val isTraining = tf.constant(true) + val numberOfLosses = tf.constant(1.0f) + val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput() + + // Check output shape is correct. + val expectedShape = intArrayOf(input.size, 1, 1, 2, input[0][0][0][0].size) + Assertions.assertArrayEquals( + expectedShape, + output.shape().toIntArray() + ) + + // Check output values are correct. + val actual = Array(input.size) { + Array(1) { Array(1) { Array(2) { FloatArray(input[0][0][0][0].size) } } } + } + output.tensor().copyTo(actual) + for (i in expected.indices) { + for (j in expected[i].indices) { + for (k in expected[i][j].indices) { + for (l in expected[i][j][k].indices) { + Assertions.assertArrayEquals( + expected[i][j][k][l], + actual[i][j][k][l], + EPS + ) + } + } + } + } + } + } + + /** + * TODO: the following test, i.e. testing with `dataFormat = CHANNELS_FIRST` fails with + * the following error: + * "Default Pooling3DOp only supports NDHWC on device type CPU" + * This should be investigated further. + */ + /* + @Test + fun withDataFormat() { + val layer = AvgPool3D(dataFormat = CHANNELS_FIRST) + val expected = arrayOf( + arrayOf( + arrayOf( + arrayOf( + floatArrayOf(8.0f/8), + floatArrayOf(12.0f/8) + ) + ), + arrayOf( + arrayOf( + floatArrayOf(1.5f/8), + floatArrayOf(12.5f/8) + ) + ) + ) + ) + + EagerSession.create().use { + val tf = Ops.create() + layer.build(tf, KGraph(Graph().toGraphDef()), inputShape) + + val inputOp = tf.constant(input) + val isTraining = tf.constant(true) + val numberOfLosses = tf.constant(1.0f) + val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput() + + // Check output shape is correct. + val expectedShape = intArrayOf(input.size, input[0].size, 1, 2, 1) + Assertions.assertArrayEquals( + expectedShape, + output.shape().toIntArray() + ) + + // Check output values are correct. + val actual = Array(input.size) { + Array(input[0].size) { Array(1) { Array(2) { FloatArray(1) } } } + } + output.tensor().copyTo(actual) + for (i in expected.indices) { + for (j in expected[i].indices) { + for (k in expected[i][j].indices) { + for (l in expected[i][j][k].indices) { + Assertions.assertArrayEquals( + expected[i][j][k][l], + actual[i][j][k][l], + EPS + ) + } + } + } + } + } + } + */ + + @Test + fun withPaddingAndStride() { + val layer = AvgPool3D(strides = intArrayOf(1, 1, 1), padding = ConvPadding.SAME) + val expected = arrayOf( + arrayOf( + arrayOf( + arrayOf( + floatArrayOf(18.0f/8, 4.5f/8, 16.0f/8), + floatArrayOf(17.0f/8, 15.5f/8, 19.5f/8), + floatArrayOf(9.5f/8, 12.0f/8, 13.5f/8), + floatArrayOf(5.0f/8, 2.0f/8, 12.0f/8) + ), + arrayOf( + floatArrayOf(19.0f/8, 8.0f/8, 6.0f/8), + floatArrayOf(9.0f/8, 26.0f/8, 7.0f/8), + floatArrayOf(2.0f/8, 16.0f/8, 4.0f/8), + floatArrayOf(0.0f/8, 0.0f/8, 6.0f/8) + ), + ), + arrayOf( + arrayOf( + floatArrayOf(30.0f/8, -1.0f/8, 18.0f/8), + floatArrayOf(34.0f/8, 9.0f/8, 18.0f/8), + floatArrayOf(28.0f/8, 12.0f/8, 14.0f/8), + floatArrayOf(16.0f/8, 4.0f/8, 16.0f/8) + ), + arrayOf( + floatArrayOf(32.0f/8, -4.0f/8, 16.0f/8), + floatArrayOf(16.0f/8, 28.0f/8, 0.0f/8), + floatArrayOf(24.0f/8, 16.0f/8, -8.0f/8), + floatArrayOf(24.0f/8, -8.0f/8, 0.0f/8) + ), + ), + ), + ) + + EagerSession.create().use { + val tf = Ops.create() + layer.build(tf, KGraph(Graph().toGraphDef()), inputShape) + + val inputOp = tf.constant(input) + val isTraining = tf.constant(true) + val numberOfLosses = tf.constant(1.0f) + val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput() + + // Check output shape is correct. + val expectedShape = inputShape.toIntArray() + Assertions.assertArrayEquals( + expectedShape, + output.shape().toIntArray() + ) + + // Check output values are correct. + val actual = Array(input.size) { + Array(input[0].size) { + Array(input[0][0].size) { + Array(input[0][0][0].size) { + FloatArray(input[0][0][0][0].size) + } + } + } + } + output.tensor().copyTo(actual) + for (i in expected.indices) { + for (j in expected[i].indices) { + for (k in expected[i][j].indices) { + for (l in expected[i][j][k].indices) { + println("$i $j $k $l") + Assertions.assertArrayEquals( + expected[i][j][k][l], + actual[i][j][k][l], + EPS + ) + } + } + } + } + } + } + + @Test + fun withPoolSizeAndStride() { + val layer = AvgPool3D(poolSize = intArrayOf(2, 2, 3), strides = intArrayOf(1, 1, 1)) + val expected = arrayOf( + arrayOf( + arrayOf( + arrayOf( + floatArrayOf(25.0f/12, 15.5f/12, 23.5f/12), + floatArrayOf(19.5f/12, 16.5f/12, 25.5f/12), + ), + ), + ), + ) + + EagerSession.create().use { + val tf = Ops.create() + layer.build(tf, KGraph(Graph().toGraphDef()), inputShape) + + val inputOp = tf.constant(input) + val isTraining = tf.constant(true) + val numberOfLosses = tf.constant(1.0f) + val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput() + + // Check output shape is correct. + val expectedShape = intArrayOf(input.size, 1, 1, 2, input[0][0][0][0].size) + Assertions.assertArrayEquals( + expectedShape, + output.shape().toIntArray() + ) + + // Check output values are correct. + val actual = Array(input.size) { + Array(1) { Array(1) { Array(2) { FloatArray(input[0][0][0][0].size) } } } + } + output.tensor().copyTo(actual) + for (i in expected.indices) { + for (j in expected[i].indices) { + for (k in expected[i][j].indices) { + for (l in expected[i][j][k].indices) { + Assertions.assertArrayEquals( + expected[i][j][k][l], + actual[i][j][k][l], + EPS + ) + } + } + } + } + } + } +} From 12be2f6871d2d8b1c408bd89e44dc01900b6f0b1 Mon Sep 17 00:00:00 2001 From: Masoud Kazemi Date: Thu, 10 Jun 2021 20:13:58 +0430 Subject: [PATCH 2/4] Remove dataFormat; Update poolSize and strides types; Update with recent changes --- .../dl/api/core/layer/pooling/AvgPool3D.kt | 72 +++++++------------ .../dl/api/inference/keras/ModelLoader.kt | 9 ++- .../dl/api/inference/keras/ModelSaver.kt | 6 +- .../dl/api/core/layer/AvgPool3DTest.kt | 70 +----------------- 4 files changed, 35 insertions(+), 122 deletions(-) diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt index da0ac6862..a945e5a39 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt @@ -9,31 +9,26 @@ import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength -import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_FIRST -import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_LAST import org.tensorflow.Operand import org.tensorflow.Shape import org.tensorflow.op.Ops -import org.tensorflow.op.nn.AvgPool3d /** * Average pooling operation for 3D data (e.g. video, spatio-temporal). * * Downsamples the input by taking the average over a window of size [poolSize]. * - * @property [poolSize] Size of the pooling window. + * @property [poolSize] Size of the pooling window for each dimension of input. * @property [strides] The amount of shift for pooling window in each pooling step. If * `null`, it will default to [poolSize]. * @property [padding] Padding strategy; can be either of [ConvPadding.VALID] which means no * padding, or [ConvPadding.SAME] which means padding the input equally such that the output * has the same dimension as the input. - * @property [dataFormat] Data format of input; can be either of [CHANNELS_LAST] or [CHANNELS_FIRST]. */ public class AvgPool3D( - public val poolSize: IntArray = intArrayOf(2, 2, 2), - public val strides: IntArray? = null, + public val poolSize: LongArray = longArrayOf(1, 2, 2, 2, 1), + public val strides: LongArray? = null, public val padding: ConvPadding = ConvPadding.VALID, - public val dataFormat: String = CHANNELS_LAST, name: String = "" ) : Layer(name) { @@ -41,46 +36,36 @@ public class AvgPool3D( get() = false override val paramCount: Int get() = 0 - override val weights: Map> + override var weights: Map> get() = emptyMap() + set(value) = assignWeights(value) init { - require(dataFormat == CHANNELS_LAST || dataFormat == CHANNELS_FIRST) { - "The dataFormat should be either \"$CHANNELS_LAST\" or \"$CHANNELS_FIRST\"." + require(poolSize.size == 5) { + "The poolSize should be an array of size 5." } - require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { - "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}." - } - - require(poolSize.size == 3) { - "The length of poolSize array should be 3." + require(strides == null || strides.size ==5) { + "The strides should be either `null` or an array of size 5." } - require(strides == null || strides.size == 3) { - "The strides should be either `null` or an array of length 3." + require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { + "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}." } } override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {} override fun computeOutputShape(inputShape: Shape): Shape { - val axis1 = if (dataFormat == CHANNELS_LAST) 1 else 2 - var dim1 = inputShape.size(axis1) - var dim2 = inputShape.size(axis1 + 1) - var dim3 = inputShape.size(axis1 + 2) - val strides1 = strides?.get(0) ?: poolSize[0] - val strides2 = strides?.get(1) ?: poolSize[1] - val strides3 = strides?.get(2) ?: poolSize[2] - dim1 = convOutputLength(dim1, poolSize[0], padding, strides1) - dim2 = convOutputLength(dim2, poolSize[1], padding, strides2) - dim3 = convOutputLength(dim3, poolSize[3], padding, strides3) + var dim1 = inputShape.size(1) + var dim2 = inputShape.size(2) + var dim3 = inputShape.size(3) + val strideValue = strides ?: poolSize + dim1 = convOutputLength(dim1, poolSize[1].toInt(), padding, strideValue[1].toInt()) + dim2 = convOutputLength(dim2, poolSize[2].toInt(), padding, strideValue[2].toInt()) + dim3 = convOutputLength(dim3, poolSize[3].toInt(), padding, strideValue[3].toInt()) - return if (dataFormat == CHANNELS_LAST) { - Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4)) - } else { - Shape.make(inputShape.size(0), inputShape.size(1), dim1, dim2, dim3) - } + return Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4)) } override fun forward( @@ -89,25 +74,16 @@ public class AvgPool3D( isTraining: Operand, numberOfLosses: Operand? ): Operand { - val tfPoolSize = longArrayOf(1, poolSize[0].toLong(), poolSize[1].toLong(), poolSize[2].toLong(), 1) - val tfStrides = longArrayOf( - 1, - (strides?.get(0) ?: poolSize[0]).toLong(), - (strides?.get(1) ?: poolSize[1]).toLong(), - (strides?.get(2) ?: poolSize[2]).toLong(), - 1 - ) + val strideValue = strides ?: poolSize val tfPadding = padding.paddingName - val tfDataFormat = if (dataFormat == CHANNELS_LAST) "NDHWC" else "NCDHW" return tf.nn.avgPool3d( input, - tfPoolSize.toList(), - tfStrides.toList(), - tfPadding, - AvgPool3d.dataFormat(tfDataFormat) + poolSize.toList(), + strideValue.toList(), + tfPadding ) } override fun toString(): String = - "AvgPool3D(poolSize=$poolSize, strides=$strides, padding=$padding, dataFormat=$dataFormat)" + "AvgPool3D(poolSize=$poolSize, strides=$strides, padding=$padding)" } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt index 2d8d5495c..d8aff61cb 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt @@ -653,11 +653,14 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D { } private fun createAvgPool3D(config: LayerConfig, name: String): Layer { + val poolSize = config.pool_size!! + val addedOnesPoolSize = longArrayOf(1, poolSize[0].toLong(), poolSize[1].toLong(), poolSize[2].toLong(), 1) + val strides = config.strides!! + val addedOnesStrides = longArrayOf(1, strides[0].toLong(), strides[1].toLong(), strides[2].toLong(), 1) return AvgPool3D( - poolSize = config.pool_size!!.toIntArray(), - strides = config.strides?.toIntArray() ?: config.pool_size.toIntArray(), + poolSize = addedOnesPoolSize, + strides = addedOnesStrides, padding = convertPadding(config.padding!!), - dataFormat = config.data_format!!, name = name ) } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index 8e916af41..f66bd90e6 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -457,12 +457,12 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer { } private fun createAvgPool3D(layer: AvgPool3D): KerasLayer { + val strideValue = layer.strides ?: layer.poolSize val configX = LayerConfig( dtype = DATATYPE_FLOAT32, - pool_size = layer.poolSize.toList(), - strides = layer.strides?.toList() ?: layer.poolSize.toList(), + pool_size = layer.poolSize.slice(1..3).map { it.toInt() }, + strides = strideValue.slice(1..3).map { it.toInt() }, padding = convertPadding(layer.padding), - data_format = layer.dataFormat, name = layer.name ) return KerasLayer(class_name = LAYER_AVG_POOL_3D, config = configX) diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt index c2edb63cd..ff82714b9 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt @@ -9,7 +9,6 @@ import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool3D import org.jetbrains.kotlinx.dl.api.core.shape.toIntArray -import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_FIRST import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test import org.tensorflow.EagerSession @@ -113,74 +112,9 @@ internal class AvgPool3DTest { } } - /** - * TODO: the following test, i.e. testing with `dataFormat = CHANNELS_FIRST` fails with - * the following error: - * "Default Pooling3DOp only supports NDHWC on device type CPU" - * This should be investigated further. - */ - /* - @Test - fun withDataFormat() { - val layer = AvgPool3D(dataFormat = CHANNELS_FIRST) - val expected = arrayOf( - arrayOf( - arrayOf( - arrayOf( - floatArrayOf(8.0f/8), - floatArrayOf(12.0f/8) - ) - ), - arrayOf( - arrayOf( - floatArrayOf(1.5f/8), - floatArrayOf(12.5f/8) - ) - ) - ) - ) - - EagerSession.create().use { - val tf = Ops.create() - layer.build(tf, KGraph(Graph().toGraphDef()), inputShape) - - val inputOp = tf.constant(input) - val isTraining = tf.constant(true) - val numberOfLosses = tf.constant(1.0f) - val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput() - - // Check output shape is correct. - val expectedShape = intArrayOf(input.size, input[0].size, 1, 2, 1) - Assertions.assertArrayEquals( - expectedShape, - output.shape().toIntArray() - ) - - // Check output values are correct. - val actual = Array(input.size) { - Array(input[0].size) { Array(1) { Array(2) { FloatArray(1) } } } - } - output.tensor().copyTo(actual) - for (i in expected.indices) { - for (j in expected[i].indices) { - for (k in expected[i][j].indices) { - for (l in expected[i][j][k].indices) { - Assertions.assertArrayEquals( - expected[i][j][k][l], - actual[i][j][k][l], - EPS - ) - } - } - } - } - } - } - */ - @Test fun withPaddingAndStride() { - val layer = AvgPool3D(strides = intArrayOf(1, 1, 1), padding = ConvPadding.SAME) + val layer = AvgPool3D(strides = longArrayOf(1, 1, 1, 1, 1), padding = ConvPadding.SAME) val expected = arrayOf( arrayOf( arrayOf( @@ -260,7 +194,7 @@ internal class AvgPool3DTest { @Test fun withPoolSizeAndStride() { - val layer = AvgPool3D(poolSize = intArrayOf(2, 2, 3), strides = intArrayOf(1, 1, 1)) + val layer = AvgPool3D(poolSize = longArrayOf(1, 2, 2, 3, 1), strides = longArrayOf(1, 1, 1, 1, 1)) val expected = arrayOf( arrayOf( arrayOf( From 195f320013c644f187429a77abc0ca1591938988 Mon Sep 17 00:00:00 2001 From: Masoud Kazemi Date: Thu, 10 Jun 2021 20:20:54 +0430 Subject: [PATCH 3/4] Fix function names --- .../jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt | 4 ++-- .../jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt index d8aff61cb..4ab3ac9fb 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelLoader.kt @@ -131,7 +131,7 @@ private fun convertToLayer( kerasLayer.config!!, kerasLayer.config.name!! ) - LAYER_AVG_POOL_3D -> createAvgPool3D(kerasLayer.config!!, kerasLayer.config.name!!) + LAYER_AVG_POOL_3D -> createAvgPool3DLayer(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!) LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!) @@ -652,7 +652,7 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D { return AvgPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name) } -private fun createAvgPool3D(config: LayerConfig, name: String): Layer { +private fun createAvgPool3DLayer(config: LayerConfig, name: String): Layer { val poolSize = config.pool_size!! val addedOnesPoolSize = longArrayOf(1, poolSize[0].toLong(), poolSize[1].toLong(), poolSize[2].toLong(), 1) val strides = config.strides!! diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index f66bd90e6..a8048d21b 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -77,7 +77,7 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i is Flatten -> createKerasFlatten(layer) is MaxPool2D -> createKerasMaxPooling2D(layer) is AvgPool2D -> createKerasAvgPooling2D(layer) - is AvgPool3D -> createAvgPool3D(layer) + is AvgPool3D -> createKerasAvgPool3DLayer(layer) is Dense -> createKerasDense(layer, isKerasFullyCompatible) is ZeroPadding2D -> createKerasZeroPadding2D(layer) is Input -> createKerasInput(layer) @@ -456,7 +456,7 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer { return KerasLayer(class_name = LAYER_AVG_POOLING_2D, config = configX) } -private fun createAvgPool3D(layer: AvgPool3D): KerasLayer { +private fun createKerasAvgPool3DLayer(layer: AvgPool3D): KerasLayer { val strideValue = layer.strides ?: layer.poolSize val configX = LayerConfig( dtype = DATATYPE_FLOAT32, From 3bd564a32a9e343d94b28e79b81d7d34458cdd79 Mon Sep 17 00:00:00 2001 From: Masoud Kazemi Date: Fri, 11 Jun 2021 18:56:10 +0430 Subject: [PATCH 4/4] Change strides property type and default value --- .../dl/api/core/layer/pooling/AvgPool3D.kt | 19 ++++++++----------- .../dl/api/inference/keras/ModelSaver.kt | 3 +-- .../dl/api/core/layer/AvgPool3DTest.kt | 1 - 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt index a945e5a39..5107db18f 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/pooling/AvgPool3D.kt @@ -19,15 +19,14 @@ import org.tensorflow.op.Ops * Downsamples the input by taking the average over a window of size [poolSize]. * * @property [poolSize] Size of the pooling window for each dimension of input. - * @property [strides] The amount of shift for pooling window in each pooling step. If - * `null`, it will default to [poolSize]. + * @property [strides] The amount of shift for pooling window per each input dimension in each pooling step. * @property [padding] Padding strategy; can be either of [ConvPadding.VALID] which means no * padding, or [ConvPadding.SAME] which means padding the input equally such that the output * has the same dimension as the input. */ public class AvgPool3D( public val poolSize: LongArray = longArrayOf(1, 2, 2, 2, 1), - public val strides: LongArray? = null, + public val strides: LongArray = longArrayOf(1, 2, 2, 2, 1), public val padding: ConvPadding = ConvPadding.VALID, name: String = "" ) : Layer(name) { @@ -45,8 +44,8 @@ public class AvgPool3D( "The poolSize should be an array of size 5." } - require(strides == null || strides.size ==5) { - "The strides should be either `null` or an array of size 5." + require(strides.size == 5) { + "The strides should be an array of size 5." } require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) { @@ -60,10 +59,9 @@ public class AvgPool3D( var dim1 = inputShape.size(1) var dim2 = inputShape.size(2) var dim3 = inputShape.size(3) - val strideValue = strides ?: poolSize - dim1 = convOutputLength(dim1, poolSize[1].toInt(), padding, strideValue[1].toInt()) - dim2 = convOutputLength(dim2, poolSize[2].toInt(), padding, strideValue[2].toInt()) - dim3 = convOutputLength(dim3, poolSize[3].toInt(), padding, strideValue[3].toInt()) + dim1 = convOutputLength(dim1, poolSize[1].toInt(), padding, strides[1].toInt()) + dim2 = convOutputLength(dim2, poolSize[2].toInt(), padding, strides[2].toInt()) + dim3 = convOutputLength(dim3, poolSize[3].toInt(), padding, strides[3].toInt()) return Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4)) } @@ -74,12 +72,11 @@ public class AvgPool3D( isTraining: Operand, numberOfLosses: Operand? ): Operand { - val strideValue = strides ?: poolSize val tfPadding = padding.paddingName return tf.nn.avgPool3d( input, poolSize.toList(), - strideValue.toList(), + strides.toList(), tfPadding ) } diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index 721704007..d35585ba0 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -471,11 +471,10 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer { } private fun createKerasAvgPool3DLayer(layer: AvgPool3D): KerasLayer { - val strideValue = layer.strides ?: layer.poolSize val configX = LayerConfig( dtype = DATATYPE_FLOAT32, pool_size = layer.poolSize.slice(1..3).map { it.toInt() }, - strides = strideValue.slice(1..3).map { it.toInt() }, + strides = layer.strides.slice(1..3).map { it.toInt() }, padding = convertPadding(layer.padding), name = layer.name ) diff --git a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt index ff82714b9..ab540b441 100644 --- a/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt +++ b/api/src/test/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/AvgPool3DTest.kt @@ -179,7 +179,6 @@ internal class AvgPool3DTest { for (j in expected[i].indices) { for (k in expected[i][j].indices) { for (l in expected[i][j][k].indices) { - println("$i $j $k $l") Assertions.assertArrayEquals( expected[i][j][k][l], actual[i][j][k][l],