Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AvgPool3D layer #99

Merged
merged 6 commits into from
Jun 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.pooling

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.tensorflow.Operand
import org.tensorflow.Shape
import org.tensorflow.op.Ops

/**
 * Average pooling operation for 3D data (e.g. video, spatio-temporal).
 *
 * Downsamples the input by taking the average over a window of size [poolSize].
 *
 * Both [poolSize] and [strides] must contain exactly five values, one per dimension of the
 * 5-dimensional input. Only the values at indices 1..3 take part in the output shape computation;
 * the input dimensions 0 and 4 are passed through unchanged (presumably batch and channels —
 * the underlying TensorFlow op is expected to require `1` at these two positions).
 *
 * @property [poolSize] Size of the pooling window for each dimension of input.
 * @property [strides] The amount of shift for pooling window per each input dimension in each pooling step.
 * @property [padding] Padding strategy; can be either of [ConvPadding.VALID] which means no
 * padding, or [ConvPadding.SAME] which means padding the input such that every input position
 * is covered by a window (with unit strides the output then has the same dimensions as the input).
 * @throws IllegalArgumentException if [poolSize] or [strides] does not have exactly 5 elements,
 * or if [padding] is neither [ConvPadding.VALID] nor [ConvPadding.SAME].
 */
public class AvgPool3D(
    public val poolSize: LongArray = longArrayOf(1, 2, 2, 2, 1),
    public val strides: LongArray = longArrayOf(1, 2, 2, 2, 1),
    public val padding: ConvPadding = ConvPadding.VALID,
    name: String = ""
) : Layer(name) {

    // Pooling has neither an activation function nor trainable parameters.
    override val hasActivation: Boolean
        get() = false
    override val paramCount: Int
        get() = 0
    override var weights: Map<String, Array<*>>
        get() = emptyMap()
        set(value) = assignWeights(value)

    init {
        require(poolSize.size == 5) {
            "The poolSize should be an array of size 5."
        }

        require(strides.size == 5) {
            "The strides should be an array of size 5."
        }

        require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) {
            "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}."
        }
    }

    /** Nothing to build: the layer owns no variables. */
    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {}

    /**
     * Computes the shape produced by this layer for the given 5-dimensional [inputShape].
     *
     * Dimensions 1..3 are reduced according to [poolSize], [strides] and [padding];
     * dimensions 0 and 4 are copied from [inputShape].
     */
    override fun computeOutputShape(inputShape: Shape): Shape {
        val dim1 = convOutputLength(inputShape.size(1), poolSize[1].toInt(), padding, strides[1].toInt())
        val dim2 = convOutputLength(inputShape.size(2), poolSize[2].toInt(), padding, strides[2].toInt())
        val dim3 = convOutputLength(inputShape.size(3), poolSize[3].toInt(), padding, strides[3].toInt())

        return Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4))
    }

    /** Applies TensorFlow's 3D average pooling to [input]; [isTraining] and [numberOfLosses] are not used. */
    override fun forward(
        tf: Ops,
        input: Operand<Float>,
        isTraining: Operand<Boolean>,
        numberOfLosses: Operand<Float>?
    ): Operand<Float> {
        val tfPadding = padding.paddingName
        return tf.nn.avgPool3d(
            input,
            poolSize.toList(),
            strides.toList(),
            tfPadding
        )
    }

    // Arrays must be rendered with contentToString(); plain interpolation would print
    // the JVM identity string (e.g. "[J@1b2c3d") instead of the actual values.
    override fun toString(): String =
        "AvgPool3D(poolSize=${poolSize.contentToString()}, strides=${strides.contentToString()}, padding=$padding)"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ internal const val LAYER_MAX_POOLING_2D: String = "MaxPooling2D"
internal const val LAYER_MAX_POOLING_3D: String = "MaxPooling3D"
internal const val LAYER_AVG_POOLING_2D: String = "AvgPooling2D"
internal const val LAYER_AVERAGE_POOLING_2D: String = "AveragePooling2D"
// Layer class name used both when matching loaded layers and when exporting AvgPool3D.
// NOTE(review): the identifier uses "POOL" while the sibling constants use "POOLING" —
// consider renaming to LAYER_AVG_POOLING_3D together with all of its usages.
internal const val LAYER_AVG_POOL_3D: String = "AveragePooling3D"
internal const val LAYER_RESCALING: String = "Rescaling"
internal const val LAYER_NORMALIZATION: String = "Normalization"
internal const val LAYER_FLATTEN: String = "Flatten"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ private fun convertToLayer(
kerasLayer.config!!,
kerasLayer.config.name!!
)
LAYER_AVG_POOL_3D -> createAvgPool3DLayer(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!)
Expand Down Expand Up @@ -654,6 +655,19 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D {
return AvgPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name)
}

/**
 * Builds an [AvgPool3D] layer named [name] from a loaded layer [config].
 *
 * The three configured pool sizes and strides are widened to five-element arrays
 * by placing `1` in front of and behind them.
 */
private fun createAvgPool3DLayer(config: LayerConfig, name: String): Layer {
    val (pool0, pool1, pool2) = config.pool_size!!
    val fullPoolSize = longArrayOf(1, pool0.toLong(), pool1.toLong(), pool2.toLong(), 1)

    val (stride0, stride1, stride2) = config.strides!!
    val fullStrides = longArrayOf(1, stride0.toLong(), stride1.toLong(), stride2.toLong(), 1)

    val layerPadding = convertPadding(config.padding!!)
    return AvgPool3D(poolSize = fullPoolSize, strides = fullStrides, padding = layerPadding, name = name)
}

private fun createMaxPooling3D(config: LayerConfig, name: String): MaxPool3D {
val poolSize = config.pool_size!!.toIntArray()
val addedOnesPoolSize = IntArray(5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i
is MaxPool2D -> createKerasMaxPooling2D(layer)
is MaxPool3D -> createKerasMaxPooling3D(layer)
is AvgPool2D -> createKerasAvgPooling2D(layer)
is AvgPool3D -> createKerasAvgPool3DLayer(layer)
is Dense -> createKerasDense(layer, isKerasFullyCompatible)
is ZeroPadding2D -> createKerasZeroPadding2D(layer)
is Input -> createKerasInput(layer)
Expand Down Expand Up @@ -469,6 +470,17 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer {
return KerasLayer(class_name = LAYER_AVG_POOLING_2D, config = configX)
}

/**
 * Converts an [AvgPool3D] layer into its [KerasLayer] description.
 *
 * Only the entries at indices 1..3 of the layer's pool size and strides are exported;
 * the first and last entries are dropped.
 */
private fun createKerasAvgPool3DLayer(layer: AvgPool3D): KerasLayer {
    val exportedAxes = 1..3
    val exportedPoolSize = layer.poolSize.slice(exportedAxes).map(Long::toInt)
    val exportedStrides = layer.strides.slice(exportedAxes).map(Long::toInt)

    return KerasLayer(
        class_name = LAYER_AVG_POOL_3D,
        config = LayerConfig(
            dtype = DATATYPE_FLOAT32,
            pool_size = exportedPoolSize,
            strides = exportedStrides,
            padding = convertPadding(layer.padding),
            name = layer.name
        )
    )
}

private fun createKerasFlatten(layer: Flatten): KerasLayer {
val configX = LayerConfig(
data_format = CHANNELS_LAST,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool3D
import org.jetbrains.kotlinx.dl.api.core.shape.toIntArray
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import org.tensorflow.EagerSession
import org.tensorflow.Graph
import org.tensorflow.Shape
import org.tensorflow.op.Ops

/** Absolute tolerance passed to the float-array assertions in the tests below. */
const val EPS: Float = 1e-6f

internal class AvgPool3DTest {

// Shared test input: nested float arrays with sizes 1 x 2 x 2 x 4 x 3 (outermost to innermost).
private val input = arrayOf(
arrayOf(
arrayOf(
arrayOf(
floatArrayOf(1.0f, -2.0f, 3.0f),
floatArrayOf(0.5f, 2.0f, 5.0f),
floatArrayOf(-1.0f, 3.0f, 2.0f),
floatArrayOf(1.5f, -1.0f, 0.5f)
),
arrayOf(
floatArrayOf(-1.0f, 2.0f, -2.0f),
floatArrayOf(2.5f, 3.0f, 1.0f),
floatArrayOf(-2.0f, 3.0f, 2.5f),
floatArrayOf(-3.0f, 1.0f, 1.5f)
),
),
arrayOf(
arrayOf(
floatArrayOf(1.0f, 3.0f, 1.0f),
floatArrayOf(6.0f, -2.5f, 4.0f),
floatArrayOf(7.0f, 0.0f, 5.0f),
floatArrayOf(1.0f, 2.0f, 4.0f)
),
arrayOf(
floatArrayOf(7.0f, -3.0f, 2.0f),
floatArrayOf(1.0f, 2.0f, 2.0f),
floatArrayOf(3.0f, 5.0f, -2.0f),
floatArrayOf(3.0f, -1.0f, 0.0f)
),
),
),
)

// Shape of [input], derived from the sizes of its nested arrays so the two cannot drift apart.
private val inputShape: Shape = Shape.make(
input.size.toLong(),
input[0].size.toLong(),
input[0][0].size.toLong(),
input[0][0][0].size.toLong(),
input[0][0][0][0].size.toLong(),
)

/**
 * Runs an [AvgPool3D] created with default arguments on the shared input and checks
 * both the shape and the values of the output.
 */
@Test
fun default() {
    val layer = AvgPool3D()
    // Each expected value is the sum over one pooling window divided by the window element count (8).
    val expected = arrayOf(
        arrayOf(
            arrayOf(
                arrayOf(
                    floatArrayOf(18.0f / 8, 4.5f / 8, 16.0f / 8),
                    floatArrayOf(9.5f / 8, 12.0f / 8, 13.5f / 8),
                ),
            ),
        ),
    )

    EagerSession.create().use { session ->
        // Bind the ops to the session opened above so that everything created here is released
        // when the session is closed; a bare Ops.create() targets the default eager session instead.
        val tf = Ops.create(session)
        layer.build(tf, KGraph(Graph().toGraphDef()), inputShape)

        val inputOp = tf.constant(input)
        val isTraining = tf.constant(true)
        val numberOfLosses = tf.constant(1.0f)
        val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput()

        // Check output shape is correct.
        val channels = input[0][0][0][0].size
        val expectedShape = intArrayOf(input.size, 1, 1, 2, channels)
        Assertions.assertArrayEquals(
            expectedShape,
            output.shape().toIntArray()
        )

        // Check output values are correct.
        val actual = Array(input.size) {
            Array(1) { Array(1) { Array(2) { FloatArray(channels) } } }
        }
        output.tensor().copyTo(actual)
        for (i in expected.indices) {
            for (j in expected[i].indices) {
                for (k in expected[i][j].indices) {
                    for (l in expected[i][j][k].indices) {
                        Assertions.assertArrayEquals(
                            expected[i][j][k][l],
                            actual[i][j][k][l],
                            EPS
                        )
                    }
                }
            }
        }
    }
}

/**
 * Runs an [AvgPool3D] with unit strides and [ConvPadding.SAME] padding on the shared input and
 * checks that the output has the same shape as the input and matches the expected values.
 */
@Test
fun withPaddingAndStride() {
val layer = AvgPool3D(strides = longArrayOf(1, 1, 1, 1, 1), padding = ConvPadding.SAME)
// Expected values were computed by hand by the test author (see the review conversation below).
val expected = arrayOf(
arrayOf(
arrayOf(
arrayOf(
floatArrayOf(18.0f/8, 4.5f/8, 16.0f/8),
// NOTE(review): the lines from here up to the next floatArrayOf(...) are review-page conversation
// captured together with the source; they are not Kotlin code.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you take these values from Keras tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I created the test data myself :)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not bad:)
Sometimes I just copied test data from Keras; sometimes, if test data seems not appropriate, I use Keras like a test machine writing some simple test in Python to put some test data in inputs and get outputs. A few times, I ignored this method and just used tested primitive itself to get data, but it has a bug in implementation and, as a result, a wrong test:(( But I agree that test inputs/outputs for pooling could be calculated manually (just shared my experience)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, actually I have created test data for all test cases I have written so far; but certainly it's not a sustainable approach and using another tested tool, like Keras or maybe even numpy, for generating test data seems to be a better and more reliable approach.

floatArrayOf(17.0f/8, 15.5f/8, 19.5f/8),
floatArrayOf(9.5f/8, 12.0f/8, 13.5f/8),
floatArrayOf(5.0f/8, 2.0f/8, 12.0f/8)
),
arrayOf(
floatArrayOf(19.0f/8, 8.0f/8, 6.0f/8),
floatArrayOf(9.0f/8, 26.0f/8, 7.0f/8),
floatArrayOf(2.0f/8, 16.0f/8, 4.0f/8),
floatArrayOf(0.0f/8, 0.0f/8, 6.0f/8)
),
),
arrayOf(
arrayOf(
floatArrayOf(30.0f/8, -1.0f/8, 18.0f/8),
floatArrayOf(34.0f/8, 9.0f/8, 18.0f/8),
floatArrayOf(28.0f/8, 12.0f/8, 14.0f/8),
floatArrayOf(16.0f/8, 4.0f/8, 16.0f/8)
),
arrayOf(
floatArrayOf(32.0f/8, -4.0f/8, 16.0f/8),
floatArrayOf(16.0f/8, 28.0f/8, 0.0f/8),
floatArrayOf(24.0f/8, 16.0f/8, -8.0f/8),
floatArrayOf(24.0f/8, -8.0f/8, 0.0f/8)
),
),
),
)

EagerSession.create().use {
// NOTE(review): the session opened above is not passed to Ops.create(); presumably the ops end up
// in the default eager session instead — confirm and consider Ops.create(it).
val tf = Ops.create()
layer.build(tf, KGraph(Graph().toGraphDef()), inputShape)

val inputOp = tf.constant(input)
val isTraining = tf.constant(true)
val numberOfLosses = tf.constant(1.0f)
val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput()

// Check output shape is correct.
val expectedShape = inputShape.toIntArray()
Assertions.assertArrayEquals(
expectedShape,
output.shape().toIntArray()
)

// Check output values are correct.
// The receiving buffer is allocated with the same nested sizes as the input.
val actual = Array(input.size) {
Array(input[0].size) {
Array(input[0][0].size) {
Array(input[0][0][0].size) {
FloatArray(input[0][0][0][0].size)
}
}
}
}
output.tensor().copyTo(actual)
// Compare the innermost float arrays one by one, within the EPS tolerance.
for (i in expected.indices) {
for (j in expected[i].indices) {
for (k in expected[i][j].indices) {
for (l in expected[i][j][k].indices) {
Assertions.assertArrayEquals(
expected[i][j][k][l],
actual[i][j][k][l],
EPS
)
}
}
}
}
}
}

/**
 * Runs an [AvgPool3D] with a 2 x 2 x 3 pooling window and unit strides on the shared input and
 * checks both the shape and the values of the output.
 */
@Test
fun withPoolSizeAndStride() {
    val layer = AvgPool3D(poolSize = longArrayOf(1, 2, 2, 3, 1), strides = longArrayOf(1, 1, 1, 1, 1))
    // Each expected value is the sum over one pooling window divided by the window element count (12).
    val expected = arrayOf(
        arrayOf(
            arrayOf(
                arrayOf(
                    floatArrayOf(25.0f / 12, 15.5f / 12, 23.5f / 12),
                    floatArrayOf(19.5f / 12, 16.5f / 12, 25.5f / 12),
                ),
            ),
        ),
    )

    EagerSession.create().use { session ->
        // Bind the ops to the session opened above so that everything created here is released
        // when the session is closed; a bare Ops.create() targets the default eager session instead.
        val tf = Ops.create(session)
        layer.build(tf, KGraph(Graph().toGraphDef()), inputShape)

        val inputOp = tf.constant(input)
        val isTraining = tf.constant(true)
        val numberOfLosses = tf.constant(1.0f)
        val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput()

        // Check output shape is correct.
        val channels = input[0][0][0][0].size
        val expectedShape = intArrayOf(input.size, 1, 1, 2, channels)
        Assertions.assertArrayEquals(
            expectedShape,
            output.shape().toIntArray()
        )

        // Check output values are correct.
        val actual = Array(input.size) {
            Array(1) { Array(1) { Array(2) { FloatArray(channels) } } }
        }
        output.tensor().copyTo(actual)
        for (i in expected.indices) {
            for (j in expected[i].indices) {
                for (k in expected[i][j].indices) {
                    for (l in expected[i][j][k].indices) {
                        Assertions.assertArrayEquals(
                            expected[i][j][k][l],
                            actual[i][j][k][l],
                            EPS
                        )
                    }
                }
            }
        }
    }
}
}