Skip to content

Commit

Permalink
Add AvgPool3D layer (#99)
Browse files Browse the repository at this point in the history
* Add AvgPool3D layer

* Remove dataFormat; Update poolSize and strides types; Update with recent changes

* Fix function names

* Change strides property type and default value
  • Loading branch information
mkaze authored Jun 13, 2021
1 parent e6ed4a9 commit 218fb57
Show file tree
Hide file tree
Showing 5 changed files with 357 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.pooling

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.tensorflow.Operand
import org.tensorflow.Shape
import org.tensorflow.op.Ops

/**
 * Average pooling operation for 3D data (e.g. video, spatio-temporal).
 *
 * Downsamples the input by taking the average over a window of size [poolSize].
 *
 * @property [poolSize] Size of the pooling window for each dimension of input; must contain exactly 5 elements.
 * @property [strides] The amount of shift for pooling window per each input dimension in each pooling step;
 * must contain exactly 5 elements.
 * @property [padding] Padding strategy; can be either of [ConvPadding.VALID] which means no
 * padding, or [ConvPadding.SAME] which means padding the input equally such that the output
 * has the same dimension as the input.
 * @throws IllegalArgumentException if [poolSize] or [strides] is not of size 5, or if [padding]
 * is neither [ConvPadding.VALID] nor [ConvPadding.SAME].
 */
public class AvgPool3D(
    public val poolSize: LongArray = longArrayOf(1, 2, 2, 2, 1),
    public val strides: LongArray = longArrayOf(1, 2, 2, 2, 1),
    public val padding: ConvPadding = ConvPadding.VALID,
    name: String = ""
) : Layer(name) {

    // Pooling has no activation and no trainable parameters.
    override val hasActivation: Boolean
        get() = false
    override val paramCount: Int
        get() = 0
    override var weights: Map<String, Array<*>>
        get() = emptyMap()
        set(value) = assignWeights(value)

    init {
        require(poolSize.size == 5) {
            "The poolSize should be an array of size 5."
        }

        require(strides.size == 5) {
            "The strides should be an array of size 5."
        }

        require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) {
            "The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}."
        }
    }

    /** Nothing to build: the layer creates no variables. */
    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {}

    /**
     * Computes the output shape for a rank-5 [inputShape].
     *
     * Dimensions 0 and 4 are passed through unchanged; dimensions 1..3 are reduced with
     * [convOutputLength] using the matching entries of [poolSize] and [strides].
     */
    override fun computeOutputShape(inputShape: Shape): Shape {
        val dim1 = convOutputLength(inputShape.size(1), poolSize[1].toInt(), padding, strides[1].toInt())
        val dim2 = convOutputLength(inputShape.size(2), poolSize[2].toInt(), padding, strides[2].toInt())
        val dim3 = convOutputLength(inputShape.size(3), poolSize[3].toInt(), padding, strides[3].toInt())

        return Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4))
    }

    /**
     * Applies `tf.nn.avgPool3d` to [input] with this layer's [poolSize], [strides] and [padding].
     * [isTraining] and [numberOfLosses] are not used by this layer.
     */
    override fun forward(
        tf: Ops,
        input: Operand<Float>,
        isTraining: Operand<Boolean>,
        numberOfLosses: Operand<Float>?
    ): Operand<Float> {
        val tfPadding = padding.paddingName
        return tf.nn.avgPool3d(
            input,
            poolSize.toList(),
            strides.toList(),
            tfPadding
        )
    }

    // Arrays must be rendered with contentToString(); plain interpolation of a LongArray
    // prints its JVM identity (e.g. "[J@1b2c3d") instead of the element values.
    override fun toString(): String =
        "AvgPool3D(poolSize=${poolSize.contentToString()}, strides=${strides.contentToString()}, padding=$padding)"
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ internal const val LAYER_MAX_POOLING_3D: String = "MaxPooling3D"
internal const val LAYER_AVG_POOL_1D: String = "AveragePooling1D"
internal const val LAYER_AVG_POOLING_2D: String = "AvgPooling2D"
internal const val LAYER_AVERAGE_POOLING_2D: String = "AveragePooling2D"
// Class name used for the 3D average pooling layer when reading and writing Keras layer configs.
internal const val LAYER_AVG_POOL_3D: String = "AveragePooling3D"
internal const val LAYER_RESCALING: String = "Rescaling"
internal const val LAYER_NORMALIZATION: String = "Normalization"
internal const val LAYER_FLATTEN: String = "Flatten"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ private fun convertToLayer(
kerasLayer.config!!,
kerasLayer.config.name!!
)
LAYER_AVG_POOL_3D -> createAvgPool3DLayer(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!)
Expand Down Expand Up @@ -682,6 +683,19 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D {
return AvgPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name)
}

/**
 * Creates an [AvgPool3D] layer from the given layer [config].
 *
 * The first three entries of `pool_size` and of `strides` are placed at positions 1..3 of
 * five-element arrays whose first and last entries are 1, which is the layout [AvgPool3D] receives.
 */
private fun createAvgPool3DLayer(config: LayerConfig, name: String): Layer {
    val kernel = config.pool_size!!
    val window = LongArray(5) { axis -> if (axis in 1..3) kernel[axis - 1].toLong() else 1L }
    val steps = config.strides!!
    val shift = LongArray(5) { axis -> if (axis in 1..3) steps[axis - 1].toLong() else 1L }
    return AvgPool3D(window, shift, convertPadding(config.padding!!), name)
}

private fun createMaxPooling3D(config: LayerConfig, name: String): MaxPool3D {
val poolSize = config.pool_size!!.toIntArray()
val addedOnesPoolSize = IntArray(5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i
is MaxPool3D -> createKerasMaxPooling3D(layer)
is AvgPool1D -> createKerasAvgPool1D(layer)
is AvgPool2D -> createKerasAvgPooling2D(layer)
is AvgPool3D -> createKerasAvgPool3DLayer(layer)
is Dense -> createKerasDense(layer, isKerasFullyCompatible)
is ZeroPadding2D -> createKerasZeroPadding2D(layer)
is Input -> createKerasInput(layer)
Expand Down Expand Up @@ -493,6 +494,17 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer {
return KerasLayer(class_name = LAYER_AVG_POOLING_2D, config = configX)
}

/**
 * Converts an [AvgPool3D] layer into its [KerasLayer] description.
 *
 * Only entries 1..3 of the layer's five-element `poolSize` and `strides` arrays are written
 * to the config; the first and last entries are dropped.
 */
private fun createKerasAvgPool3DLayer(layer: AvgPool3D): KerasLayer {
    val spatialAxes = 1..3
    val spatialPoolSize = spatialAxes.map { axis -> layer.poolSize[axis].toInt() }
    val spatialStrides = spatialAxes.map { axis -> layer.strides[axis].toInt() }
    val layerConfig = LayerConfig(
        dtype = DATATYPE_FLOAT32,
        pool_size = spatialPoolSize,
        strides = spatialStrides,
        padding = convertPadding(layer.padding),
        name = layer.name
    )
    return KerasLayer(class_name = LAYER_AVG_POOL_3D, config = layerConfig)
}

private fun createKerasFlatten(layer: Flatten): KerasLayer {
val configX = LayerConfig(
data_format = CHANNELS_LAST,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool3D
import org.jetbrains.kotlinx.dl.api.core.shape.toIntArray
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import org.tensorflow.EagerSession
import org.tensorflow.Graph
import org.tensorflow.Shape
import org.tensorflow.op.Ops

/** Absolute tolerance passed to `assertArrayEquals` when comparing expected and actual float outputs. */
const val EPS: Float = 1e-6f

/**
 * Tests for [AvgPool3D]: each test builds a layer, runs it eagerly on a fixed input and
 * checks both the output shape and the output values against hand-computed expectations.
 */
internal class AvgPool3DTest {

    // Input tensor of shape (1, 2, 2, 4, 3).
    private val input = arrayOf(
        arrayOf(
            arrayOf(
                arrayOf(
                    floatArrayOf(1.0f, -2.0f, 3.0f),
                    floatArrayOf(0.5f, 2.0f, 5.0f),
                    floatArrayOf(-1.0f, 3.0f, 2.0f),
                    floatArrayOf(1.5f, -1.0f, 0.5f)
                ),
                arrayOf(
                    floatArrayOf(-1.0f, 2.0f, -2.0f),
                    floatArrayOf(2.5f, 3.0f, 1.0f),
                    floatArrayOf(-2.0f, 3.0f, 2.5f),
                    floatArrayOf(-3.0f, 1.0f, 1.5f)
                ),
            ),
            arrayOf(
                arrayOf(
                    floatArrayOf(1.0f, 3.0f, 1.0f),
                    floatArrayOf(6.0f, -2.5f, 4.0f),
                    floatArrayOf(7.0f, 0.0f, 5.0f),
                    floatArrayOf(1.0f, 2.0f, 4.0f)
                ),
                arrayOf(
                    floatArrayOf(7.0f, -3.0f, 2.0f),
                    floatArrayOf(1.0f, 2.0f, 2.0f),
                    floatArrayOf(3.0f, 5.0f, -2.0f),
                    floatArrayOf(3.0f, -1.0f, 0.0f)
                ),
            ),
        ),
    )

    private val inputShape: Shape = Shape.make(
        input.size.toLong(),
        input[0].size.toLong(),
        input[0][0].size.toLong(),
        input[0][0][0].size.toLong(),
        input[0][0][0][0].size.toLong(),
    )

    @Test
    fun default() {
        val layer = AvgPool3D()
        val expected = arrayOf(
            arrayOf(
                arrayOf(
                    arrayOf(
                        floatArrayOf(18.0f/8, 4.5f/8, 16.0f/8),
                        floatArrayOf(9.5f/8, 12.0f/8, 13.5f/8),
                    ),
                ),
            ),
        )

        assertLayerOutput(layer, expected)
    }

    @Test
    fun withPaddingAndStride() {
        val layer = AvgPool3D(strides = longArrayOf(1, 1, 1, 1, 1), padding = ConvPadding.SAME)
        val expected = arrayOf(
            arrayOf(
                arrayOf(
                    arrayOf(
                        floatArrayOf(18.0f/8, 4.5f/8, 16.0f/8),
                        floatArrayOf(17.0f/8, 15.5f/8, 19.5f/8),
                        floatArrayOf(9.5f/8, 12.0f/8, 13.5f/8),
                        floatArrayOf(5.0f/8, 2.0f/8, 12.0f/8)
                    ),
                    arrayOf(
                        floatArrayOf(19.0f/8, 8.0f/8, 6.0f/8),
                        floatArrayOf(9.0f/8, 26.0f/8, 7.0f/8),
                        floatArrayOf(2.0f/8, 16.0f/8, 4.0f/8),
                        floatArrayOf(0.0f/8, 0.0f/8, 6.0f/8)
                    ),
                ),
                arrayOf(
                    arrayOf(
                        floatArrayOf(30.0f/8, -1.0f/8, 18.0f/8),
                        floatArrayOf(34.0f/8, 9.0f/8, 18.0f/8),
                        floatArrayOf(28.0f/8, 12.0f/8, 14.0f/8),
                        floatArrayOf(16.0f/8, 4.0f/8, 16.0f/8)
                    ),
                    arrayOf(
                        floatArrayOf(32.0f/8, -4.0f/8, 16.0f/8),
                        floatArrayOf(16.0f/8, 28.0f/8, 0.0f/8),
                        floatArrayOf(24.0f/8, 16.0f/8, -8.0f/8),
                        floatArrayOf(24.0f/8, -8.0f/8, 0.0f/8)
                    ),
                ),
            ),
        )

        assertLayerOutput(layer, expected)
    }

    @Test
    fun withPoolSizeAndStride() {
        val layer = AvgPool3D(poolSize = longArrayOf(1, 2, 2, 3, 1), strides = longArrayOf(1, 1, 1, 1, 1))
        val expected = arrayOf(
            arrayOf(
                arrayOf(
                    arrayOf(
                        floatArrayOf(25.0f/12, 15.5f/12, 23.5f/12),
                        floatArrayOf(19.5f/12, 16.5f/12, 25.5f/12),
                    ),
                ),
            ),
        )

        assertLayerOutput(layer, expected)
    }

    /**
     * Runs [layer] on [input] in an eager session and asserts that the output has the same
     * shape as [expected] and that every innermost float array matches [expected] within [EPS].
     *
     * @param layer the pooling layer under test.
     * @param expected the expected rank-5 output; it must be non-empty along every dimension
     * because its dimensions define the expected output shape.
     */
    private fun assertLayerOutput(layer: AvgPool3D, expected: Array<Array<Array<Array<FloatArray>>>>) {
        EagerSession.create().use {
            // NOTE(review): Ops.create() is called without the session opened above, as in the
            // original tests — confirm it targets the intended eager environment.
            val tf = Ops.create()
            // The temporary graph is only needed to produce a graph definition; close it afterwards.
            Graph().use { graph ->
                layer.build(tf, KGraph(graph.toGraphDef()), inputShape)
            }

            val inputOp = tf.constant(input)
            val isTraining = tf.constant(true)
            val numberOfLosses = tf.constant(1.0f)
            val output = layer.forward(tf, inputOp, isTraining, numberOfLosses).asOutput()

            // Check output shape is correct.
            val expectedShape = intArrayOf(
                expected.size,
                expected[0].size,
                expected[0][0].size,
                expected[0][0][0].size,
                expected[0][0][0][0].size,
            )
            Assertions.assertArrayEquals(
                expectedShape,
                output.shape().toIntArray()
            )

            // Check output values are correct.
            val actual = Array(expectedShape[0]) {
                Array(expectedShape[1]) {
                    Array(expectedShape[2]) {
                        Array(expectedShape[3]) {
                            FloatArray(expectedShape[4])
                        }
                    }
                }
            }
            output.tensor().copyTo(actual)
            for (i in expected.indices) {
                for (j in expected[i].indices) {
                    for (k in expected[i][j].indices) {
                        for (l in expected[i][j][k].indices) {
                            Assertions.assertArrayEquals(
                                expected[i][j][k][l],
                                actual[i][j][k][l],
                                EPS
                            )
                        }
                    }
                }
            }
        }
    }
}

0 comments on commit 218fb57

Please sign in to comment.