Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AvgPool3D layer #99

Merged
merged 6 commits into from
Jun 13, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright 2020 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved.
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file.
*/

package org.jetbrains.kotlinx.dl.api.core.layer.pooling

import org.jetbrains.kotlinx.dl.api.core.KGraph
import org.jetbrains.kotlinx.dl.api.core.layer.Layer
import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.ConvPadding
import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_FIRST
import org.jetbrains.kotlinx.dl.api.inference.keras.CHANNELS_LAST
import org.tensorflow.Operand
import org.tensorflow.Shape
import org.tensorflow.op.Ops
import org.tensorflow.op.nn.AvgPool3d

/**
* Average pooling operation for 3D data (e.g. video, spatio-temporal).
*
* Downsamples the input by taking the average over a window of size [poolSize].
*
* @property [poolSize] Size of the pooling window.
* @property [strides] The amount of shift for pooling window in each pooling step. If
* `null`, it will default to [poolSize].
* @property [padding] Padding strategy; either [ConvPadding.VALID], which means no padding,
* or [ConvPadding.SAME], which means padding the input evenly so that, when all strides
* are 1, the output has the same spatial dimensions as the input.
* @property [dataFormat] Data format of input; can be either of [CHANNELS_LAST] or [CHANNELS_FIRST].
*/
public class AvgPool3D(
public val poolSize: IntArray = intArrayOf(2, 2, 2),
public val strides: IntArray? = null,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

set the default value for the strides and convert both, strides and poolSize to the 5d array of longs

Copy link
Contributor Author

@mkaze mkaze Jun 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conversion was done, and I have mentioned the benefit of using null as the default value for strides.

public val padding: ConvPadding = ConvPadding.VALID,
public val dataFormat: String = CHANNELS_LAST,
mkaze marked this conversation as resolved.
Show resolved Hide resolved
name: String = ""
) : Layer(name) {

override val hasActivation: Boolean
get() = false
override val paramCount: Int
get() = 0
override val weights: Map<String, Array<*>>
mkaze marked this conversation as resolved.
Show resolved Hide resolved
get() = emptyMap()

init {
require(dataFormat == CHANNELS_LAST || dataFormat == CHANNELS_FIRST) {
"The dataFormat should be either \"$CHANNELS_LAST\" or \"$CHANNELS_FIRST\"."
}

require(padding == ConvPadding.VALID || padding == ConvPadding.SAME) {
"The padding should be either ${ConvPadding.VALID} or ${ConvPadding.SAME}."
}

require(poolSize.size == 3) {
"The length of poolSize array should be 3."
}

require(strides == null || strides.size == 3) {
"The strides should be either `null` or an array of length 3."
}
}

override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {}

override fun computeOutputShape(inputShape: Shape): Shape {
val axis1 = if (dataFormat == CHANNELS_LAST) 1 else 2
var dim1 = inputShape.size(axis1)
var dim2 = inputShape.size(axis1 + 1)
var dim3 = inputShape.size(axis1 + 2)
val strides1 = strides?.get(0) ?: poolSize[0]
val strides2 = strides?.get(1) ?: poolSize[1]
val strides3 = strides?.get(2) ?: poolSize[2]
dim1 = convOutputLength(dim1, poolSize[0], padding, strides1)
dim2 = convOutputLength(dim2, poolSize[1], padding, strides2)
dim3 = convOutputLength(dim3, poolSize[3], padding, strides3)

return if (dataFormat == CHANNELS_LAST) {
mkaze marked this conversation as resolved.
Show resolved Hide resolved
Shape.make(inputShape.size(0), dim1, dim2, dim3, inputShape.size(4))
} else {
Shape.make(inputShape.size(0), inputShape.size(1), dim1, dim2, dim3)
}
}

override fun forward(
tf: Ops,
input: Operand<Float>,
isTraining: Operand<Boolean>,
numberOfLosses: Operand<Float>?
): Operand<Float> {
val tfPoolSize = longArrayOf(1, poolSize[0].toLong(), poolSize[1].toLong(), poolSize[2].toLong(), 1)
val tfStrides = longArrayOf(
1,
(strides?.get(0) ?: poolSize[0]).toLong(),
(strides?.get(1) ?: poolSize[1]).toLong(),
(strides?.get(2) ?: poolSize[2]).toLong(),
1
)
val tfPadding = padding.paddingName
val tfDataFormat = if (dataFormat == CHANNELS_LAST) "NDHWC" else "NCDHW"
return tf.nn.avgPool3d(
input,
tfPoolSize.toList(),
tfStrides.toList(),
tfPadding,
AvgPool3d.dataFormat(tfDataFormat)
)
}

override fun toString(): String =
"AvgPool3D(poolSize=$poolSize, strides=$strides, padding=$padding, dataFormat=$dataFormat)"
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ internal const val LAYER_INPUT: String = "InputLayer"
internal const val LAYER_MAX_POOLING_2D: String = "MaxPooling2D"
internal const val LAYER_AVG_POOLING_2D: String = "AvgPooling2D"
internal const val LAYER_AVERAGE_POOLING_2D: String = "AveragePooling2D"
// Keras layer class name that is mapped to and from the AvgPool3D layer.
internal const val LAYER_AVG_POOL_3D: String = "AveragePooling3D"
internal const val LAYER_RESCALING: String = "Rescaling"
internal const val LAYER_NORMALIZATION: String = "Normalization"
internal const val LAYER_FLATTEN: String = "Flatten"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
import org.jetbrains.kotlinx.dl.api.core.layer.merge.*
import org.jetbrains.kotlinx.dl.api.core.layer.normalization.BatchNorm
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool2D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool1D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool2D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.MaxPool2D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.*
import org.jetbrains.kotlinx.dl.api.core.layer.regularization.Dropout
import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Cropping2D
import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Flatten
Expand Down Expand Up @@ -150,6 +147,7 @@ private fun convertToSequentialLayer(
kerasLayer.config!!,
kerasLayer.config.name!!
)
LAYER_AVG_POOL_3D -> createAvgPool3D(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!)
Expand Down Expand Up @@ -286,6 +284,7 @@ private fun convertToLayer(
kerasLayer.config!!,
kerasLayer.config.name!!
)
LAYER_AVG_POOL_3D -> createAvgPool3D(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_DENSE -> createDense(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_ZERO_PADDING_2D -> createZeroPadding2D(kerasLayer.config!!, kerasLayer.config.name!!)
LAYER_CROPPING_2D -> createCropping2D(kerasLayer.config!!, kerasLayer.config.name!!)
Expand Down Expand Up @@ -648,6 +647,16 @@ private fun createAvgPooling2D(config: LayerConfig, name: String): AvgPool2D {
return AvgPool2D(addedOnesPoolSize, addedOnesStrides, padding = convertPadding(config.padding!!), name = name)
}

private fun createAvgPool3D(config: LayerConfig, name: String): Layer {
return AvgPool3D(
poolSize = config.pool_size!!.toIntArray(),
strides = config.strides?.toIntArray() ?: config.pool_size.toIntArray(),
padding = convertPadding(config.padding!!),
dataFormat = config.data_format!!,
name = name
)
}

private fun convertPadding(padding: KerasPadding): ConvPadding {
return when (padding) {
is KerasPadding.Same -> ConvPadding.SAME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
import org.jetbrains.kotlinx.dl.api.core.layer.merge.Add
import org.jetbrains.kotlinx.dl.api.core.layer.merge.Concatenate
import org.jetbrains.kotlinx.dl.api.core.layer.normalization.BatchNorm
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.AvgPool2D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool1D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.GlobalAvgPool2D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.MaxPool2D
import org.jetbrains.kotlinx.dl.api.core.layer.pooling.*
import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.Flatten
import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.ZeroPadding2D
import org.jetbrains.kotlinx.dl.api.inference.keras.config.*
Expand Down Expand Up @@ -78,6 +75,7 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i
is Flatten -> createKerasFlatten(layer)
is MaxPool2D -> createKerasMaxPooling2D(layer)
is AvgPool2D -> createKerasAvgPooling2D(layer)
is AvgPool3D -> createAvgPool3D(layer)
is Dense -> createKerasDense(layer, isKerasFullyCompatible)
is ZeroPadding2D -> createKerasZeroPadding2D(layer)
is Input -> createKerasInput(layer)
Expand Down Expand Up @@ -364,6 +362,18 @@ private fun createKerasAvgPooling2D(layer: AvgPool2D): KerasLayer {
return KerasLayer(class_name = LAYER_AVG_POOLING_2D, config = configX)
}

private fun createAvgPool3D(layer: AvgPool3D): KerasLayer {
val configX = LayerConfig(
dtype = DATATYPE_FLOAT32,
pool_size = layer.poolSize.toList(),
strides = layer.strides?.toList() ?: layer.poolSize.toList(),
padding = convertPadding(layer.padding),
data_format = layer.dataFormat,
name = layer.name
)
return KerasLayer(class_name = LAYER_AVG_POOL_3D, config = configX)
}

private fun createKerasFlatten(layer: Flatten): KerasLayer {
val configX = LayerConfig(
data_format = CHANNELS_LAST,
Expand Down
Loading