forked from Kotlin/kotlindl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Remove targetRotation setter api (Kotlin#454)
* Add CameraXCompatibleModelType interface. * Models from the android model hub can accept ImageProxy directly. * Remove CameraXCompatibleModel interface as obsolete. * Add utils to convert ImageProxy YUV_420_888 to Bitmap * Add ConvertToBitmap preprocessing operation and update preprocessing DSL.
- Loading branch information
1 parent
dc97543
commit 77037e1
Showing
11 changed files
with
400 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
...dMain/kotlin/org/jetbrains/kotlinx/dl/dataset/preprocessing/imageproxy/ConvertToBitmap.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Copyright 2020-2022 JetBrains s.r.o. and Kotlin Deep Learning project contributors. All Rights Reserved. | ||
* Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE.txt file. | ||
*/ | ||
|
||
package org.jetbrains.kotlinx.dl.dataset.preprocessing.imageproxy | ||
|
||
import android.graphics.Bitmap | ||
import androidx.camera.core.ImageProxy | ||
import org.jetbrains.kotlinx.dl.dataset.shape.TensorShape | ||
import org.jetbrains.kotlinx.dl.dataset.preprocessing.Operation | ||
import org.jetbrains.kotlinx.dl.dataset.preprocessing.bitmap.Rotate | ||
|
||
/**
 * Conversion of CameraX [ImageProxy] to [Bitmap].
 * The image is decoded with the `toBitmap` extension of this package (YUV_420_888 input) and the result is
 * rotated by the rotation degrees reported in the image info, to match the target rotation of the image.
 */
public class ConvertToBitmap : Operation<ImageProxy, Bitmap> {
    /**
     * Converts [input] to a [Bitmap] and rotates it by `input.imageInfo.rotationDegrees`.
     *
     * @throws IllegalStateException if the image proxy could not be converted to a bitmap.
     */
    override fun apply(input: ImageProxy): Bitmap {
        // The conversion returns null on failure; surface that as an explicit error.
        val bitmap = checkNotNull(input.toBitmap()) {
            "Something went wrong during image proxy to bitmap conversion"
        }

        val targetRotation = input.imageInfo.rotationDegrees.toFloat()

        return Rotate(targetRotation).apply(bitmap)
    }

    /**
     * It's not possible to know the shape of the output, because of the rotation operation which depends on
     * the input. The first two dimensions are therefore reported as `-1`; for a 3D input the third dimension
     * is passed through unchanged.
     *
     * @throws IllegalArgumentException if [inputShape] is neither 2D nor 3D.
     */
    override fun getOutputShape(inputShape: TensorShape): TensorShape {
        return when (inputShape.rank()) {
            2 -> TensorShape(-1, -1)
            3 -> TensorShape(-1, -1, inputShape[2])
            else -> throw IllegalArgumentException("Input shape is expected to be 2D or 3D")
        }
    }
}
153 changes: 153 additions & 0 deletions
153
...ndroidMain/kotlin/org/jetbrains/kotlinx/dl/dataset/preprocessing/imageproxy/imageUtlls.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
package org.jetbrains.kotlinx.dl.dataset.preprocessing.imageproxy | ||
|
||
import android.graphics.Bitmap | ||
import android.graphics.YuvImage | ||
import android.graphics.ImageFormat | ||
import android.graphics.Rect | ||
import android.graphics.BitmapFactory | ||
import androidx.camera.core.ImageProxy | ||
import java.io.ByteArrayOutputStream | ||
|
||
/**
 * Converts an [ImageProxy] to a [Bitmap].
 * Currently only supports [ImageFormat.YUV_420_888].
 *
 * Only the pixels inside the image's crop rectangle are converted, so the resulting bitmap has the
 * dimensions of the crop rectangle.
 *
 * @return the decoded bitmap, or `null` if the intermediate JPEG encoding/decoding step did not produce one.
 */
public fun ImageProxy.toBitmap(): Bitmap? {
    val nv21 = yuv420888ToNv21(this)
    // The NV21 buffer holds exactly cropRect.width() * cropRect.height() pixels, so the YuvImage must be
    // declared with the crop dimensions; the full frame size would overrun the buffer whenever the crop
    // rectangle is smaller than the frame.
    val crop = cropRect
    val yuvImage = YuvImage(nv21, ImageFormat.NV21, crop.width(), crop.height(), null)
    return yuvImage.toBitmap()
}
|
||
/**
 * Encodes the whole image as a JPEG (quality 100) into an in-memory stream and decodes those bytes
 * with [BitmapFactory.decodeByteArray].
 *
 * @return the result of the decoding step, or `null` if JPEG compression reported failure.
 */
private fun YuvImage.toBitmap(): Bitmap? {
    val jpegStream = ByteArrayOutputStream()
    val fullFrame = Rect(0, 0, width, height)
    val compressed = compressToJpeg(fullFrame, 100, jpegStream)
    if (!compressed) {
        return null
    }
    val jpegBytes = jpegStream.toByteArray()
    return BitmapFactory.decodeByteArray(jpegBytes, 0, jpegBytes.size)
}
|
||
/**
 * Allocates a byte array sized for the crop rectangle of [image] (pixel count multiplied by the
 * bits-per-pixel of [ImageFormat.YUV_420_888], divided by 8) and fills it via [imageToByteBuffer].
 *
 * @return the filled byte array.
 */
private fun yuv420888ToNv21(image: ImageProxy): ByteArray {
    val crop = image.cropRect
    val croppedPixels = crop.width() * crop.height()
    val bitsPerPixel = ImageFormat.getBitsPerPixel(ImageFormat.YUV_420_888)
    return ByteArray(croppedPixels * bitsPerPixel / 8).also { nv21 ->
        imageToByteBuffer(image, nv21, croppedPixels)
    }
}
|
||
/**
 * Decoding of YUV_420_888 image to NV21 byte representation.
 *
 * Only the region inside the image's crop rectangle is copied. The Y plane is written contiguously from
 * index 0 of [outputBuffer]; the V and U planes are interleaved (V first) starting at index [pixelCount].
 *
 * @param image source image; its format must be [ImageFormat.YUV_420_888].
 * @param outputBuffer destination array; it must be large enough to hold the Y values followed by the
 * interleaved chroma values (the caller in this file allocates `pixelCount * bitsPerPixel / 8` bytes).
 * @param pixelCount offset at which the chroma section starts; the caller in this file passes the number
 * of pixels in the crop rectangle.
 * @throws IllegalArgumentException if [image] is not in the YUV_420_888 format.
 */
public fun imageToByteBuffer(image: ImageProxy, outputBuffer: ByteArray, pixelCount: Int) {
    // `assert` is a no-op unless JVM assertions are enabled, so validate the format explicitly.
    require(image.format == ImageFormat.YUV_420_888) {
        "Unsupported image format: ${image.format}. Only ImageFormat.YUV_420_888 is supported."
    }

    val imageCrop = image.cropRect
    val imagePlanes = image.planes

    imagePlanes.forEachIndexed { planeIndex, plane ->
        // How many values are read in input for each output value written
        // Only the Y plane has a value for every pixel, U and V have half the resolution i.e.
        //
        // Y Plane            U Plane    V Plane
        // ===============    =======    =======
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y    U U U U    V V V V
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        val outputStride: Int

        // The index in the output buffer the next value will be written at
        // For Y it's zero, for U and V we start at the end of Y and interleave them i.e.
        //
        // First chunk        Second chunk
        // ===============    ===============
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y    V U V U V U V U
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        // Y Y Y Y Y Y Y Y
        var outputOffset: Int

        when (planeIndex) {
            0 -> {
                outputStride = 1
                outputOffset = 0
            }
            1 -> {
                outputStride = 2
                // For NV21 format, U is in odd-numbered indices
                outputOffset = pixelCount + 1
            }
            2 -> {
                outputStride = 2
                // For NV21 format, V is in even-numbered indices
                outputOffset = pixelCount
            }
            else -> {
                // Image contains more than 3 planes, something strange is going on
                return@forEachIndexed
            }
        }

        val planeBuffer = plane.buffer
        val rowStride = plane.rowStride
        val pixelStride = plane.pixelStride

        // We have to divide the width and height by two if it's not the Y plane
        val planeCrop = if (planeIndex == 0) {
            imageCrop
        } else {
            Rect(
                imageCrop.left / 2,
                imageCrop.top / 2,
                imageCrop.right / 2,
                imageCrop.bottom / 2
            )
        }

        val planeWidth = planeCrop.width()
        val planeHeight = planeCrop.height()

        // Intermediate buffer used to store the bytes of each row
        val rowBuffer = ByteArray(rowStride)

        // True when a row can be copied in one bulk operation instead of pixel by pixel
        val contiguous = pixelStride == 1 && outputStride == 1

        // Size of each row in bytes
        val rowLength = if (contiguous) {
            planeWidth
        } else {
            // Take into account that the stride may include data from pixels other than this
            // particular plane and row, and that could be between pixels and not after every
            // pixel:
            //
            // |---- Pixel stride ----|                    Row ends here --> |
            // | Pixel 1 | Other Data | Pixel 2 | Other Data | ... | Pixel N |
            //
            // We need to get (N-1) * (pixel stride bytes) per row + 1 byte for the last pixel
            (planeWidth - 1) * pixelStride + 1
        }

        for (row in 0 until planeHeight) {
            // Move buffer position to the beginning of this row
            planeBuffer.position(
                (row + planeCrop.top) * rowStride + planeCrop.left * pixelStride
            )

            if (contiguous) {
                // When there is a single stride value for pixel and output, we can just copy
                // the entire row in a single step
                planeBuffer.get(outputBuffer, outputOffset, rowLength)
                outputOffset += rowLength
            } else {
                // When either pixel or output have a stride > 1 we must copy pixel by pixel
                planeBuffer.get(rowBuffer, 0, rowLength)
                for (col in 0 until planeWidth) {
                    outputBuffer[outputOffset] = rowBuffer[col * pixelStride]
                    outputOffset += outputStride
                }
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
12 changes: 0 additions & 12 deletions
12
.../androidMain/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/CameraXCompatibleModel.kt
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.