Skip to content

Commit

Permalink
feat(ndk): add support for reading modified-utf8 sequences from `ByteBuffers`
Browse files Browse the repository at this point in the history
  • Loading branch information
lemnik committed Jan 26, 2024
1 parent 6f5ad4b commit 83107a2
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,137 @@
@file:Suppress("MagicNumber") // this file is filled with numbers used in modified-utf8
package com.bugsnag.android.ndk

import java.nio.ByteBuffer
import kotlin.math.min

// Unicode replacement character (U+FFFD). The modified-utf8 decoder in this file writes it to
// its output wherever it meets a truncated, malformed or unsupported byte sequence.
private const val UTF_REPLACEMENT_CHAR = '\uFFFD'

/**
 * Reads the next four bytes at the buffer's current position as an `Int`, using the byte order
 * the buffer is currently configured with, and advances the position past them.
 */
internal fun ByteBuffer.getNativeInt(): Int {
    return this.getInt()
}
/**
 * Reads the next eight bytes at the buffer's current position as a `Long`, using the byte order
 * the buffer is currently configured with, and advances the position past them.
 */
internal fun ByteBuffer.getNativeLong(): Long {
    return this.getLong()
}

internal fun ByteBuffer.getCString(byteCount: Int): String {
position(position() + byteCount)
return ""
/**
 * Decode up to [allocatedByteCount] bytes as a null-terminated sequence of modified UTF-8 bytes.
 * This reads the same format as the JNI `NewStringUTF` function, but also obeys the
 * null-terminator character used in C.
 *
 * The buffer position is always advanced past the whole field rather than just past the decoded
 * characters: by [allocatedByteCount] bytes, or by [remaining][ByteBuffer.remaining] bytes when
 * the buffer holds fewer than that. The returned `String` may therefore contain fewer (or even
 * zero) characters than the number of bytes consumed.
 *
 * This function always returns a `String`. Invalid UTF-8 is not treated as an error: the result
 * holds whatever could be decoded, with U+FFFD marking the malformed input. A negative
 * [allocatedByteCount] is treated as an empty field: nothing is consumed and `""` is returned.
 *
 * @param allocatedByteCount the size in bytes of the fixed-width field holding the string
 * @return the decoded string, without the null-terminator or any padding that follows it
 */
internal fun ByteBuffer.getCString(allocatedByteCount: Int): String {
    val origin = position()
    // never read past the end of the buffer, and clamp a negative count to zero so that the
    // CharArray allocation below cannot fail with a NegativeArraySizeException
    val maxBytes = min(allocatedByteCount, remaining()).coerceAtLeast(0)

    // allocate a CharArray to hold the decoded string
    // every decoding path emits at most one char per byte consumed, so maxBytes is enough
    val chars = CharArray(maxBytes)
    var bytesRead = 0
    var outIndex = 0
    var c = 0

    // fast path for ASCII-7 compatible characters / strings
    // absolute reads are used throughout, the position is only moved once at the very end
    while (bytesRead < maxBytes) {
        c = get(origin + bytesRead).toInt() and 0xff
        // 128+ = we need to take the "slow" path
        // 0 = null-terminator - this is the end of the string
        if (c >= 128 || c == 0) break

        chars[outIndex++] = c.toChar()
        bytesRead++
    }

    // make sure we didn't previously reach the end of the string
    // (if the fast path consumed everything, the slow path simply has nothing left to do)
    if (c != 0) {
        outIndex = readModifiedUtf8(bytesRead, maxBytes, origin, chars, outIndex)
    }

    // move the ByteBuffer position to after the string, including any padding after the terminator
    position(origin + maxBytes)
    return String(chars, 0, outIndex)
}

/**
 * Slow-path decoder for modified UTF-8 held in a `ByteBuffer`. Bytes are fetched with absolute
 * reads, so the buffer's own position is left untouched. Decoding ends at the first null (zero)
 * byte, once [maxBytes] have been consumed, or at a sequence that cannot be decoded at all.
 *
 * Malformed input is not raised as an error, it is reported in-band with U+FFFD:
 * - a multi-byte sequence that would run past [maxBytes], or a lead byte that starts neither a
 *   one, two nor three byte sequence, emits a single U+FFFD and ends decoding
 * - a continuation byte without the `10xxxxxx` marker emits U+FFFD followed by the character
 *   assembled from the bytes as they are, and decoding carries on with the next sequence
 *
 * @param bytesRead the number of bytes of the string already consumed by [getCString]
 * @param maxBytes the upper bound on the number of bytes that belong to this string
 * @param origin the absolute index in the ByteBuffer of the string's first byte, the first byte
 *               examined here is at `origin + bytesRead`
 * @param outBuffer the array receiving the decoded characters
 * @param outIndex the index within [outBuffer] at which the first decoded character is stored
 *
 * @return the index one past the last character written, i.e. the length of the decoded string
 */
@Suppress("LoopWithTooManyJumpStatements", "CyclomaticComplexMethod")
private fun ByteBuffer.readModifiedUtf8(
    bytesRead: Int,
    maxBytes: Int,
    origin: Int,
    outBuffer: CharArray,
    outIndex: Int
): Int {
    var offset = bytesRead
    var written = outIndex

    while (offset < maxBytes) {
        val lead = get(origin + offset).toInt() and 0xff
        // C-style terminator: nothing after this byte belongs to the string
        if (lead == 0) break

        when {
            // 0xxxxxxx - plain single byte character
            lead < 0x80 -> {
                outBuffer[written] = lead.toChar()
                written += 1
                offset += 1
            }

            // 110xxxxx 10xxxxxx
            (lead and 0xe0) == 0xc0 -> {
                val next = offset + 2
                if (next > maxBytes) {
                    // the sequence is cut off by the end of the field
                    outBuffer[written] = UTF_REPLACEMENT_CHAR
                    written += 1
                    break
                }

                val second = get(origin + offset + 1).toInt() and 0xff
                if ((second and 0xc0) != 0x80) {
                    // flag the bad continuation byte, the character is still assembled below
                    outBuffer[written] = UTF_REPLACEMENT_CHAR
                    written += 1
                }

                val codeUnit = ((lead and 0x1f) shl 6) or (second and 0x3f)
                outBuffer[written] = codeUnit.toChar()
                written += 1
                offset = next
            }

            // 1110xxxx 10xxxxxx 10xxxxxx
            (lead and 0xf0) == 0xe0 -> {
                val next = offset + 3
                if (next > maxBytes) {
                    // the sequence is cut off by the end of the field
                    outBuffer[written] = UTF_REPLACEMENT_CHAR
                    written += 1
                    break
                }

                val second = get(origin + offset + 1).toInt() and 0xff
                val third = get(origin + offset + 2).toInt() and 0xff
                val secondIsValid = (second and 0xc0) == 0x80
                val thirdIsValid = (third and 0xc0) == 0x80
                if (!(secondIsValid && thirdIsValid)) {
                    // flag the bad continuation byte(s), the character is still assembled below
                    outBuffer[written] = UTF_REPLACEMENT_CHAR
                    written += 1
                }

                val codeUnit = ((lead and 0x0f) shl 12) or
                    ((second and 0x3f) shl 6) or
                    (third and 0x3f)
                outBuffer[written] = codeUnit.toChar()
                written += 1
                offset = next
            }

            // a stray continuation byte (10xxxxxx) or a 1111xxxx lead byte
            else -> {
                outBuffer[written] = UTF_REPLACEMENT_CHAR
                written += 1
                break
            }
        }
    }

    return written
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.nio.channels.FileChannel

// The only native event format version this decoder accepts: decoding fails (via `require`)
// when the version read from the event header differs from this value.
private const val BUGSNAG_EVENT_VERSION = 13

@Suppress("MagicNumber") // this class is filled with numbers defined in event.h
internal object NativeEventDecoder {
fun decode(
event: File,
Expand All @@ -25,7 +28,29 @@ internal object NativeEventDecoder {
eventBytes: ByteBuffer
): Event {
eventBytes.order(ByteOrder.nativeOrder())

val header = decodeHeader(eventBytes)
require(header.version == BUGSNAG_EVENT_VERSION) { "Unsupported event version: ${header.version}" }

if (header.bigEndian == 0) {
eventBytes.order(ByteOrder.BIG_ENDIAN)
}

@Suppress("StopShip") // This is targeting an integration branch
TODO("To be completed")
}

/**
 * Reads the event header starting at the current position of [eventBytes]: two ints (the
 * version and the big-endian flag) followed by a 64 byte C-string holding the OS build.
 */
private fun decodeHeader(eventBytes: ByteBuffer): NativeEventHeader {
    // the reads are order-sensitive: each one advances the buffer position
    val version = eventBytes.getNativeInt()
    val bigEndian = eventBytes.getNativeInt()
    val osBuild = eventBytes.getCString(64)
    return NativeEventHeader(version, bigEndian, osBuild)
}

/**
 * The fixed-size header found at the start of a native event, as read by `decodeHeader`.
 *
 * @property version the event format version, checked against `BUGSNAG_EVENT_VERSION`
 * @property bigEndian the raw endianness flag from the header, the decoder compares it to `0`
 *                     when deciding whether to switch the buffer's byte order
 * @property osBuild the OS build string, decoded from a 64 byte C-string field
 */
private data class NativeEventHeader(
val version: Int,
val bigEndian: Int,
val osBuild: String
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package com.bugsnag.android.ndk

import org.junit.Assert.assertEquals
import org.junit.Test
import java.nio.ByteBuffer

/**
 * Exercises `getCString` against plain ASCII, multi-byte and deliberately damaged input.
 */
class CStringDecoderTest {
    // UTF-8 encoded Greek sentence followed by null padding.
    // Kept as a per-instance property: some tests modify it through the wrapping ByteBuffer.
    private val greekBytes = byteArrayOf(
        -50, -67, -50, -79, -50, -71, 44, 32,
        -50, -79, -49, -123, -49, -124, -49, -116,
        32, -50, -77, -49, -127, -50, -84, -49,
        -122, -49, -124, -50, -73, -50, -70, -50,
        -75, 32, -50, -68, -50, -75, 32, -50,
        -79, -49, -123, -49, -124, -49, -116, -50,
        -68, -50, -79, -49, -124, -50, -73, 32,
        -50, -68, -50, -75, -49, -124, -50, -84,
        -49, -122, -49, -127, -50, -79, -49, -125,
        -50, -73,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )

    // UTF-8 encoded Japanese sentence (three bytes per character) followed by null padding.
    // Kept as a per-instance property: some tests modify it through the wrapping ByteBuffer.
    private val extendedBytes = byteArrayOf(
        -29, -127, -81, -29, -127, -124, -29, -128,
        -127, -29, -127, -109, -29, -126, -116, -29,
        -127, -81, -26, -87, -97, -26, -94, -80,
        -25, -65, -69, -24, -88, -77, -29, -127,
        -89, -26, -101, -72, -29, -127, -117, -29,
        -126, -116, -29, -127, -90, -29, -127, -124,
        -29, -127, -66, -29, -127, -103,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )

    @Test
    fun testAscii7Compatible() {
        val encoded = byteArrayOf(
            0x63, 0x6f, 0x6d, 0x2e, 0x65, 0x78, 0x61, 0x6d,
            0x70, 0x6c, 0x65, 0x2e, 0x62, 0x75, 0x67, 0x73,
            0x6e, 0x61, 0x67, 0x2e, 0x61, 0x6e, 0x64, 0x72,
            0x6f, 0x69, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00
        )
        val buffer = ByteBuffer.wrap(encoded)

        val decoded = buffer.getCString(buffer.remaining())

        assertEquals("com.example.bugsnag.android", decoded)
    }

    @Test
    fun testEmptyString() {
        // a freshly allocated buffer is all zeros, so the terminator is the very first byte
        val zeroed = ByteBuffer.allocate(64)

        val decoded = zeroed.getCString(64)

        assertEquals("", decoded)
        // the whole field must have been consumed even though nothing was decoded
        assertEquals(0, zeroed.remaining())
    }

    @Test
    fun testNonAscii7Compatible() {
        val buffer = ByteBuffer.wrap(extendedBytes)

        val decoded = buffer.getCString(buffer.remaining())

        assertEquals("はい、これは機械翻訳で書かれています", decoded)
    }

    @Test
    fun testInvalidStrings() {
        val buffer = ByteBuffer.wrap(extendedBytes)
        // stop one byte short of the terminator so that the final character is cut off
        val truncatedLength = extendedBytes.indexOf(0) - 1

        assertEquals("はい、これは機械翻訳で書かれていま�", buffer.getCString(truncatedLength))

        // replace a continuation byte in the middle of the string with a space
        buffer.rewind()
        buffer.put(16, 32)

        assertEquals("はい、これ�㠯機械翻訳で書かれていま�", buffer.getCString(truncatedLength))
    }

    @Test
    fun testGreekStrings() {
        val buffer = ByteBuffer.wrap(greekBytes)

        val decoded = buffer.getCString(buffer.remaining())

        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφραση", decoded)
    }

    @Test
    fun testInvalidGreekStrings() {
        val buffer = ByteBuffer.wrap(greekBytes)
        // stop one byte short of the terminator so that the final character is cut off
        val truncatedLength = greekBytes.indexOf(0) - 1

        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(truncatedLength))

        // replace a continuation byte in the middle of the string with a space
        buffer.rewind()
        buffer.put(9, 32)

        assertEquals("ναι, �Πυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(truncatedLength))
    }
}

0 comments on commit 83107a2

Please sign in to comment.