feat(ndk): add support for reading modified-utf8 sequences from `ByteBuffers`
bugsnag · Jan 26, 2024 · 83107a2 · 83107a2
1 parent 6f5ad4b
commit 83107a2
Show file tree

Hide file tree

Showing 3 changed files with 238 additions and 3 deletions.
diff --git a/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/ByteBufferExtensions.kt b/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/ByteBufferExtensions.kt
@@ -1,11 +1,137 @@
+@file:Suppress("MagicNumber") // this file is filled with numbers used in modified-utf8
 package com.bugsnag.android.ndk
 
 import java.nio.ByteBuffer
+import kotlin.math.min
+
+private const val UTF_REPLACEMENT_CHAR = '\uFFFD'
 
 internal fun ByteBuffer.getNativeInt(): Int = getInt()
 internal fun ByteBuffer.getNativeLong(): Long = getLong()
 
-internal fun ByteBuffer.getCString(byteCount: Int): String {
-    position(position() + byteCount)
-    return ""
+/**
+ * Decode [allocatedByteCount] as a null-terminated sequence of modified UTF-8 bytes. This reads
+ * the same format as the JNI `NewStringUTF` function, but also obeys a null-terminator character
+ * used in C. This function will always consume *exactly* [allocatedByteCount] bytes from this
+ * `ByteBuffer` (or every remaining byte, if fewer remain), but may return a `String` of fewer (or
+ * even zero) characters. This function will always return a `String`: invalid UTF-8 is marked
+ * with `U+FFFD`, and an unrecognised leading byte or truncated sequence ends decoding there.
+ */
+internal fun ByteBuffer.getCString(allocatedByteCount: Int): String {
+    val origin = position()
+    val maxBytes = min(allocatedByteCount, remaining())
+
+    // allocate a CharArray to handle the decoded string
+    // it can't be longer than the number of bytes in the buffer
+    val chars = CharArray(maxBytes)
+    var bytesRead = 0
+    var outIndex = 0
+    var c = 0
+
+    // fast path for ASCII-7 compatible characters / strings
+    while (bytesRead < maxBytes) {
+        c = get(origin + bytesRead).toInt() and 0xff
+        // 128+ = we need to take the "slow" path
+        // 0 = null-terminator - this is the end of the string
+        if (c >= 128 || c == 0) break
+
+        chars[outIndex++] = c.toChar()
+        bytesRead++
+    }
+
+    // make sure we didn't previously reach the end of the string
+    if (c != 0) {
+        outIndex = readModifiedUtf8(bytesRead, maxBytes, origin, chars, outIndex)
+    }
+
+    // move the ByteBuffer position to after the string
+    position(origin + maxBytes)
+    return String(chars, 0, outIndex)
+}
+
+/**
+ * Read a modified-utf8 string directly from a `ByteBuffer`, this follows the same implementation
+ * as [java.io.DataInputStream] but also covers an early-exit on null (zero) bytes, staying
+ * compliant with the C-string format.
+ *
+ * @param bytesRead how many bytes have already been read by [getCString]
+ * @param maxBytes the maximum number of bytes to read for this string
+ * @param origin the position/index in the ByteBuffer of the first byte for this string,
+ *               this is *not* the first byte to be read by this function
+ * @param outBuffer the buffer to output the decoded characters into
+ * @param outIndex the index within [outBuffer] of the first character to decode
+ *
+ * @return the length of the string that was decoded
+ */
+@Suppress("LoopWithTooManyJumpStatements", "CyclomaticComplexMethod")
+private fun ByteBuffer.readModifiedUtf8(
+    bytesRead: Int,
+    maxBytes: Int,
+    origin: Int,
+    outBuffer: CharArray,
+    outIndex: Int
+): Int {
+    var bytesRead1 = bytesRead
+    var c: Int
+    var outIndex1 = outIndex
+    while (bytesRead1 < maxBytes) {
+        c = get(origin + bytesRead1).toInt() and 0xff
+        if (c == 0) {
+            // null-terminator - this is the end of the string
+            break
+        }
+
+        when (c shr 4) {
+            0, 1, 2, 3, 4, 5, 6, 7 -> {
+                /* 0xxxxxxx*/
+                bytesRead1++
+                outBuffer[outIndex1++] = c.toChar()
+            }
+
+            12, 13 -> {
+                /* 110x xxxx   10xx xxxx*/
+                bytesRead1 += 2
+                if (bytesRead1 > maxBytes) {
+                    // Invalid UTF-8 - but we don't error out, we return what we *do* have
+                    outBuffer[outIndex1++] = UTF_REPLACEMENT_CHAR
+                    break
+                }
+
+                val char2 = get(origin + bytesRead1 - 1).toInt() and 0xff
+                if (char2 and 0xc0 != 0x80) {
+                    // Invalid UTF-8 - but we don't error out, we flag it and carry on decoding
+                    outBuffer[outIndex1++] = UTF_REPLACEMENT_CHAR
+                }
+
+                outBuffer[outIndex1++] = ((c and 0x1f shl 6) or (char2 and 0x3f)).toChar()
+            }
+
+            14 -> {
+                /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                bytesRead1 += 3
+                if (bytesRead1 > maxBytes) {
+                    // Invalid UTF-8 - but we don't error out, we return what we *do* have
+                    outBuffer[outIndex1++] = UTF_REPLACEMENT_CHAR
+                    break
+                }
+
+                val char2 = get(origin + bytesRead1 - 2).toInt() and 0xff
+                val char3 = get(origin + bytesRead1 - 1).toInt() and 0xff
+                if (char2 and 0xc0 != 0x80 || char3 and 0xc0 != 0x80) {
+                    // Invalid UTF-8 - but we don't error out, we flag it and carry on decoding
+                    outBuffer[outIndex1++] = UTF_REPLACEMENT_CHAR
+                }
+
+                outBuffer[outIndex1++] =
+                    ((c and 0x0f shl 12) or (char2 and 0x3f shl 6) or (char3 and 0x3f)).toChar()
+            }
+
+            else -> {
+                // Invalid UTF-8 - but we don't error out, we return what we *do* have
+                outBuffer[outIndex1++] = UTF_REPLACEMENT_CHAR
+                break
+            }
+        }
+    }
+    return outIndex1
 }
diff --git a/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/NativeEventDecoder.kt b/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/NativeEventDecoder.kt
@@ -7,6 +7,9 @@ import java.nio.ByteBuffer
 import java.nio.ByteOrder
 import java.nio.channels.FileChannel
 
+private const val BUGSNAG_EVENT_VERSION = 13
+
+@Suppress("MagicNumber") // this class is filled with numbers defined in event.h
 internal object NativeEventDecoder {
     fun decode(
         event: File,
@@ -25,7 +28,29 @@ internal object NativeEventDecoder {
         eventBytes: ByteBuffer
     ): Event {
         eventBytes.order(ByteOrder.nativeOrder())
+
+        val header = decodeHeader(eventBytes)
+        require(header.version == BUGSNAG_EVENT_VERSION) { "Unsupported event version: ${header.version}" }
+
+        if (header.bigEndian == 0) {
+            eventBytes.order(ByteOrder.BIG_ENDIAN)
+        }
+
         @Suppress("StopShip") // This is targeting an integration branch
         TODO("To be completed")
     }
+
+    private fun decodeHeader(eventBytes: ByteBuffer): NativeEventHeader {
+        return NativeEventHeader(
+            eventBytes.getNativeInt(),
+            eventBytes.getNativeInt(),
+            eventBytes.getCString(64)
+        )
+    }
+
+    private data class NativeEventHeader(
+        val version: Int,
+        val bigEndian: Int,
+        val osBuild: String
+    )
 }
diff --git a/bugsnag-plugin-android-ndk/src/test/java/com/bugsnag/android/ndk/CStringDecoderTest.kt b/bugsnag-plugin-android-ndk/src/test/java/com/bugsnag/android/ndk/CStringDecoderTest.kt
@@ -0,0 +1,84 @@
+package com.bugsnag.android.ndk
+
+import org.junit.Assert.assertEquals
+import org.junit.Test
+import java.nio.ByteBuffer
+
+class CStringDecoderTest {
+    @Test
+    fun testAscii7Compatible() {
+        val buffer = ByteBuffer.wrap(
+            byteArrayOf(
+                0x63, 0x6f, 0x6d, 0x2e, 0x65, 0x78, 0x61, 0x6d,
+                0x70, 0x6c, 0x65, 0x2e, 0x62, 0x75, 0x67, 0x73,
+                0x6e, 0x61, 0x67, 0x2e, 0x61, 0x6e, 0x64, 0x72,
+                0x6f, 0x69, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00
+            )
+        )
+
+        assertEquals("com.example.bugsnag.android", buffer.getCString(buffer.remaining()))
+    }
+
+    @Test
+    fun testEmptyString() {
+        val buffer = ByteBuffer.allocate(64)
+        assertEquals("", buffer.getCString(64))
+        assertEquals(0, buffer.remaining())
+    }
+
+    @Test
+    fun testNonAscii7Compatible() {
+        val buffer = ByteBuffer.wrap(extendedBytes)
+        assertEquals("はい、これは機械翻訳で書かれています", buffer.getCString(buffer.remaining()))
+    }
+
+    @Test
+    fun testInvalidStrings() {
+        val buffer = ByteBuffer.wrap(extendedBytes)
+        assertEquals("はい、これは機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))
+
+        buffer.rewind()
+        buffer.put(16, 32)
+        assertEquals("はい、これ�㠯機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))
+    }
+
+    @Test
+    fun testGreekStrings() {
+        val buffer = ByteBuffer.wrap(greekBytes)
+        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφραση", buffer.getCString(buffer.remaining()))
+    }
+    @Test
+    fun testInvalidGreekStrings() {
+        val buffer = ByteBuffer.wrap(greekBytes)
+        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))
+
+        buffer.rewind()
+        buffer.put(9, 32)
+        assertEquals("ναι, �Πυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))
+    }
+
+    private val greekBytes = byteArrayOf(
+        -50, -67, -50, -79, -50, -71, 44, 32,
+        -50, -79, -49, -123, -49, -124, -49, -116,
+        32, -50, -77, -49, -127, -50, -84, -49,
+        -122, -49, -124, -50, -73, -50, -70, -50,
+        -75, 32, -50, -68, -50, -75, 32, -50,
+        -79, -49, -123, -49, -124, -49, -116, -50,
+        -68, -50, -79, -49, -124, -50, -73, 32,
+        -50, -68, -50, -75, -49, -124, -50, -84,
+        -49, -122, -49, -127, -50, -79, -49, -125,
+        -50, -73,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    )
+
+    private val extendedBytes = byteArrayOf(
+        -29, -127, -81, -29, -127, -124, -29, -128,
+        -127, -29, -127, -109, -29, -126, -116, -29,
+        -127, -81, -26, -87, -97, -26, -94, -80,
+        -25, -65, -69, -24, -88, -77, -29, -127,
+        -89, -26, -101, -72, -29, -127, -117, -29,
+        -126, -116, -29, -127, -90, -29, -127, -124,
+        -29, -127, -66, -29, -127, -103,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    )
+}