ggml-jni: add automation UT for ggml ops --- op_add,op_mul,op_mulmat

zhouwg · Apr 20, 2024 · 2c9b8f6 · 2c9b8f6
1 parent e8a1918
commit 2c9b8f6
Show file tree

Hide file tree

Showing 14 changed files with 382 additions and 108 deletions.
diff --git a/cdeosplayer/cdeosplayer-lib/src/main/java/cdeos/media/player/CDEUtils.java b/cdeosplayer/cdeosplayer-lib/src/main/java/cdeos/media/player/CDEUtils.java
@@ -260,19 +260,20 @@ public class CDEUtils {
      public static final int  ASR_MODE_TRANSCRIPTION_RECORD = 3; // transcription + audio record
 
      //keep sync with ggml-jni.h
-     public static final int BECHMARK_ASR      = 0;
-     public static final int BECHMARK_MEMCPY   = 1;
-     public static final int BECHMARK_MULMAT   = 2;
-     public static final int BECHMARK_FULL     = 3;
-     public static final int BENCHMARK_MATRIX  = 4;
-     public static final int BENCHMARK_LLM     = 5;
-     public static final int BENCHMARK_STABLEDIFFUSION= 6;
-     public static final int BENCHMARK_QNN_SAMPLE     = 7;
-     public static final int BENCHMARK_QNN_SAVER      = 8;
-     public static final int BENCHMARK_QNN_MATRIX     = 9;
-     public static final int BENCHMARK_QNN_GGML       = 10;
-     public static final int BENCHMARK_QNN_COMPLEX    = 11;
-     public static final int BENCHMARK_QNN_GGML_OP    = 12;
+     public static final int BENCHMARK_ASR      = 0;
+     public static final int BENCHMARK_MEMCPY   = 1;
+     public static final int BENCHMARK_MULMAT   = 2;
+     public static final int BENCHMARK_FULL     = 3;
+     //public static final int BENCHMARK_MATRIX  = 4;//not used since 04-20-2024
+     public static final int BENCHMARK_LLM     = 4;
+     public static final int BENCHMARK_STABLEDIFFUSION= 5;
+     public static final int BENCHMARK_QNN_SAMPLE     = 6;
+     public static final int BENCHMARK_QNN_SAVER      = 7;
+     public static final int BENCHMARK_QNN_MATRIX     = 8;
+     public static final int BENCHMARK_QNN_GGML       = 9;
+     public static final int BENCHMARK_QNN_COMPLEX    = 10;
+     public static final int BENCHMARK_QNN_GGML_OP    = 11;
+     public static final int BENCHMARK_QNN_AUTO_UT    = 12;
 
      //keep sync with ggml-qnn.h
      public static final int QNN_BACKEND_CPU           = 0;
@@ -3916,20 +3917,22 @@ public void onClick(DialogInterface dialog, int which) {
 
      public static String getBenchmarkDesc(int benchmarkIndex) {
          switch (benchmarkIndex) {
-             case BECHMARK_FULL:
-                 return "GGML whisper_encode";
+             case BENCHMARK_FULL:
+                 return "GGML whisper full";
 
-             case BECHMARK_MEMCPY:
-                 return "GGML memcopy";
+             case BENCHMARK_MEMCPY:
+                 return "GGML memcpy";
 
-             case BECHMARK_MULMAT:
+             case BENCHMARK_MULMAT:
                  return "GGML matrix multiply";
 
-             case BECHMARK_ASR:
-                 return "GGML ASR inference";
+             case BENCHMARK_ASR:
+                 return "GGML whisper ASR";
 
+                 /*//not used since 04-20-2024
              case BENCHMARK_MATRIX:
                  return "GGML matrix";
+                 */
 
              case BENCHMARK_LLM:
                  return "GGML LLAMA";
@@ -3943,17 +3946,21 @@ public static String getBenchmarkDesc(int benchmarkIndex) {
              case BENCHMARK_QNN_SAVER:
                  return "GGML QNN saver";
 
+
              case BENCHMARK_QNN_MATRIX:
-                 return "GGML QNN matrix manipulate";
+                 return "GGML QNN matrix addition";
 
              case BENCHMARK_QNN_GGML:
-                 return "GGML QNN ggml";
+                 return "GGML QNN mapping ggml tensor";
 
              case BENCHMARK_QNN_COMPLEX:
                  return "GGML QNN complex graph";
 
              case BENCHMARK_QNN_GGML_OP:
-                 return "GGML QNN OP"; //for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API
+                 return "GGML QNN OP UT"; //UT for PoC-S49: implementation of GGML OPs using QNN API
+
+             case BENCHMARK_QNN_AUTO_UT:
+                 return "GGML QNN OP UT automation"; //automation UT for PoC-S49: implementation of GGML OPs using QNN API
          }
 
          return "unknown";

diff --git a/cdeosplayer/cdeosplayer-lib/src/main/java/org/ggml/ggmljava.java b/cdeosplayer/cdeosplayer-lib/src/main/java/org/ggml/ggmljava.java
@@ -119,7 +119,7 @@ public enum ggml_op {
     /**
      * @param modelPath     /sdcard/kantv/ggml-xxxxxx.bin or  /sdcard/kantv/xxxxxx.gguf or qualcomm's prebuilt dedicated model.so or ""
      * @param audioPath     /sdcard/kantv/jfk.wav
-     * @param nBenchType    0: asr(transcription) 1: memcpy 2: mulmat  3: full/whisper_encode 4: matrix  5: LLAMA  6: stable diffusion 7: QNN sample 8: QNN saver 9: QNN matrix 10: QNN GGML 11: QNN complex 12: QNN GGML OP
+     * @param nBenchType    0: whisper asr 1: memcpy 2: mulmat  3: whisper full 4: LLAMA 5: stable diffusion 6: QNN sample 7: QNN saver 8: QNN matrix 9: QNN GGML 10: QNN complex 11: QNN GGML OP(QNN UT) 12: QNN UT automation
      * @param nThreadCounts 1 - 8
      * @param nBackendType  0: CPU  1: GPU  2: DSP 3: ggml("fake" QNN backend, just for compare performance)
      * @param nOpType       type of matrix manipulate / GGML OP / type of various complex/complicated computation graph

diff --git a/cdeosplayer/kantv/src/main/java/com/cdeos/kantv/ui/fragment/ASRResearchFragment.java b/cdeosplayer/kantv/src/main/java/com/cdeos/kantv/ui/fragment/ASRResearchFragment.java
@@ -255,7 +255,7 @@ public void onNothingSelected(AdapterView<?> parent) {
 
              }
          });
-         spinnerBenchType.setSelection(CDEUtils.BENCHMARK_QNN_GGML_OP);
+         spinnerBenchType.setSelection(CDEUtils.BENCHMARK_ASR);
 
          Spinner spinnerThreadsCounts = mActivity.findViewById(R.id.spinnerThreadCounts);
          String[] arrayThreadCounts = getResources().getStringArray(R.array.threadCounts);

diff --git a/cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml b/cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml
@@ -43,7 +43,7 @@
             <TextView
                 android:layout_width="wrap_content"
                 android:layout_height="wrap_content"
-                android:text="Bench:" />
+                android:text="Bench" />
 
             <Spinner
                 android:id="@+id/spinnerBenchType"
@@ -55,7 +55,7 @@
             <TextView
                 android:layout_width="wrap_content"
                 android:layout_height="wrap_content"
-                android:text="Threads:" />
+                android:text="Threads" />
 
             <Spinner
                 android:id="@+id/spinnerThreadCounts"

diff --git a/cdeosplayer/kantv/src/main/res/values/arrays.xml b/cdeosplayer/kantv/src/main/res/values/arrays.xml
@@ -60,20 +60,20 @@
     </string-array>
 
     <string-array name="benchType">
-        <item>asr</item>
-        <item>mempcpy</item>
-        <item>mulmat</item>
-        <item>full</item>
-        <item>matrix</item>
-        <item>llama</item>
-        <item>stablediffusion</item> <!-- not work on Xiaomi 14 currently -->
-        <!-- step by step -->
-        <item>qnn-sample</item> <!-- "play with /say hello to" QNN Sample -->
-        <item>qnn-saver</item>  <!-- study QNN SDK mechanism by QNN Saver -->
-        <item>qnn-matrix</item> <!-- offload a simple matrix addition operation to QNN -->
-        <item>qnn-ggml</item>   <!-- mapping ggml tensor to QNN tensor -->
-        <item>qnn-complex</item><!-- complex computation graph in C/C++ or GGML, and then offload them to QNN -->
-        <item>qnn-ggml-op</item><!-- for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API  -->
+        <item>whisper asr</item>            <!-- whisper asr benchmark / asr validation -->
+        <item>memcpy</item>                 <!-- whisper memcpy benchmark -->
+        <item>whisper mulmat</item>         <!-- whisper mulmat benchmark -->
+        <item>whisper full</item>           <!-- whisper full benchmark -->
+        <item>llama</item>                  <!-- llama benchmark,            not working currently -->
+        <item>stablediffusion</item>        <!-- stable diffusion benchmark, not working currently -->
+        <!-- step by step for PoC: Add Qualcomm mobile SoC native backend for GGML, https://github.com/zhouwg/kantv/issues/121 -->
+        <item>qnn-sample</item>             <!-- "play with /say hello to" QNN Sample -->
+        <item>qnn-saver</item>              <!-- study QNN SDK mechanism by QNN Saver -->
+        <item>qnn-matrix</item>             <!-- offload a simple matrix addition operation to QNN -->
+        <item>qnn-ggml</item>               <!-- mapping ggml tensor to QNN tensor -->
+        <item>qnn-complex</item>            <!-- complex computation graph in C/C++ or GGML, and then offload them to QNN -->
+        <item>qnn-ggml-op</item>            <!-- UT for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API  -->
+        <item>qnn-auto-ut</item>            <!-- automation UT for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API -->
     </string-array>
 
     <string-array name="threadCounts">