Skip to content

Commit

Permalink
ggml-jni: add automation UT for ggml ops --- op_add,op_mul,op_mulmat
Browse files Browse the repository at this point in the history
  • Loading branch information
zhouwg committed Apr 20, 2024
1 parent e8a1918 commit 2c9b8f6
Show file tree
Hide file tree
Showing 14 changed files with 382 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -260,19 +260,20 @@ public class CDEUtils {
public static final int ASR_MODE_TRANSCRIPTION_RECORD = 3; // transcription + audio record

//keep in sync with ggml-jni.h
public static final int BECHMARK_ASR = 0;
public static final int BECHMARK_MEMCPY = 1;
public static final int BECHMARK_MULMAT = 2;
public static final int BECHMARK_FULL = 3;
public static final int BENCHMARK_MATRIX = 4;
public static final int BENCHMARK_LLM = 5;
public static final int BENCHMARK_STABLEDIFFUSION= 6;
public static final int BENCHMARK_QNN_SAMPLE = 7;
public static final int BENCHMARK_QNN_SAVER = 8;
public static final int BENCHMARK_QNN_MATRIX = 9;
public static final int BENCHMARK_QNN_GGML = 10;
public static final int BENCHMARK_QNN_COMPLEX = 11;
public static final int BENCHMARK_QNN_GGML_OP = 12;
public static final int BENCHMARK_ASR = 0;
public static final int BENCHMARK_MEMCPY = 1;
public static final int BENCHMARK_MULMAT = 2;
public static final int BENCHMARK_FULL = 3;
//public static final int BENCHMARK_MATRIX = 4;//not used since 04-20-2024
public static final int BENCHMARK_LLM = 4;
public static final int BENCHMARK_STABLEDIFFUSION= 5;
public static final int BENCHMARK_QNN_SAMPLE = 6;
public static final int BENCHMARK_QNN_SAVER = 7;
public static final int BENCHMARK_QNN_MATRIX = 8;
public static final int BENCHMARK_QNN_GGML = 9;
public static final int BENCHMARK_QNN_COMPLEX = 10;
public static final int BENCHMARK_QNN_GGML_OP = 11;
public static final int BENCHMARK_QNN_AUTO_UT = 12;

//keep in sync with ggml-qnn.h
public static final int QNN_BACKEND_CPU = 0;
Expand Down Expand Up @@ -3916,20 +3917,22 @@ public void onClick(DialogInterface dialog, int which) {

public static String getBenchmarkDesc(int benchmarkIndex) {
switch (benchmarkIndex) {
case BECHMARK_FULL:
return "GGML whisper_encode";
case BENCHMARK_FULL:
return "GGML whisper full";

case BECHMARK_MEMCPY:
return "GGML memcopy";
case BENCHMARK_MEMCPY:
return "GGML memcpy";

case BECHMARK_MULMAT:
case BENCHMARK_MULMAT:
return "GGML matrix multiply";

case BECHMARK_ASR:
return "GGML ASR inference";
case BENCHMARK_ASR:
return "GGML whisper ASR";

/*//not used since 04-20-2024
case BENCHMARK_MATRIX:
return "GGML matrix";
*/

case BENCHMARK_LLM:
return "GGML LLAMA";
Expand All @@ -3943,17 +3946,21 @@ public static String getBenchmarkDesc(int benchmarkIndex) {
case BENCHMARK_QNN_SAVER:
return "GGML QNN saver";


case BENCHMARK_QNN_MATRIX:
return "GGML QNN matrix manipulate";
return "GGML QNN matrix addition";

case BENCHMARK_QNN_GGML:
return "GGML QNN ggml";
return "GGML QNN mapping ggml tensor";

case BENCHMARK_QNN_COMPLEX:
return "GGML QNN complex graph";

case BENCHMARK_QNN_GGML_OP:
return "GGML QNN OP"; //for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API
return "GGML QNN OP UT"; //UT for PoC-S49: implementation of GGML OPs using QNN API

case BENCHMARK_QNN_AUTO_UT:
return "GGML QNN OP UT automation"; //automation UT for PoC-S49: implementation of GGML OPs using QNN API
}

return "unknown";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public enum ggml_op {
/**
* @param modelPath /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's prebuilt dedicated model.so or ""
* @param audioPath /sdcard/kantv/jfk.wav
* @param nBenchType 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: stable diffusion 7: QNN sample 8: QNN saver 9: QNN matrix 10: QNN GGML 11: QNN complex 12: QNN GGML OP
* @param nBenchType 0: whisper asr 1: memcpy 2: mulmat 3: whisper full 4: LLAMA 5: stable diffusion 6: QNN sample 7: QNN saver 8: QNN matrix 9: QNN GGML 10: QNN complex 11: QNN GGML OP(QNN UT) 12: QNN UT automation
* @param nThreadCounts 1 - 8
* @param nBackendType 0: CPU 1: GPU 2: DSP 3: ggml ("fake" QNN backend, used only for performance comparison)
* @param nOpType type of matrix manipulation / GGML OP / type of complex computation graph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ public void onNothingSelected(AdapterView<?> parent) {

}
});
spinnerBenchType.setSelection(CDEUtils.BENCHMARK_QNN_GGML_OP);
spinnerBenchType.setSelection(CDEUtils.BENCHMARK_ASR);

Spinner spinnerThreadsCounts = mActivity.findViewById(R.id.spinnerThreadCounts);
String[] arrayThreadCounts = getResources().getStringArray(R.array.threadCounts);
Expand Down
4 changes: 2 additions & 2 deletions cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
<TextView
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Bench:" />
android:text="Bench" />

<Spinner
android:id="@+id/spinnerBenchType"
Expand All @@ -55,7 +55,7 @@
<TextView
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Threads:" />
android:text="Threads" />

<Spinner
android:id="@+id/spinnerThreadCounts"
Expand Down
28 changes: 14 additions & 14 deletions cdeosplayer/kantv/src/main/res/values/arrays.xml
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,20 @@
</string-array>

<string-array name="benchType">
<item>asr</item>
<item>mempcpy</item>
<item>mulmat</item>
<item>full</item>
<item>matrix</item>
<item>llama</item>
<item>stablediffusion</item> <!-- not work on Xiaomi 14 currently -->
<!-- step by step -->
<item>qnn-sample</item> <!-- "play with /say hello to" QNN Sample -->
<item>qnn-saver</item> <!-- study QNN SDK mechanism by QNN Saver -->
<item>qnn-matrix</item> <!-- offload a simple matrix addition operation to QNN -->
<item>qnn-ggml</item> <!-- mapping ggml tensor to QNN tensor -->
<item>qnn-complex</item><!-- complex computation graph in C/C++ or GGML, and then offload them to QNN -->
<item>qnn-ggml-op</item><!-- for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API -->
<item>whisper asr</item> <!-- whisper asr benchmark / asr validation -->
<item>memcpy</item> <!-- whisper memcpy benchmark -->
<item>whisper mulmat</item> <!-- whisper mulmat benchmark -->
<item>whisper full</item> <!-- whisper full benchmark -->
<item>llama</item> <!-- llama benchmark, does not work currently -->
<item>stablediffusion</item> <!-- stable diffusion benchmark, does not work currently -->
<!-- step by step for PoC: Add Qualcomm mobile SoC native backend for GGML, https://github.com/zhouwg/kantv/issues/121 -->
<item>qnn-sample</item> <!-- "play with /say hello to" QNN Sample -->
<item>qnn-saver</item> <!-- study QNN SDK mechanism by QNN Saver -->
<item>qnn-matrix</item> <!-- offload a simple matrix addition operation to QNN -->
<item>qnn-ggml</item> <!-- mapping ggml tensor to QNN tensor -->
<item>qnn-complex</item> <!-- complex computation graph in C/C++ or GGML, and then offload them to QNN -->
<item>qnn-ggml-op</item> <!-- UT for PoC-S49: implementation of other GGML OP(non-mulmat) using QNN API -->
<item>qnn-auto-ut</item> <!-- automated UT for PoC-S49: implementation of GGML OPs using QNN API -->
</string-array>

<string-array name="threadCounts">
Expand Down
Loading

0 comments on commit 2c9b8f6

Please sign in to comment.