PoC: add Qualcomm mobile SoC native backend for GGML --- step3
build code skeleton of stage-2 of PoC:#121
zhouwg committed Mar 31, 2024
1 parent 2c740c7 commit ab02352
Showing 10 changed files with 228 additions and 21 deletions.
@@ -266,7 +266,9 @@ public class CDEUtils {
public static final int BECHMARK_FULL = 3; //looks good on Xiaomi 14 after build optimization
public static final int BENCHMARK_MATRIX = 4;
public static final int BENCHMARK_LLM = 5;
public static final int BENCHMARK_QNN = 6;
public static final int BENCHMARK_QNN_SAMPLE = 6;
public static final int BENCHMARK_QNN_MATRIX = 7;
public static final int BENCHMARK_QNN_GGML = 8;

private static int mASRMode = ASR_MODE_NORMAL;

@@ -3921,8 +3923,14 @@ public static String getBenchmarkDesc(int benchmarkIndex) {
case BENCHMARK_LLM:
return "GGML LLAMA";

case BENCHMARK_QNN:
return "GGML QNN";
case BENCHMARK_QNN_SAMPLE:
return "GGML QNN sample";

case BENCHMARK_QNN_MATRIX:
return "GGML QNN matrix manipulate";

case BENCHMARK_QNN_GGML:
return "GGML QNN ggml";

}

16 changes: 13 additions & 3 deletions cdeosplayer/cdeosplayer-lib/src/main/java/org/ggml/ggmljava.java
@@ -6,6 +6,15 @@
public class ggmljava {
private static final String TAG = ggmljava.class.getName();

// keep in sync with ggml_jni_op in ggml-jni.h
public static final int GGML_JNI_OP_NONE = 0;
public static final int GGML_JNI_OP_ADD = 1;
public static final int GGML_JNI_OP_SUB = 2;
public static final int GGML_JNI_OP_MUL = 3;
public static final int GGML_JNI_OP_DIV = 4;
public static final int GGML_JNI_OP_SUM = 5;
public static final int GGML_JNI_OP_MUL_MAT = 6;

public static native int asr_init(String strModelPath, int nThreadCounts, int nASRMode);

public static native void asr_finalize();
@@ -26,12 +35,13 @@ public class ggmljava {
/**
* @param modelPath /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's dedicated model
* @param audioPath /sdcard/kantv/jfk.wav
* @param nBenchType 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN
* @param nBackendType 0: CPU 1: GPU 2: DSP
* @param nBenchType 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN sample 7: QNN matrix 8: QNN GGML
* @param nThreadCounts 1 - 8
* @param nBackendType 0: CPU 1: GPU 2: DSP
* @param nOpType type of matrix manipulation / GGML op
* @return
*/
public static native String ggml_bench(String modelPath, String audioPath, int nBenchType, int nThreadCounts, int nBackendType);
public static native String ggml_bench(String modelPath, String audioPath, int nBenchType, int nThreadCounts, int nBackendType, int nOpType);


public static native String llm_get_systeminfo();
@@ -24,6 +24,7 @@
package com.cdeos.kantv.ui.fragment;


import static org.ggml.ggmljava.GGML_JNI_OP_ADD;
import static cdeos.media.player.KANTVEvent.KANTV_INFO_ASR_FINALIZE;
import static cdeos.media.player.KANTVEvent.KANTV_INFO_ASR_STOP;

@@ -319,11 +320,11 @@ else if (isQNNModel)
return;
}

if (isQNNModel && (benchmarkIndex != CDEUtils.BENCHMARK_QNN)) {
if (isQNNModel && (benchmarkIndex < CDEUtils.BENCHMARK_QNN_SAMPLE)) {
CDEUtils.showMsgBox(mActivity, "mismatch between model file:" + selectModeFileName + " and bench type: " + CDEUtils.getBenchmarkDesc(benchmarkIndex));
return;
}
if (!isQNNModel && (benchmarkIndex == CDEUtils.BENCHMARK_QNN)) {
if (!isQNNModel && (benchmarkIndex >= CDEUtils.BENCHMARK_QNN_SAMPLE)) {
CDEUtils.showMsgBox(mActivity, "mismatch between model file:" + selectModeFileName + " and bench type: " + CDEUtils.getBenchmarkDesc(benchmarkIndex));
return;
}
@@ -403,7 +404,7 @@ public void run() {
CDEUtils.getDataPath() + ggmlModelFileName,
CDEUtils.getDataPath() + ggmlSampleFileName,
benchmarkIndex,
nThreadCounts, 0);
nThreadCounts, 0, 0);
} else {
// avoid following issue
// dlopen failed: library "/sdcard/kantv/libInception_v3.so" needed or dlopened by
@@ -413,7 +414,7 @@
CDEUtils.getDataPath(mContext) + ggmlModelFileName,
CDEUtils.getDataPath() + ggmlSampleFileName,
benchmarkIndex,
nThreadCounts, backendIndex);
nThreadCounts, backendIndex, GGML_JNI_OP_ADD);
}
endTime = System.currentTimeMillis();
duration = (endTime - beginTime);
4 changes: 3 additions & 1 deletion cdeosplayer/kantv/src/main/res/values/arrays.xml
@@ -79,7 +79,9 @@
<item>full</item>
<item>matrix</item>
<item>llama</item>
<item>qnn</item>
<item>qnn-sample</item>
<item>qnn-matrix</item>
<item>qnn-ggml</item>
</string-array>

<string-array name="threadCounts">
1 change: 1 addition & 0 deletions external/ggml/CMakeLists.txt
@@ -50,6 +50,7 @@ set(SOURCE_FILES

${KANTV_GGMLJNI_SRC_DIR}/ggml-jni.c
${KANTV_GGMLJNI_SRC_DIR}/ggml-jni-impl.cpp
${KANTV_GGMLJNI_SRC_DIR}/ggml-qnn.cpp

${KANTV_GGMLJNI_SRC_DIR}/tinywav.c
${KANTV_GGMLJNI_SRC_DIR}/sampling.cpp
35 changes: 31 additions & 4 deletions external/ggml/jni/ggml-jni-impl.cpp
@@ -80,6 +80,23 @@
#include <regex>
#include <random>
#include <functional>
#include <tuple>
#include <queue>
#include <unordered_map>
#include <vector>

//03-31-2024,18:00, for PoC https://github.com/zhouwg/kantv/issues/121
#include "ggml-qnn.h"

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
#include "QnnBackend.h"
#include "QnnGraph.h"
#include "QnnProperty.h"
#include "QnnSampleAppUtils.hpp"
#include "QnnTensor.h"
#include "QnnInterface.h"

extern "C" {
#include <inttypes.h>
@@ -772,12 +789,13 @@ void whisper_set_benchmark_status(int b_exit_benchmark) {
*
* @param sz_model_path /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's dedicated model
* @param sz_audio_path /sdcard/kantv/jfk.wav
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN sample 7: QNN matrix 8: QNN GGML
* @param n_threads 1 - 8
* @param n_backend_type 0: CPU 1: GPU 2: DSP
* @param n_op_type type of matrix manipulation / GGML op
* @return
*/
void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n_bench_type, int n_threads, int n_backend_type) {
void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n_bench_type, int n_threads, int n_backend_type, int n_op_type) {
int result = 0;

if (NULL == p_asr_ctx) {
@@ -792,6 +810,7 @@ void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n

LOGGD("model path:%s\n", sz_model_path);
LOGGD("backend type:%d\n", n_backend_type);
LOGGD("op type:%d\n", n_op_type);

p_asr_ctx->b_use_gpu = false; // TODO:not used currently
p_asr_ctx->n_threads = n_threads;
@@ -830,7 +849,7 @@ void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n
ggml_bench_llama(sz_model_path, n_threads);
break;

case BENCHMAKR_QNN:
case BENCHMAKR_QNN_SAMPLE:
{
//TODO: this is a lazy method in PoC stage
int argc = 11;
@@ -864,6 +883,14 @@ void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n
}
break;

case BENCHMARK_QNN_MATRIX:
qnn_matrix(n_backend_type, n_op_type);
break;

case BENCHMARK_QNN_GGML:
qnn_ggml(n_backend_type, n_op_type);
break;

default:
break;
}
@@ -2496,4 +2523,4 @@ void ggml_bench_matrix(int num_threads) {
GGML_JNI_NOTIFY("=====================================================================================\n");

LOGGD("leave ggml_bench_matrix\n");
}
}
5 changes: 3 additions & 2 deletions external/ggml/jni/ggml-jni.c
@@ -52,7 +52,7 @@ Java_org_ggml_ggmljava_asr_1set_1benchmark_1status(JNIEnv *env, jclass clazz,

JNIEXPORT jstring JNICALL
Java_org_ggml_ggmljava_ggml_1bench(JNIEnv *env, jclass clazz, jstring model_path,
jstring audio_path, jint bench_type, jint num_threads, jint backend_type) {
jstring audio_path, jint bench_type, jint num_threads, jint backend_type, jint op_type) {
UNUSED(clazz);

const char *sz_model_path = NULL;
@@ -77,6 +77,7 @@ Java_org_ggml_ggmljava_ggml_1bench(JNIEnv *env, jclass clazz, jstring model_path
LOGGV("bench type: %d\n", bench_type);
LOGGV("thread counts:%d\n", num_threads);
LOGGV("backend type:%d\n", backend_type);
LOGGV("op type:%d\n", op_type);

if (bench_type > BENCHMAKR_MAX) {
LOGGW("pls check bench type\n");
@@ -91,7 +92,7 @@
if (0 == num_threads)
num_threads = 1;

ggml_jni_bench(sz_model_path, sz_audio_path, bench_type, num_threads, backend_type);
ggml_jni_bench(sz_model_path, sz_audio_path, bench_type, num_threads, backend_type, op_type);

if (BECHMARK_ASR == bench_type) { // asr
//just return "asr_result" even when a correct ASR result is obtained, because I'll try to do everything in the native layer
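
For reference, the widened native entry point is invoked with the new op-type argument as sketched below. This snippet is illustrative only and not part of the commit; the thread count is an assumption, and the file paths are the same placeholders used in the parameter documentation above.

// illustrative native-side invocation, assuming the constants defined in ggml-jni.h
// (paths are the documentation placeholders; thread count is an arbitrary example)
#include "ggml-jni.h"

static void run_qnn_ggml_bench_example(void) {
    ggml_jni_bench("/sdcard/kantv/ggml-xxxxxx.bin",  // placeholder model path
                   "/sdcard/kantv/jfk.wav",          // sample audio, unused by the QNN benches
                   BENCHMARK_QNN_GGML,               // n_bench_type = 8
                   4,                                // n_threads
                   BACKEND_CPU,                      // n_backend_type = 0
                   GGML_JNI_OP_MUL_MAT);             // n_op_type
}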
25 changes: 21 additions & 4 deletions external/ggml/jni/ggml-jni.h
@@ -59,14 +59,26 @@ extern "C" {
#define BECHMARK_FULL 3
#define BENCHMARK_MATRIX 4
#define BENCHMAKR_LLAMA 5
#define BENCHMAKR_QNN 6
#define BENCHMAKR_MAX 6
#define BENCHMAKR_QNN_SAMPLE 6
#define BENCHMARK_QNN_MATRIX 7
#define BENCHMARK_QNN_GGML 8
#define BENCHMAKR_MAX 8

#define BACKEND_CPU 0
#define BACKEND_GPU 1
#define BACKEND_DSP 2
#define BACKEND_MAX 2

enum ggml_jni_op {
GGML_JNI_OP_NONE = 0,
GGML_JNI_OP_ADD,
GGML_JNI_OP_SUB,
GGML_JNI_OP_MUL,
GGML_JNI_OP_DIV,
GGML_JNI_OP_SUM,
GGML_JNI_OP_MUL_MAT
};

#define GGML_JNI_NOTIFY(...) ggml_jni_notify_c_impl(__VA_ARGS__)

// JNI helper function for whisper.cpp benchmark
@@ -77,14 +89,15 @@ extern "C" {
*
* @param sz_model_path /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's dedicated model
* @param sz_audio_path /sdcard/kantv/jfk.wav
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN sample 7: QNN matrix 8: QNN GGML
* @param n_threads 1 - 8
* @param n_backend_type 0: CPU 1: GPU 2: DSP
* @param n_op_type type of matrix manipulation / GGML op
* @return
*/
// renamed to ggml_jni_bench in order to unify the JNI layer of whisper.cpp, llama.cpp,
// and the QNN (Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK
void ggml_jni_bench(const char *model_path, const char *audio_path, int n_bench_type, int num_threads, int n_backend_type);
void ggml_jni_bench(const char *model_path, const char *audio_path, int n_bench_type, int num_threads, int n_backend_type, int n_op_type);


const char * whisper_get_ggml_type_str(enum ggml_type wtype);
@@ -139,6 +152,10 @@ extern "C" {

int qnn_sample_main(int argc, char** argv);

int qnn_matrix(int n_backend_type, int n_op_type);

int qnn_ggml(int n_backend_type, int n_ggml_op_type);


#ifdef __cplusplus
}
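
The Java constants in ggmljava.java mirror enum ggml_jni_op by value, and a later PoC stage will need to translate these JNI op codes into GGML's own enum ggml_op. A minimal sketch of such a mapping follows; the helper is not part of this commit and its name is an assumption, while the GGML_OP_* values come from the upstream ggml.h.

// hypothetical helper (not in this commit): translate a ggml_jni_op code into the
// corresponding GGML operator from ggml.h; returns GGML_OP_NONE for unknown codes
#include "ggml.h"
#include "ggml-jni.h"

static enum ggml_op ggml_jni_op_to_ggml_op(int n_op_type) {
    switch (n_op_type) {
        case GGML_JNI_OP_ADD:     return GGML_OP_ADD;
        case GGML_JNI_OP_SUB:     return GGML_OP_SUB;
        case GGML_JNI_OP_MUL:     return GGML_OP_MUL;
        case GGML_JNI_OP_DIV:     return GGML_OP_DIV;
        case GGML_JNI_OP_SUM:     return GGML_OP_SUM;
        case GGML_JNI_OP_MUL_MAT: return GGML_OP_MUL_MAT;
        default:                  return GGML_OP_NONE;
    }
}

Keeping the two enums aligned by value avoids any string-based lookup across the JNI boundary; the trade-off is that both sides must be updated together, which the "keep in sync" comment in ggmljava.java already flags.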
106 changes: 106 additions & 0 deletions external/ggml/jni/ggml-qnn.cpp
@@ -0,0 +1,106 @@
/*
* Copyright (c) 2024- KanTV Authors
*
* this is the source file of the Qualcomm mobile SoC native backend for GGML (https://github.com/ggerganov/ggml)
*
* this clean-room implementation is for
*
* PoC #121: Add Qualcomm mobile SoC native backend for GGML (https://github.com/zhouwg/kantv/issues/121) in Project KanTV
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* The above statement and notice must be included in corresponding files in derived project
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stddef.h>
#include <inttypes.h>
#include <math.h>
#include <time.h>
#include <unistd.h>

#include <string>
#include <vector>
#include <thread>
#include <mutex>
#include <map>
#include <set>
#include <tuple>
#include <queue>
#include <fstream>
#include <iostream>
#include <sstream>
#include <chrono>
#include <memory>
#include <regex>
#include <random>
#include <functional>
#include <unordered_map>
#include <condition_variable>

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
#include "QnnBackend.h"
#include "QnnGraph.h"
#include "QnnProperty.h"
#include "QnnSampleAppUtils.hpp"
#include "QnnTensor.h"
#include "QnnInterface.h"

#include "ggml-qnn.h"

#include "ggml-jni.h" //should be remove after finished PoC for purpose of submit to upstream GGML community


// =================================================================================================
//
// Qualcomm mobile SoC native backend for GGML
//
// =================================================================================================




// =================================================================================================
//
// JNI helper functions for PoC #121: Add Qualcomm mobile SoC native backend for GGML (https://github.com/zhouwg/kantv/issues/121)
// should be moved into ggml-jni-impl.cpp in the future
//
// =================================================================================================
//TODO:
// https://github.com/zhouwg/kantv/issues/121
// PoC-S25: mapping ggml_tensor to QNN_tensor
int qnn_matrix(int n_backend_type, int n_op_type) {
LOGGD("enter qnn_matrix\n");
LOGGV("[%s], op type:%d\n", __func__, n_op_type);
GGML_JNI_NOTIFY("[%s], backend_type:%d, op type:%d\n", __func__, n_backend_type, n_op_type);
LOGGD("leave qnn_matrix\n");

return 0;
}


//TODO:
// https://github.com/zhouwg/kantv/issues/121
// PoC-S26: offload a simple GGML matrix manipulation
int qnn_ggml(int n_backend_type, int n_ggml_op_type) {
LOGGD("enter qnn_ggml\n");
LOGGV("op type:%d\n", n_ggml_op_type);
GGML_JNI_NOTIFY("[%s], backend_type:%d, ggml op type:%d\n", __func__, n_backend_type, n_ggml_op_type);
LOGGD("leave qnn_ggml\n");

return 0;
}
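
Both entry points are still stubs at this stage. As a rough indication of where PoC-S26 is headed, the sketch below shows one possible CPU-only reference path for qnn_ggml: build the requested op as a tiny GGML graph and compute it with plain GGML, so a future QNN-offloaded result can be checked against it. This is an editorial sketch under assumptions (helper name, tensor shapes, scratch size), not code from this commit.

// hedged sketch only: a possible CPU-side reference path for qnn_ggml, using the
// standard GGML API (ggml.h is assumed to be reachable via the existing includes);
// tensor sizes, scratch size, and the helper name are illustrative assumptions
static int qnn_ggml_cpu_reference(int n_ggml_op_type) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,   // small scratch arena for the PoC
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);
    if (NULL == ctx) {
        LOGGW("ggml_init failed\n");
        return 1;
    }

    // two tiny 2x2 matrices, filled with constants so the result is easy to verify
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 2);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 2);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_tensor * dst = NULL;
    switch (n_ggml_op_type) {
        case GGML_JNI_OP_ADD:     dst = ggml_add(ctx, a, b);     break;
        case GGML_JNI_OP_MUL:     dst = ggml_mul(ctx, a, b);     break;
        case GGML_JNI_OP_MUL_MAT: dst = ggml_mul_mat(ctx, a, b); break;
        default:
            LOGGW("op %d not handled in this sketch\n", n_ggml_op_type);
            ggml_free(ctx);
            return 1;
    }

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, dst);
    ggml_graph_compute_with_ctx(ctx, gf, 1 /*n_threads*/);

    // dump the result so it can be compared against the future QNN-offloaded path
    for (int i = 0; i < (int) ggml_nelements(dst); i++) {
        LOGGD("dst[%d] = %f\n", i, ggml_get_f32_1d(dst, i));
    }

    ggml_free(ctx);
    return 0;
}

Computing a known-good CPU result first is the usual way to validate a new backend: once PoC-S25 maps ggml_tensor to QNN tensors, the QNN output can be diffed against this reference element by element.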