PoC: add Qualcomm mobile SoC native backend for GGML --- step3
build code skeleton of stage-2 of PoC:#121
zhouwg committed Mar 31, 2024
1 parent 2c740c7 commit ab02352
Showing 10 changed files with 228 additions and 21 deletions.
@@ -266,7 +266,9 @@ public class CDEUtils {
public static final int BECHMARK_FULL = 3; //looks good on Xiaomi 14 after build optimization
public static final int BENCHMARK_MATRIX = 4;
public static final int BENCHMARK_LLM = 5;
public static final int BENCHMARK_QNN = 6;
public static final int BENCHMARK_QNN_SAMPLE = 6;
public static final int BENCHMARK_QNN_MATRIX = 7;
public static final int BENCHMARK_QNN_GGML = 8;

private static int mASRMode = ASR_MODE_NORMAL;

@@ -3921,8 +3923,14 @@ public static String getBenchmarkDesc(int benchmarkIndex) {
case BENCHMARK_LLM:
return "GGML LLAMA";

case BENCHMARK_QNN:
return "GGML QNN";
case BENCHMARK_QNN_SAMPLE:
return "GGML QNN sample";

case BENCHMARK_QNN_MATRIX:
return "GGML QNN matrix manipulate";

case BENCHMARK_QNN_GGML:
return "GGML QNN ggml";

}

16 changes: 13 additions & 3 deletions cdeosplayer/cdeosplayer-lib/src/main/java/org/ggml/ggmljava.java
@@ -6,6 +6,15 @@
public class ggmljava {
private static final String TAG = ggmljava.class.getName();

// keep in sync with ggml_jni_op in ggml-jni.h
public static final int GGML_JNI_OP_NONE = 0;
public static final int GGML_JNI_OP_ADD = 1;
public static final int GGML_JNI_OP_SUB = 2;
public static final int GGML_JNI_OP_MUL = 3;
public static final int GGML_JNI_OP_DIV = 4;
public static final int GGML_JNI_OP_SUM = 5;
public static final int GGML_JNI_OP_MUL_MAT = 6;

public static native int asr_init(String strModelPath, int nThreadCounts, int nASRMode);

public static native void asr_finalize();
@@ -26,12 +35,13 @@ public class ggmljava {
/**
* @param modelPath /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's dedicated model
* @param audioPath /sdcard/kantv/jfk.wav
* @param nBenchType 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN
* @param nBackendType 0: CPU 1: GPU 2: DSP
* @param nBenchType 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN sample 7: QNN matrix 8: QNN GGML
* @param nThreadCounts 1 - 8
* @param nBackendType 0: CPU 1: GPU 2: DSP
* @param nOpType type of matrix manipulation / GGML op
* @return
*/
public static native String ggml_bench(String modelPath, String audioPath, int nBenchType, int nThreadCounts, int nBackendType);
public static native String ggml_bench(String modelPath, String audioPath, int nBenchType, int nThreadCounts, int nBackendType, int nOpType);


public static native String llm_get_systeminfo();
@@ -24,6 +24,7 @@
package com.cdeos.kantv.ui.fragment;


import static org.ggml.ggmljava.GGML_JNI_OP_ADD;
import static cdeos.media.player.KANTVEvent.KANTV_INFO_ASR_FINALIZE;
import static cdeos.media.player.KANTVEvent.KANTV_INFO_ASR_STOP;

@@ -319,11 +320,11 @@ else if (isQNNModel)
return;
}

if (isQNNModel && (benchmarkIndex != CDEUtils.BENCHMARK_QNN)) {
if (isQNNModel && (benchmarkIndex < CDEUtils.BENCHMARK_QNN_SAMPLE)) {
CDEUtils.showMsgBox(mActivity, "mismatch between model file:" + selectModeFileName + " and bench type: " + CDEUtils.getBenchmarkDesc(benchmarkIndex));
return;
}
if (!isQNNModel && (benchmarkIndex == CDEUtils.BENCHMARK_QNN)) {
if (!isQNNModel && (benchmarkIndex >= CDEUtils.BENCHMARK_QNN_SAMPLE)) {
CDEUtils.showMsgBox(mActivity, "mismatch between model file:" + selectModeFileName + " and bench type: " + CDEUtils.getBenchmarkDesc(benchmarkIndex));
return;
}
@@ -403,7 +404,7 @@ public void run() {
CDEUtils.getDataPath() + ggmlModelFileName,
CDEUtils.getDataPath() + ggmlSampleFileName,
benchmarkIndex,
nThreadCounts, 0);
nThreadCounts, 0, 0);
} else {
// avoid following issue
// dlopen failed: library "/sdcard/kantv/libInception_v3.so" needed or dlopened by
@@ -413,7 +414,7 @@
CDEUtils.getDataPath(mContext) + ggmlModelFileName,
CDEUtils.getDataPath() + ggmlSampleFileName,
benchmarkIndex,
nThreadCounts, backendIndex);
nThreadCounts, backendIndex, GGML_JNI_OP_ADD);
}
endTime = System.currentTimeMillis();
duration = (endTime - beginTime);
4 changes: 3 additions & 1 deletion cdeosplayer/kantv/src/main/res/values/arrays.xml
@@ -79,7 +79,9 @@
<item>full</item>
<item>matrix</item>
<item>llama</item>
<item>qnn</item>
<item>qnn-sample</item>
<item>qnn-matrix</item>
<item>qnn-ggml</item>
</string-array>

<string-array name="threadCounts">
1 change: 1 addition & 0 deletions external/ggml/CMakeLists.txt
@@ -50,6 +50,7 @@ set(SOURCE_FILES

${KANTV_GGMLJNI_SRC_DIR}/ggml-jni.c
${KANTV_GGMLJNI_SRC_DIR}/ggml-jni-impl.cpp
${KANTV_GGMLJNI_SRC_DIR}/ggml-qnn.cpp

${KANTV_GGMLJNI_SRC_DIR}/tinywav.c
${KANTV_GGMLJNI_SRC_DIR}/sampling.cpp
35 changes: 31 additions & 4 deletions external/ggml/jni/ggml-jni-impl.cpp
@@ -80,6 +80,23 @@
#include <regex>
#include <random>
#include <functional>
#include <tuple>
#include <queue>
#include <unordered_map>
#include <vector>

//03-31-2024,18:00, for PoC https://github.com/zhouwg/kantv/issues/121
#include "ggml-qnn.h"

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
#include "QnnBackend.h"
#include "QnnGraph.h"
#include "QnnProperty.h"
#include "QnnSampleAppUtils.hpp"
#include "QnnTensor.h"
#include "QnnInterface.h"

extern "C" {
#include <inttypes.h>
@@ -772,12 +789,13 @@ void whisper_set_benchmark_status(int b_exit_benchmark) {
*
* @param sz_model_path /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's dedicated model
* @param sz_audio_path /sdcard/kantv/jfk.wav
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN sample 7: QNN matrix 8: QNN GGML
* @param n_threads 1 - 8
* @param n_backend_type 0: CPU 1: GPU 2: DSP
* @param n_op_type type of matrix manipulation / GGML op
* @return
*/
void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n_bench_type, int n_threads, int n_backend_type) {
void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n_bench_type, int n_threads, int n_backend_type, int n_op_type) {
int result = 0;

if (NULL == p_asr_ctx) {
@@ -792,6 +810,7 @@ void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n

LOGGD("model path:%s\n", sz_model_path);
LOGGD("backend type:%d\n", n_backend_type);
LOGGD("op type:%d\n", n_op_type);

p_asr_ctx->b_use_gpu = false; // TODO:not used currently
p_asr_ctx->n_threads = n_threads;
@@ -830,7 +849,7 @@ void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n
ggml_bench_llama(sz_model_path, n_threads);
break;

case BENCHMAKR_QNN:
case BENCHMAKR_QNN_SAMPLE:
{
//TODO: this is a lazy method in PoC stage
int argc = 11;
@@ -864,6 +883,14 @@ void ggml_jni_bench(const char * sz_model_path, const char *sz_audio_path, int n
}
break;

case BENCHMARK_QNN_MATRIX:
qnn_matrix(n_backend_type, n_op_type);
break;

case BENCHMARK_QNN_GGML:
qnn_ggml(n_backend_type, n_op_type);
break;

default:
break;
}
@@ -2496,4 +2523,4 @@ void ggml_bench_matrix(int num_threads) {
GGML_JNI_NOTIFY("=====================================================================================\n");

LOGGD("leave ggml_bench_matrix\n");
}
}
5 changes: 3 additions & 2 deletions external/ggml/jni/ggml-jni.c
@@ -52,7 +52,7 @@ Java_org_ggml_ggmljava_asr_1set_1benchmark_1status(JNIEnv *env, jclass clazz,

JNIEXPORT jstring JNICALL
Java_org_ggml_ggmljava_ggml_1bench(JNIEnv *env, jclass clazz, jstring model_path,
jstring audio_path, jint bench_type, jint num_threads, jint backend_type) {
jstring audio_path, jint bench_type, jint num_threads, jint backend_type, jint op_type) {
UNUSED(clazz);

const char *sz_model_path = NULL;
@@ -77,6 +77,7 @@ Java_org_ggml_ggmljava_ggml_1bench(JNIEnv *env, jclass clazz, jstring model_path
LOGGV("bench type: %d\n", bench_type);
LOGGV("thread counts:%d\n", num_threads);
LOGGV("backend type:%d\n", backend_type);
LOGGV("op type:%d\n", op_type);

if (bench_type > BENCHMAKR_MAX) {
LOGGW("pls check bench type\n");
@@ -91,7 +92,7 @@
if (0 == num_threads)
num_threads = 1;

ggml_jni_bench(sz_model_path, sz_audio_path, bench_type, num_threads, backend_type);
ggml_jni_bench(sz_model_path, sz_audio_path, bench_type, num_threads, backend_type, op_type);

if (BECHMARK_ASR == bench_type) { // asr
//just return "asr_result" even when a correct ASR result is obtained, because I'll try to do everything in the native layer
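
For reference, the widened native entry point is invoked with the new op-type argument as sketched below. This snippet is illustrative only and not part of the commit; the thread count is an assumption, and the file paths are the same placeholders used in the parameter documentation above.

// illustrative native-side invocation, assuming the constants defined in ggml-jni.h
// (paths are the documentation placeholders; thread count is an arbitrary example)
#include "ggml-jni.h"

static void run_qnn_ggml_bench_example(void) {
    ggml_jni_bench("/sdcard/kantv/ggml-xxxxxx.bin",  // placeholder model path
                   "/sdcard/kantv/jfk.wav",          // sample audio, unused by the QNN benches
                   BENCHMARK_QNN_GGML,               // n_bench_type = 8
                   4,                                // n_threads
                   BACKEND_CPU,                      // n_backend_type = 0
                   GGML_JNI_OP_MUL_MAT);             // n_op_type
}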
25 changes: 21 additions & 4 deletions external/ggml/jni/ggml-jni.h
@@ -59,14 +59,26 @@ extern "C" {
#define BECHMARK_FULL 3
#define BENCHMARK_MATRIX 4
#define BENCHMAKR_LLAMA 5
#define BENCHMAKR_QNN 6
#define BENCHMAKR_MAX 6
#define BENCHMAKR_QNN_SAMPLE 6
#define BENCHMARK_QNN_MATRIX 7
#define BENCHMARK_QNN_GGML 8
#define BENCHMAKR_MAX 8

#define BACKEND_CPU 0
#define BACKEND_GPU 1
#define BACKEND_DSP 2
#define BACKEND_MAX 2

enum ggml_jni_op {
GGML_JNI_OP_NONE = 0,
GGML_JNI_OP_ADD,
GGML_JNI_OP_SUB,
GGML_JNI_OP_MUL,
GGML_JNI_OP_DIV,
GGML_JNI_OP_SUM,
GGML_JNI_OP_MUL_MAT
};

#define GGML_JNI_NOTIFY(...) ggml_jni_notify_c_impl(__VA_ARGS__)

// JNI helper function for whisper.cpp benchmark
@@ -77,14 +89,15 @@ extern "C" {
*
* @param sz_model_path /sdcard/kantv/ggml-xxxxxx.bin or /sdcard/kantv/xxxxxx.gguf or qualcomm's dedicated model
* @param sz_audio_path /sdcard/kantv/jfk.wav
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN
* @param n_bench_type 0: asr(transcription) 1: memcpy 2: mulmat 3: full/whisper_encode 4: matrix 5: LLAMA 6: QNN sample 7: QNN matrix 8: QNN GGML
* @param n_threads 1 - 8
* @param n_backend_type 0: CPU 1: GPU 2: DSP
* @param n_op_type type of matrix manipulation / GGML op
* @return
*/
// renamed to ggml_jni_bench in order to unify the JNI layer of whisper.cpp, llama.cpp,
// and the QNN (Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK
void ggml_jni_bench(const char *model_path, const char *audio_path, int n_bench_type, int num_threads, int n_backend_type);
void ggml_jni_bench(const char *model_path, const char *audio_path, int n_bench_type, int num_threads, int n_backend_type, int n_op_type);


const char * whisper_get_ggml_type_str(enum ggml_type wtype);
@@ -139,6 +152,10 @@ extern "C" {

int qnn_sample_main(int argc, char** argv);

int qnn_matrix(int n_backend_type, int n_op_type);

int qnn_ggml(int n_backend_type, int n_ggml_op_type);


#ifdef __cplusplus
}
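
The Java constants in ggmljava.java mirror enum ggml_jni_op by value, and a later PoC stage will need to translate these JNI op codes into GGML's own enum ggml_op. A minimal sketch of such a mapping follows; the helper is not part of this commit and its name is an assumption, while the GGML_OP_* values come from the upstream ggml.h.

// hypothetical helper (not in this commit): translate a ggml_jni_op code into the
// corresponding GGML operator from ggml.h; returns GGML_OP_NONE for unknown codes
#include "ggml.h"
#include "ggml-jni.h"

static enum ggml_op ggml_jni_op_to_ggml_op(int n_op_type) {
    switch (n_op_type) {
        case GGML_JNI_OP_ADD:     return GGML_OP_ADD;
        case GGML_JNI_OP_SUB:     return GGML_OP_SUB;
        case GGML_JNI_OP_MUL:     return GGML_OP_MUL;
        case GGML_JNI_OP_DIV:     return GGML_OP_DIV;
        case GGML_JNI_OP_SUM:     return GGML_OP_SUM;
        case GGML_JNI_OP_MUL_MAT: return GGML_OP_MUL_MAT;
        default:                  return GGML_OP_NONE;
    }
}

Keeping the two enums aligned by value avoids any string-based lookup across the JNI boundary; the trade-off is that both sides must be updated together, which the "keep in sync" comment in ggmljava.java already flags.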
106 changes: 106 additions & 0 deletions external/ggml/jni/ggml-qnn.cpp
@@ -0,0 +1,106 @@
/*
* Copyright (c) 2024- KanTV Authors
*
* this is the source file of the Qualcomm mobile SoC native backend for GGML (https://github.com/ggerganov/ggml)
*
* this clean-room implementation is for
*
* PoC #121: Add Qualcomm mobile SoC native backend for GGML (https://github.com/zhouwg/kantv/issues/121) in Project KanTV
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* The above statement and notice must be included in corresponding files in derived project
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stddef.h>
#include <inttypes.h>
#include <math.h>
#include <time.h>
#include <unistd.h>

#include <string>
#include <vector>
#include <thread>
#include <mutex>
#include <map>
#include <set>
#include <tuple>
#include <queue>
#include <fstream>
#include <iostream>
#include <sstream>
#include <chrono>
#include <memory>
#include <regex>
#include <random>
#include <functional>
#include <unordered_map>
#include <condition_variable>

#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
#include "QnnBackend.h"
#include "QnnGraph.h"
#include "QnnProperty.h"
#include "QnnSampleAppUtils.hpp"
#include "QnnTensor.h"
#include "QnnInterface.h"

#include "ggml-qnn.h"

#include "ggml-jni.h" //should be remove after finished PoC for purpose of submit to upstream GGML community


// =================================================================================================
//
// Qualcomm mobile SoC native backend for GGML
//
// =================================================================================================




// =================================================================================================
//
// JNI helper functions for PoC #121: Add Qualcomm mobile SoC native backend for GGML (https://github.com/zhouwg/kantv/issues/121)
// should be moved into ggml-jni-impl.cpp in the future
//
// =================================================================================================
//TODO:
// https://github.com/zhouwg/kantv/issues/121
// PoC-S25: mapping ggml_tensor to QNN_tensor
int qnn_matrix(int n_backend_type, int n_op_type) {
LOGGD("enter qnn_matrix\n");
LOGGV("[%s], op type:%d\n", __func__, n_op_type);
GGML_JNI_NOTIFY("[%s], backend_type:%d, op type:%d\n", __func__, n_backend_type, n_op_type);
LOGGD("leave qnn_matrix\n");

return 0;
}


//TODO:
// https://github.com/zhouwg/kantv/issues/121
// PoC-S26: offload a simple GGML matrix manipulation
int qnn_ggml(int n_backend_type, int n_ggml_op_type) {
LOGGD("enter qnn_ggml\n");
LOGGV("op type:%d\n", n_ggml_op_type);
GGML_JNI_NOTIFY("[%s], backend_type:%d, ggml op type:%d\n", __func__, n_backend_type, n_ggml_op_type);
LOGGD("leave qnn_ggml\n");

return 0;
}
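
Both entry points are still stubs at this stage. As a rough indication of where PoC-S26 is headed, the sketch below shows one possible CPU-only reference path for qnn_ggml: build the requested op as a tiny GGML graph and compute it with plain GGML, so a future QNN-offloaded result can be checked against it. This is an editorial sketch under assumptions (helper name, tensor shapes, scratch size), not code from this commit.

// hedged sketch only: a possible CPU-side reference path for qnn_ggml, using the
// standard GGML API (ggml.h is assumed to be reachable via the existing includes);
// tensor sizes, scratch size, and the helper name are illustrative assumptions
static int qnn_ggml_cpu_reference(int n_ggml_op_type) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,   // small scratch arena for the PoC
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);
    if (NULL == ctx) {
        LOGGW("ggml_init failed\n");
        return 1;
    }

    // two tiny 2x2 matrices, filled with constants so the result is easy to verify
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 2);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 2);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_tensor * dst = NULL;
    switch (n_ggml_op_type) {
        case GGML_JNI_OP_ADD:     dst = ggml_add(ctx, a, b);     break;
        case GGML_JNI_OP_MUL:     dst = ggml_mul(ctx, a, b);     break;
        case GGML_JNI_OP_MUL_MAT: dst = ggml_mul_mat(ctx, a, b); break;
        default:
            LOGGW("op %d not handled in this sketch\n", n_ggml_op_type);
            ggml_free(ctx);
            return 1;
    }

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, dst);
    ggml_graph_compute_with_ctx(ctx, gf, 1 /*n_threads*/);

    // dump the result so it can be compared against the future QNN-offloaded path
    for (int i = 0; i < (int) ggml_nelements(dst); i++) {
        LOGGD("dst[%d] = %f\n", i, ggml_get_f32_1d(dst, i));
    }

    ggml_free(ctx);
    return 0;
}

Computing a known-good CPU result first is the usual way to validate a new backend: once PoC-S25 maps ggml_tensor to QNN tensors, the QNN output can be diffed against this reference element by element.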