Commit

ggml-qnn: keep in sync with upstream PR ggml-org/llama.cpp#7641
zhouwg committed May 31, 2024
1 parent 70835aa commit e00612d
Showing 2 changed files with 20 additions and 14 deletions.
24 changes: 19 additions & 5 deletions core/ggml/llamacpp/ggml-backend.c
@@ -309,7 +309,7 @@ static void ggml_setup_op_has_task_pass(void) {
}
}


+struct ggml_compute_state;
+extern void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor, struct ggml_compute_state * state);
static enum ggml_status ggml_backend_graph_compute_mixed(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
enum ggml_status result = GGML_STATUS_SUCCESS;
@@ -371,15 +371,28 @@ static enum ggml_status ggml_backend_graph_compute_mixed(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    return result;
}

+#ifdef GGML_USE_QNN
+extern bool ggml_backend_is_qnn(ggml_backend_t backend);
+#endif
+
+static bool is_qnn_backend(ggml_backend_t backend) {
+#ifdef GGML_USE_QNN
+    return ggml_backend_is_qnn(backend);
+#else
+    GGML_UNUSED(backend);
+    return false;
+#endif
+}

enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    enum ggml_status err = GGML_STATUS_SUCCESS;

    if (NULL == g_cpu_backend) {
        ggml_backend_cpu_init();
    }
    if (backend != g_cpu_backend) {
-        if (ggml_backend_is_qnn(backend)) { // or if (backend->iface.offload_op != NULL) but sycl backend's iface.offload_op is not NULL
+        if (is_qnn_backend(backend)) { // or if (backend->iface.offload_op != NULL) but sycl backend's iface.offload_op is not NULL
            //mixed inference between Qualcomm's CPU/GPU or CPU/NPU
            err = ggml_backend_graph_compute_mixed(backend, cgraph);
        } else { //compatible for sycl backend or other existing backend
            err = backend->iface.graph_compute(backend, cgraph);
@@ -400,7 +413,8 @@ enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        ggml_backend_cpu_init();
    }
    if (backend != g_cpu_backend) {
-        if (ggml_backend_is_qnn(backend)) { // or if (backend->iface.offload_op != NULL) but sycl backend's iface.offload_op is not NULL
+        if (is_qnn_backend(backend)) { // or if (backend->iface.offload_op != NULL) but sycl backend's iface.offload_op is not NULL
+            //mixed inference between Qualcomm's CPU/GPU or CPU/NPU
            err = ggml_backend_graph_compute_mixed(backend, cgraph);
        } else { //compatible for sycl backend or other existing backend
            err = backend->iface.graph_compute(backend, cgraph);
@@ -409,7 +423,7 @@ enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        //compatible for existing backend
        err = backend->iface.graph_compute(backend, cgraph);
    }
-    ggml_backend_synchronize(backend);

    return err;
}

@@ -418,7 +432,7 @@ bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
}

bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    if (ggml_backend_is_qnn(backend)) { //compatible for sycl backend or other existing backend
+    if (is_qnn_backend(backend)) { //compatible for sycl backend or other existing backend
        return false;
    }

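A note on the change above: before this commit, ggml_backend_graph_compute() and friends called ggml_backend_is_qnn() unconditionally, which would fail at link time in builds made without the QNN backend. The new is_qnn_backend() wrapper confines the dependency to a single #ifdef GGML_USE_QNN block and collapses to a constant false otherwise, so the dispatch logic stays unconditional at every call site. Below is a minimal, self-contained sketch of that guarded-wrapper pattern; the names (backend_t, USE_FAKE, backend_is_fake, MY_UNUSED) are illustrative stand-ins, not ggml APIs.

#include <stdbool.h>
#include <stdio.h>

#define MY_UNUSED(x) (void)(x)              /* stand-in for GGML_UNUSED */

typedef struct { const char * name; } backend_t;

#ifdef USE_FAKE
/* Declared (and linked) only when the optional backend is compiled in. */
extern bool backend_is_fake(backend_t * backend);
#endif

/* The wrapper keeps call sites unconditional: with the optional backend
 * disabled it degenerates to `return false` instead of leaving an
 * unresolved symbol at link time. */
static bool is_fake_backend(backend_t * backend) {
#ifdef USE_FAKE
    return backend_is_fake(backend);
#else
    MY_UNUSED(backend);
    return false;
#endif
}

int main(void) {
    backend_t cpu = { "cpu" };
    /* Same call site in both build configurations -- no #ifdef here. */
    printf("fake backend: %s\n", is_fake_backend(&cpu) ? "yes" : "no");
    return 0;
}

Wrapping every call site in #ifdef GGML_USE_QNN would work too, but it would scatter preprocessor noise through ggml_backend_graph_compute(), ggml_backend_graph_compute_async(), and ggml_backend_offload_op().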
10 changes: 1 addition & 9 deletions core/ggml/llamacpp/ggml-qnn.cpp
@@ -1,7 +1,3 @@
-#define NOT_IN_PR 1
-
-
-#if NOT_IN_PR
/*
 * Copyright (c) 2024- KanTV Authors
 *
@@ -42,7 +38,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
Expand Down Expand Up @@ -110,10 +105,6 @@ __attribute__((__format__(printf, 3, 4)));
#endif
static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...);

-#if 1// NOT_IN_PR //should be removed before PR because this is a workaround method during development stage
-extern "C" void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor, struct ggml_compute_state * state);
-#endif
-

// =================================================================================================
//
@@ -133,6 +124,7 @@ extern "C" void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor, struct ggml_compute_state * state);
#define BUF_CONTROL_BASE 0xEE000000

#define GGML_QNN_DEBUG 1
+#define NOT_IN_PR 1 //for submit/update PR(ggml-qnn.cpp&ggml.h) to upstream more easily and quickly

#define QNN_LOG_ERROR(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
#define QNN_LOG_WARN(...) ggml_qnn_log_internal(GGML_LOG_LEVEL_DEBUG , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
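The QNN_LOG_* macros retained at the end of this hunk all forward __FILE__, __FUNCTION__, and __LINE__ into ggml_qnn_log_internal(), whose declaration carries __attribute__((__format__(printf, ...))) so the compiler can type-check the format arguments. Here is a minimal sketch of the same macro-plus-variadic-sink pattern, for GCC/Clang; the names (my_log_internal, MY_LOG_*) are illustrative, not the actual ggml-qnn symbols.

#include <stdarg.h>
#include <stdio.h>

enum log_level { LOG_LEVEL_DEBUG, LOG_LEVEL_ERROR };

/* Format string is parameter 5; the matching varargs start at 6. */
static void my_log_internal(enum log_level level, const char * file,
                            const char * func, int line,
                            const char * format, ...)
        __attribute__((__format__(printf, 5, 6)));

static void my_log_internal(enum log_level level, const char * file,
                            const char * func, int line,
                            const char * format, ...) {
    va_list args;
    va_start(args, format);
    fprintf(stderr, "[%s] %s:%d (%s): ",
            level == LOG_LEVEL_ERROR ? "ERROR" : "DEBUG", file, line, func);
    vfprintf(stderr, format, args);
    fputc('\n', stderr);
    va_end(args);
}

#define MY_LOG_ERROR(...) my_log_internal(LOG_LEVEL_ERROR, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
#define MY_LOG_DEBUG(...) my_log_internal(LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

int main(void) {
    MY_LOG_DEBUG("loaded %d tensors", 42);                /* type-checked */
    MY_LOG_ERROR("backend init failed: %s", "no device");
    return 0;
}

Worth noting: as committed, QNN_LOG_ERROR and QNN_LOG_WARN both pass GGML_LOG_LEVEL_DEBUG to the sink, so every message is tagged at debug level regardless of which macro produced it.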
