fix: incorrect check for GGML_USE_METAL
abetlen committed Feb 9, 2024
1 parent 631a8f5 commit 2eaf53d
Showing 3 changed files with 110 additions and 16 deletions.
ggml/ggml.py: 8 additions & 8 deletions

@@ -56,7 +56,7 @@
 import ctypes
 import pathlib
 import importlib.resources
-from typing import Callable, List, Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union
 from typing_extensions import TypeAlias


@@ -96,7 +96,7 @@ def load_shared_library(module_name: str, lib_base_name: str):

     # Try to load the shared library, handling potential errors
     try:
-        return ctypes.CDLL(str(path), **cdll_args)
+        return ctypes.CDLL(str(path), **cdll_args)  # type: ignore
     except Exception as e:
         raise RuntimeError(f"Failed to load shared library '{path}': {e}")

@@ -3684,7 +3684,7 @@ def ggml_scale_inplace(
     Returns:
         Pointer to ggml_tensor"""
-    return lib.ggml_scale_inplace(ctx, a, b)
+    return lib.ggml_scale_inplace(ctx, a, s)
 
 
 lib.ggml_scale_inplace.argtypes = [
@@ -9910,7 +9910,7 @@ def ggml_backend_sched_reset(
 # GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 def ggml_backend_sched_set_eval_callback(
     sched: ggml_backend_sched_t,
-    callback,
+    callback,  # type: ignore
     user_data: ctypes.c_void_p,
 ):
     return lib.ggml_backend_sched_set_eval_callback(sched, callback, user_data)
@@ -9989,7 +9989,7 @@ def ggml_backend_compare_graph_backend(
     backend1: ggml_backend_t,
     backend2: ggml_backend_t,
     graph: ggml_cgraph_p,
-    callback,
+    callback,  # type: ignore
     user_data: ctypes.c_void_p,
 ) -> bool:
     return lib.ggml_backend_compare_graph_backend(
@@ -10375,7 +10375,7 @@ class ggml_backend(ctypes.Structure):
 # GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
 def ggml_backend_register(
     name: bytes,
-    init_fn,
+    init_fn,  # type: ignore
     default_buffer_type: ggml_backend_buffer_type_t,
     user_data: ctypes.c_void_p,
 ):
@@ -10622,7 +10622,7 @@ def ggml_backend_cuda_get_device_memory(
 #####################################################
 
 
-GGML_USE_METAL = hasattr(lib, "ggml_metal_init")
+GGML_USE_METAL = hasattr(lib, "ggml_backend_metal_init")
 
 
 # // max memory buffers that can be mapped to the device
@@ -10639,7 +10639,7 @@ def ggml_backend_cuda_get_device_memory(

 # GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
 def ggml_backend_metal_log_set_callback(
-    log_callback,
+    log_callback,  # type: ignore
     user_data: ctypes.c_void_p,
 ):
     return lib.ggml_backend_metal_log_set_callback(log_callback, user_data)
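
For context on the headline fix: hasattr() on a ctypes.CDLL performs a real
symbol lookup (CDLL.__getattr__ raises AttributeError when the symbol cannot
be resolved), so the probe is only meaningful if it names a symbol the loaded
library actually exports. A minimal sketch of the pattern, with an
illustrative library path:

    import ctypes

    # Illustrative path; the real bindings resolve the library location themselves.
    lib = ctypes.CDLL("libggml.dylib")

    # hasattr doubles as a feature probe: it is True only when the loaded
    # build exports the symbol. Probing a name the build does not export
    # (the old "ggml_metal_init" check) reports False even when the Metal
    # backend is available.
    GGML_USE_METAL = hasattr(lib, "ggml_backend_metal_init")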
tests/test_ggml_backends.py: 2 additions & 5 deletions

@@ -5,18 +5,15 @@

 import numpy as np
 
-def test_ggml_backend():
-    def get_backend():
-        return ggml.ggml_backend_cpu_init()
-
+def test_ggml_cpu_backend():
     n_tensors = 1 + 2  # input (x) and weights (a, b)
     params = ggml.ggml_init_params(
         mem_size=ggml.ggml_tensor_overhead() * n_tensors, mem_buffer=None, no_alloc=True
     )
     ctx = ggml.ggml_init(params=params)
     assert ctx is not None
 
-    backend = get_backend()
+    backend = ggml.ggml_backend_cpu_init()
 
     assert backend is not None
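
Aside on the test refactor above: the removed get_backend() indirection only
ever returned the CPU backend, so inlining ggml_backend_cpu_init() is clearer.
If per-backend smoke tests multiply, pytest parametrization is one alternative
to copy-pasted test bodies; a sketch under that assumption, not code from this
repository:

    import pytest

    import ggml

    @pytest.mark.parametrize("backend_init", [ggml.ggml_backend_cpu_init])
    def test_backend_smoke(backend_init):
        # Each case initializes its backend, checks it, and frees it again.
        backend = backend_init()
        assert backend is not None
        ggml.ggml_backend_free(backend)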
tests/test_ggml_metal.py: 100 additions & 3 deletions

@@ -14,6 +14,103 @@

 @run_if_ggml_metal_available
 def test_metal():
-    ctx_metal = ggml.ggml_metal_init(1)
-    assert ctx_metal is not None
-    ggml.ggml_metal_free(ctx_metal)
+    n_tensors = 1 + 2  # input (x) and weights (a, b)
+    params = ggml.ggml_init_params(
+        mem_size=ggml.ggml_tensor_overhead() * n_tensors, mem_buffer=None, no_alloc=True
+    )
+    ctx = ggml.ggml_init(params=params)
+    assert ctx is not None
+
+    backend = ggml.ggml_backend_metal_init()
+
+    assert backend is not None
+
+    # create the tensors for input and weights
+    x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
+
+    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
+    b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
+
+    # allocate the tensors in the backend
+    buffer = ggml.ggml_backend_alloc_ctx_tensors(ctx, backend)
+
+    # set the values of the weights
+    ggml.ggml_backend_tensor_set(
+        a,
+        ctypes.cast(np.array([3.0], dtype=np.single).ctypes.data, ctypes.c_void_p),
+        0,
+        ggml.ggml_nbytes(a),
+    )
+    ggml.ggml_backend_tensor_set(
+        b,
+        ctypes.cast(np.array([4.0], dtype=np.single).ctypes.data, ctypes.c_void_p),
+        0,
+        ggml.ggml_nbytes(a),
+    )
+
+    max_nodes = 4096
+
+    buf_size = ggml.ggml_tensor_overhead() * max_nodes + ggml.ggml_graph_overhead_custom(max_nodes, False)
+    buf = (ctypes.c_uint8 * buf_size)()
+
+    def build_graph(x: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p, b: ggml.ggml_tensor_p):
+        params = ggml.ggml_init_params(
+            mem_size=buf_size, mem_buffer=ctypes.cast(buf, ctypes.c_void_p), no_alloc=True
+        )
+        ctx0 = ggml.ggml_init(params=params)
+
+        assert ctx0 is not None
+
+        gf = ggml.ggml_new_graph_custom(ctx0, max_nodes, False)
+
+        x2 = ggml.ggml_mul(ctx0, x, x)
+        ax2 = ggml.ggml_mul(ctx0, a, x2)
+        f = ggml.ggml_add(ctx0, ax2, b)
+
+        ggml.ggml_set_name(x2, b"x2")
+        ggml.ggml_set_name(ax2, b"ax2")
+        ggml.ggml_set_name(f, b"f")
+
+        ggml.ggml_build_forward_expand(gf, f)
+
+        ggml.ggml_free(ctx0)
+
+        return gf
+
+    allocr = ggml.ggml_allocr_new_measure_from_backend(backend)
+
+    gf = build_graph(x, a, b)
+
+    mem_size = ggml.ggml_allocr_alloc_graph(allocr, gf)
+
+    ggml.ggml_allocr_free(allocr)
+
+    buf_compute = ggml.ggml_backend_alloc_buffer(backend, mem_size)
+    allocr = ggml.ggml_allocr_new_from_buffer(buf_compute)
+
+    ggml.ggml_allocr_reset(allocr)
+
+    gf = build_graph(x, a, b)
+
+    ggml.ggml_allocr_alloc_graph(allocr, gf)
+
+    ggml.ggml_backend_tensor_set(
+        x,
+        ctypes.cast(np.array([2.0], dtype=np.single).ctypes.data, ctypes.c_void_p),
+        0,
+        ggml.ggml_nbytes(x),
+    )
+
+    ggml.ggml_backend_graph_compute(backend, gf)
+
+    f = ggml.ggml_graph_get_tensor(gf, b"f")
+
+    output = np.zeros(1, dtype=np.single)
+    ggml.ggml_backend_tensor_get(f, ctypes.cast(output.ctypes.data, ctypes.c_void_p), 0, ggml.ggml_nbytes(x))
+
+    assert output[0] == 16.0
+
+    ggml.ggml_backend_buffer_free(buffer)
+    ggml.ggml_backend_buffer_free(buf_compute)
+    ggml.ggml_backend_free(backend)
+    ggml.ggml_free(ctx)
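
The new Metal test repeats one ctypes idiom: a NumPy array's buffer is handed
to ggml_backend_tensor_set/ggml_backend_tensor_get as a c_void_p, with
ggml_nbytes() giving the copy size. A hypothetical helper pair (not part of
the bindings) that captures the idiom:

    import ctypes

    import numpy as np

    import ggml

    def tensor_set_f32(tensor, values):
        """Copy float32 values from host memory into a backend tensor."""
        arr = np.ascontiguousarray(values, dtype=np.single)
        ggml.ggml_backend_tensor_set(
            tensor,
            ctypes.cast(arr.ctypes.data, ctypes.c_void_p),
            0,  # byte offset into the destination tensor
            ggml.ggml_nbytes(tensor),
        )

    def tensor_get_f32(tensor, n):
        """Copy a backend tensor back out into a fresh float32 array."""
        out = np.empty(n, dtype=np.single)
        ggml.ggml_backend_tensor_get(
            tensor,
            ctypes.cast(out.ctypes.data, ctypes.c_void_p),
            0,
            ggml.ggml_nbytes(tensor),
        )
        return out

With a = 3, b = 4, and x = 2 the graph computes f = a*x^2 + b = 3*4 + 4 = 16,
which is exactly what the final assertion checks.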
