fix: incorrect check for GGML_USE_METAL
abetlen committed Feb 9, 2024
1 parent 631a8f5 commit 2eaf53d
Showing 3 changed files with 110 additions and 16 deletions.
ggml/ggml.py: 8 additions & 8 deletions

@@ -56,7 +56,7 @@
 import ctypes
 import pathlib
 import importlib.resources
-from typing import Callable, List, Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union
 from typing_extensions import TypeAlias


@@ -96,7 +96,7 @@ def load_shared_library(module_name: str, lib_base_name: str):

     # Try to load the shared library, handling potential errors
     try:
-        return ctypes.CDLL(str(path), **cdll_args)
+        return ctypes.CDLL(str(path), **cdll_args)  # type: ignore
     except Exception as e:
         raise RuntimeError(f"Failed to load shared library '{path}': {e}")

@@ -3684,7 +3684,7 @@ def ggml_scale_inplace(
     Returns:
         Pointer to ggml_tensor"""
-    return lib.ggml_scale_inplace(ctx, a, b)
+    return lib.ggml_scale_inplace(ctx, a, s)
 
 
 lib.ggml_scale_inplace.argtypes = [
@@ -9910,7 +9910,7 @@ def ggml_backend_sched_reset(
 # GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 def ggml_backend_sched_set_eval_callback(
     sched: ggml_backend_sched_t,
-    callback,
+    callback,  # type: ignore
     user_data: ctypes.c_void_p,
 ):
     return lib.ggml_backend_sched_set_eval_callback(sched, callback, user_data)
@@ -9989,7 +9989,7 @@ def ggml_backend_compare_graph_backend(
     backend1: ggml_backend_t,
     backend2: ggml_backend_t,
     graph: ggml_cgraph_p,
-    callback,
+    callback,  # type: ignore
     user_data: ctypes.c_void_p,
 ) -> bool:
     return lib.ggml_backend_compare_graph_backend(
@@ -10375,7 +10375,7 @@ class ggml_backend(ctypes.Structure):
 # GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
 def ggml_backend_register(
     name: bytes,
-    init_fn,
+    init_fn,  # type: ignore
     default_buffer_type: ggml_backend_buffer_type_t,
     user_data: ctypes.c_void_p,
 ):
@@ -10622,7 +10622,7 @@ def ggml_backend_cuda_get_device_memory(
 #####################################################
 
 
-GGML_USE_METAL = hasattr(lib, "ggml_metal_init")
+GGML_USE_METAL = hasattr(lib, "ggml_backend_metal_init")
 
 
 # // max memory buffers that can be mapped to the device
@@ -10639,7 +10639,7 @@ def ggml_backend_cuda_get_device_memory(

 # GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
 def ggml_backend_metal_log_set_callback(
-    log_callback,
+    log_callback,  # type: ignore
     user_data: ctypes.c_void_p,
 ):
     return lib.ggml_backend_metal_log_set_callback(log_callback, user_data)
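
For context on the headline fix: hasattr() on a ctypes.CDLL performs a real
symbol lookup (CDLL.__getattr__ raises AttributeError when the symbol cannot
be resolved), so the probe is only meaningful if it names a symbol the loaded
library actually exports. A minimal sketch of the pattern, with an
illustrative library path:

    import ctypes

    # Illustrative path; the real bindings resolve the library location themselves.
    lib = ctypes.CDLL("libggml.dylib")

    # hasattr doubles as a feature probe: it is True only when the loaded
    # build exports the symbol. Probing a name the build does not export
    # (the old "ggml_metal_init" check) reports False even when the Metal
    # backend is available.
    GGML_USE_METAL = hasattr(lib, "ggml_backend_metal_init")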
tests/test_ggml_backends.py: 2 additions & 5 deletions

@@ -5,18 +5,15 @@

 import numpy as np
 
-def test_ggml_backend():
-    def get_backend():
-        return ggml.ggml_backend_cpu_init()
-
+def test_ggml_cpu_backend():
     n_tensors = 1 + 2  # input (x) and weights (a, b)
     params = ggml.ggml_init_params(
         mem_size=ggml.ggml_tensor_overhead() * n_tensors, mem_buffer=None, no_alloc=True
     )
     ctx = ggml.ggml_init(params=params)
     assert ctx is not None
 
-    backend = get_backend()
+    backend = ggml.ggml_backend_cpu_init()
 
     assert backend is not None
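
Aside on the test refactor above: the removed get_backend() indirection only
ever returned the CPU backend, so inlining ggml_backend_cpu_init() is clearer.
If per-backend smoke tests multiply, pytest parametrization is one alternative
to copy-pasted test bodies; a sketch under that assumption, not code from this
repository:

    import pytest

    import ggml

    @pytest.mark.parametrize("backend_init", [ggml.ggml_backend_cpu_init])
    def test_backend_smoke(backend_init):
        # Each case initializes its backend, checks it, and frees it again.
        backend = backend_init()
        assert backend is not None
        ggml.ggml_backend_free(backend)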
tests/test_ggml_metal.py: 100 additions & 3 deletions

@@ -14,6 +14,103 @@

 @run_if_ggml_metal_available
 def test_metal():
-    ctx_metal = ggml.ggml_metal_init(1)
-    assert ctx_metal is not None
-    ggml.ggml_metal_free(ctx_metal)
+    n_tensors = 1 + 2  # input (x) and weights (a, b)
+    params = ggml.ggml_init_params(
+        mem_size=ggml.ggml_tensor_overhead() * n_tensors, mem_buffer=None, no_alloc=True
+    )
+    ctx = ggml.ggml_init(params=params)
+    assert ctx is not None
+
+    backend = ggml.ggml_backend_metal_init()
+
+    assert backend is not None
+
+    # create the tensors for input and weights
+    x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
+
+    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
+    b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
+
+    # allocate the tensors in the backend
+    buffer = ggml.ggml_backend_alloc_ctx_tensors(ctx, backend)
+
+    # set the values of the weights
+    ggml.ggml_backend_tensor_set(
+        a,
+        ctypes.cast(np.array([3.0], dtype=np.single).ctypes.data, ctypes.c_void_p),
+        0,
+        ggml.ggml_nbytes(a),
+    )
+    ggml.ggml_backend_tensor_set(
+        b,
+        ctypes.cast(np.array([4.0], dtype=np.single).ctypes.data, ctypes.c_void_p),
+        0,
+        ggml.ggml_nbytes(a),
+    )
+
+    max_nodes = 4096
+
+    buf_size = ggml.ggml_tensor_overhead() * max_nodes + ggml.ggml_graph_overhead_custom(max_nodes, False)
+    buf = (ctypes.c_uint8 * buf_size)()
+
+    def build_graph(x: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p, b: ggml.ggml_tensor_p):
+        params = ggml.ggml_init_params(
+            mem_size=buf_size, mem_buffer=ctypes.cast(buf, ctypes.c_void_p), no_alloc=True
+        )
+        ctx0 = ggml.ggml_init(params=params)
+
+        assert ctx0 is not None
+
+        gf = ggml.ggml_new_graph_custom(ctx0, max_nodes, False)
+
+        x2 = ggml.ggml_mul(ctx0, x, x)
+        ax2 = ggml.ggml_mul(ctx0, a, x2)
+        f = ggml.ggml_add(ctx0, ax2, b)
+
+        ggml.ggml_set_name(x2, b"x2")
+        ggml.ggml_set_name(ax2, b"ax2")
+        ggml.ggml_set_name(f, b"f")
+
+        ggml.ggml_build_forward_expand(gf, f)
+
+        ggml.ggml_free(ctx0)
+
+        return gf
+
+    allocr = ggml.ggml_allocr_new_measure_from_backend(backend)
+
+    gf = build_graph(x, a, b)
+
+    mem_size = ggml.ggml_allocr_alloc_graph(allocr, gf)
+
+    ggml.ggml_allocr_free(allocr)
+
+    buf_compute = ggml.ggml_backend_alloc_buffer(backend, mem_size)
+    allocr = ggml.ggml_allocr_new_from_buffer(buf_compute)
+
+    ggml.ggml_allocr_reset(allocr)
+
+    gf = build_graph(x, a, b)
+
+    ggml.ggml_allocr_alloc_graph(allocr, gf)
+
+    ggml.ggml_backend_tensor_set(
+        x,
+        ctypes.cast(np.array([2.0], dtype=np.single).ctypes.data, ctypes.c_void_p),
+        0,
+        ggml.ggml_nbytes(x),
+    )
+
+    ggml.ggml_backend_graph_compute(backend, gf)
+
+    f = ggml.ggml_graph_get_tensor(gf, b"f")
+
+    output = np.zeros(1, dtype=np.single)
+    ggml.ggml_backend_tensor_get(f, ctypes.cast(output.ctypes.data, ctypes.c_void_p), 0, ggml.ggml_nbytes(x))
+
+    assert output[0] == 16.0
+
+    ggml.ggml_backend_buffer_free(buffer)
+    ggml.ggml_backend_buffer_free(buf_compute)
+    ggml.ggml_backend_free(backend)
+    ggml.ggml_free(ctx)
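
The new Metal test repeats one ctypes idiom: a NumPy array's buffer is handed
to ggml_backend_tensor_set/ggml_backend_tensor_get as a c_void_p, with
ggml_nbytes() giving the copy size. A hypothetical helper pair (not part of
the bindings) that captures the idiom:

    import ctypes

    import numpy as np

    import ggml

    def tensor_set_f32(tensor, values):
        """Copy float32 values from host memory into a backend tensor."""
        arr = np.ascontiguousarray(values, dtype=np.single)
        ggml.ggml_backend_tensor_set(
            tensor,
            ctypes.cast(arr.ctypes.data, ctypes.c_void_p),
            0,  # byte offset into the destination tensor
            ggml.ggml_nbytes(tensor),
        )

    def tensor_get_f32(tensor, n):
        """Copy a backend tensor back out into a fresh float32 array."""
        out = np.empty(n, dtype=np.single)
        ggml.ggml_backend_tensor_get(
            tensor,
            ctypes.cast(out.ctypes.data, ctypes.c_void_p),
            0,
            ggml.ggml_nbytes(tensor),
        )
        return out

With a = 3, b = 4, and x = 2 the graph computes f = a*x^2 + b = 3*4 + 4 = 16,
which is exactly what the final assertion checks.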
