This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Added support for cuBLAS and CLBlast in ggml. #282

Merged 29 commits on Jun 18, 2023.
Commits (29)
38f5db3 - Added support for cuBLAS and CLBlast in ggml. (darxkies, May 28, 2023)
8006e1d - Merge branch 'main' into ggml-blas (darxkies, Jun 8, 2023)
4d12f70 - Added cuBLAS/Windows support (darxkies, Jun 8, 2023)
ce0328c - Added support for cuda and opencl to `generate-ggml-bindings` (LLukas22, Jun 9, 2023)
acc9542 - Formatting (LLukas22, Jun 9, 2023)
3365a28 - Merge branch 'main' into ggml-blas (darxkies, Jun 10, 2023)
a0a4669 - Updated code for llama-cpp. cuBLAS and CLBlast work with Arch. Window… (darxkies, Jun 10, 2023)
022a075 - Added untested support for Metal. The Metal bindings might need to be (darxkies, Jun 10, 2023)
a08a749 - Windows/CLBlast works at least on my setup. It needs more testing. (darxkies, Jun 10, 2023)
8666654 - Fixing Metal... (darxkies, Jun 10, 2023)
6f4d404 - MacOS Refactoring (darxkies, Jun 10, 2023)
595dd9f - Refactored API bindings to make them consistent. (darxkies, Jun 11, 2023)
ec308d5 - Added docu for building with acceleration backends (LLukas22, Jun 11, 2023)
54b4344 - Fixed Windows ggml bindgen and refactored the code (darxkies, Jun 11, 2023)
0391474 - Refactored used paths (darxkies, Jun 11, 2023)
6891d7c - Fixed Windows/cuBLAS (darxkies, Jun 11, 2023)
d83a5ab - The relevant environment variables are displayed if the compilation f… (darxkies, Jun 11, 2023)
2ef08c3 - Updated the documentation (darxkies, Jun 11, 2023)
942143c - clippy + fmt (LLukas22, Jun 12, 2023)
c6b363d - Merge branch 'main' into pr/282 (LLukas22, Jun 12, 2023)
5946580 - Added documentation for windows (LLukas22, Jun 12, 2023)
0d88100 - Updated llama.cpp (darxkies, Jun 12, 2023)
65ce0d3 - Added pixelspark's Metal fix (darxkies, Jun 14, 2023)
ef4e644 - docu remove clblast from macos (LLukas22, Jun 16, 2023)
39ba320 - Removed rustflag and added disclaimer to docu (LLukas22, Jun 16, 2023)
b401251 - Chore: Updated LLama.cpp (LLukas22, Jun 16, 2023)
abcf77b - Updated llama.cpp, generated bindings, and removed MacOS/CLBlast refe… (darxkies, Jun 18, 2023)
b41b26f - Update CONTRIBUTING.md (LLukas22, Jun 18, 2023)
ddadb65 - Merge branch 'ggml-blas' of https://github.com/darxkies/llm into pr/282 (LLukas22, Jun 18, 2023)
README.md (5 changes: 5 additions, 0 deletions)

@@ -84,6 +84,11 @@ dependency from being built in debug mode:
[profile.dev.package.ggml-sys]
opt-level = 3
```
+## Leverage Accelerators with `llm`
+
+The `llm` library is engineered to take advantage of hardware accelerators such as `cuda` and `metal` for optimized performance.
+
+To enable `llm` to harness these accelerators, some preliminary configuration steps are necessary, which vary based on your operating system. For comprehensive guidance, please refer to the [Acceleration Support for Building section](doc/CONTRIBUTING.md#acceleration-support-for-building) in our documentation.

## Using `llm` from Other Languages

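The "preliminary configuration" the new README section points to comes down to enabling one of the Cargo features introduced later in this PR (`cublas`, `clblast`, or `metal`), for example by building with `cargo build --release --features cublas`. As a minimal illustration (not code from this PR) of what such a feature gate looks like on the Rust side:

```rust
// Illustration only (not part of this PR): cfg!(feature = "...") evaluates to a
// compile-time constant, so the reported backend is decided by the Cargo
// feature the crate was built with.
fn acceleration_backend() -> &'static str {
    if cfg!(feature = "cublas") {
        "cuBLAS (CUDA)"
    } else if cfg!(feature = "clblast") {
        "CLBlast (OpenCL)"
    } else if cfg!(feature = "metal") {
        "Metal"
    } else {
        "CPU only"
    }
}

fn main() {
    println!("acceleration backend: {}", acceleration_backend());
}
```
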
binaries/generate-ggml-bindings/src/main.rs (87 changes: 68 additions, 19 deletions)

@@ -6,45 +6,94 @@ use std::fs
use std::path::PathBuf;

fn main() {
+    let sys_path = PathBuf::from("crates").join("ggml").join("sys");
+    let ggml_path = sys_path.join("llama-cpp");
+    let include_path = ggml_path.to_str().unwrap().to_string();
+    let src_path = sys_path.join("src");
+
    let bindings = bindgen::Builder::default()
-        .header("crates/ggml/sys/llama-cpp/ggml.h")
+        .header(ggml_path.join("ggml.h").to_str().unwrap().to_string())
+        .allowlist_file(r".*ggml.h")
        // Suppress some warnings
        .raw_line("#![allow(non_upper_case_globals)]")
        .raw_line("#![allow(non_camel_case_types)]")
        .raw_line("#![allow(non_snake_case)]")
        .raw_line("#![allow(unused)]")
+        .raw_line(r#"#[cfg(feature = "cublas")]"#)
+        .raw_line("pub mod cuda;")
+        .raw_line(r#"#[cfg(feature = "metal")]"#)
+        .raw_line("pub mod metal;")
+        .raw_line(r#"#[cfg(feature = "clblast")]"#)
+        .raw_line("pub mod opencl;")
        // Only generate code if it's from GGML
        .allowlist_file("crates/ggml/.*")
        .generate()
        .expect("Unable to generate bindings");

-    let out_path = PathBuf::from("crates")
-        .join("ggml")
-        .join("sys")
-        .join("src")
-        .join("lib.rs");
+    bindgen::Builder::default()
+        .header(ggml_path.join("ggml-cuda.h").to_str().unwrap().to_string())
+        .allowlist_file(r".*ggml-cuda\.h")
+        .allowlist_recursively(false)
+        .clang_arg("-I")
+        .clang_arg(&include_path)
+        .raw_line("use super::ggml_compute_params;")
+        .raw_line("use super::ggml_tensor;")
+        .generate()
+        .expect("Unable to generate cuda bindings")
+        .write_to_file(src_path.join("cuda.rs"))
+        .expect("Couldn't write cuda bindings");
+
+    bindgen::Builder::default()
+        .header(
+            ggml_path
+                .join("ggml-opencl.h")
+                .to_str()
+                .unwrap()
+                .to_string(),
+        )
+        .allowlist_file(r".*ggml-opencl\.h")
+        .allowlist_recursively(false)
+        .clang_arg("-I")
+        .clang_arg(&include_path)
+        .raw_line("use super::ggml_tensor;")
+        .generate()
+        .expect("Unable to generate opencl bindings")
+        .write_to_file(src_path.join("opencl.rs"))
+        .expect("Couldn't write opencl bindings");
+
+    bindgen::Builder::default()
+        .header(ggml_path.join("ggml-metal.h").to_str().unwrap().to_string())
+        .allowlist_file(r".*ggml-metal\.h")
+        .allowlist_recursively(false)
+        .clang_arg("-I")
+        .clang_arg(&include_path)
+        .generate()
+        .expect("Unable to generate metal bindings")
+        .write_to_file(src_path.join("metal.rs"))
+        .expect("Couldn't write metal bindings");

    let mut generated_bindings = bindings.to_string();

    if cfg!(windows) {
        // windows generates all ::std::os::raw::c_* enum types as i32.
        // We need to replace some of them with c_uint as the rust bindings expect them to be unsigned.
        // Temporary hack until bindgen supports defining the enum types manually. See https://github.com/rust-lang/rust-bindgen/issues/1907
-        generated_bindings = generated_bindings.replace(
-            "ggml_type = ::std::os::raw::c_int;",
-            "ggml_type = ::std::os::raw::c_uint;",
-        );
-        generated_bindings = generated_bindings.replace(
-            "ggml_backend = ::std::os::raw::c_int;",
-            "ggml_backend = ::std::os::raw::c_uint;",
-        );
-        generated_bindings = generated_bindings.replace(
-            "ggml_op = ::std::os::raw::c_int;",
-            "ggml_op = ::std::os::raw::c_uint;",
-        );
+        for name in &[
+            "type",
+            "backend",
+            "op",
+            "linesearch",
+            "opt_type",
+            "task_type",
+        ] {
+            generated_bindings = generated_bindings.replace(
+                &format!("ggml_{name} = ::std::os::raw::c_int;"),
+                &format!("ggml_{name} = ::std::os::raw::c_uint;"),
+            );
+        }
    }

-    fs::write(out_path, generated_bindings).expect("Couldn't write bindings");
+    fs::write(src_path.join("lib.rs"), generated_bindings).expect("Couldn't write bindings");

    println!("Successfully updated bindings");
}
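
For orientation, the `raw_line` calls in the first builder mean the generated `crates/ggml/sys/src/lib.rs` now starts with the lint allowances plus feature-gated module declarations, roughly like this (a sketch of the bindgen output, not a file shown in this diff):

```rust
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(unused)]
#[cfg(feature = "cublas")]
pub mod cuda;
#[cfg(feature = "metal")]
pub mod metal;
#[cfg(feature = "clblast")]
pub mod opencl;

// ... bindgen-generated ggml types and functions follow; the cuda.rs, opencl.rs
// and metal.rs files referenced above are produced by the separate builders ...
```

Gating the modules this way keeps the accelerator-specific bindings out of the build entirely unless the matching feature is enabled.
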
binaries/llm-cli/Cargo.toml (5 changes: 5 additions, 0 deletions)

@@ -27,3 +27,8 @@ num_cpus = "1.15.0"

color-eyre = { version = "0.6.2", default-features = false }
zstd = { version = "0.12", default-features = false }
+
+[features]
+cublas = ["llm/cublas"]
+clblast = ["llm/clblast"]
+metal = ["llm/metal"]
crates/ggml/Cargo.toml (4 changes: 4 additions, 0 deletions)

@@ -14,3 +14,7 @@ ggml-sys = { path = "sys", version = "0.2.0-dev" }
rand = { workspace = true }
anyhow = { workspace = true }

+[features]
+cublas = ["ggml-sys/cublas"]
+clblast = ["ggml-sys/clblast"]
+metal = ["ggml-sys/metal"]
crates/ggml/sys/Cargo.toml (5 changes: 5 additions, 0 deletions)

@@ -8,3 +8,8 @@ license = "MIT"

[build-dependencies]
cc = "^1.0"
+
+[features]
+cublas = []
+clblast = []
+metal = []
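
These `[features]` sections only declare and forward the flags: `llm-cli` forwards to `llm`, and `ggml` forwards to `ggml-sys` (the `llm` crate's own Cargo.toml is not part of this excerpt). It is the `ggml-sys` build script, driven by the `cc` crate listed above and likewise not shown here, that ultimately reacts to them. As a rough, hypothetical sketch of that mechanism, assuming only the standard `CARGO_FEATURE_*` environment variables Cargo sets for build scripts:

```rust
// Hypothetical build.rs sketch, NOT the PR's actual build script (which is not
// shown in this diff). Cargo exposes every enabled feature to build scripts as
// an environment variable named CARGO_FEATURE_<NAME>.
use std::env;

fn main() {
    let mut build = cc::Build::new();
    build.file("llama-cpp/ggml.c");

    if env::var("CARGO_FEATURE_CUBLAS").is_ok() {
        // A real build also has to compile the CUDA kernels with nvcc and link
        // the CUDA libraries; this sketch only sets ggml's compile-time flag.
        build.define("GGML_USE_CUBLAS", None);
    }
    if env::var("CARGO_FEATURE_CLBLAST").is_ok() {
        build.define("GGML_USE_CLBLAST", None);
    }
    if env::var("CARGO_FEATURE_METAL").is_ok() {
        build.define("GGML_USE_METAL", None);
    }

    build.compile("ggml");
}
```
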