[CI-Examples] Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance and ease
of use. This commit adds two examples with Candle: simple matrix multiplication
(to quickly test functionality) and Quantized LLaMA (to test performance).

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
Dmitrii Kuvaiskii committed on Jul 16, 2024 · 1 parent ee079d4 · commit 7c5301c

Showing 6 changed files with 204 additions and 0 deletions.

.gitignore
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model
/*.bin
/*.json

Makefile
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
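The two download targets above rely on `../common_tools/download`, a helper script
shared by the Gramine examples that fetches a URL and verifies its SHA256 hash. As a
rough manual equivalent (a sketch, not part of this commit, assuming `wget` and
`sha256sum` are available):

```sh
# hypothetical manual download of the model, mirroring what ../common_tools/download does
wget -O llama-2-7b.ggmlv3.q4_0.bin \
    https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_0.bin
echo "bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5  llama-2-7b.ggmlv3.q4_0.bin" \
    | sha256sum --check
```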

README.md
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the template manifests for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

Building the `candle_quantized` app downloads ~4GB of data (model + tokenizer).
This happens automatically in the Makefile.

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifests
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (a quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
    --model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
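The manifest templates below check an `EDMM` environment variable at
manifest-generation time (via Jinja's `env.get()`), so EDMM mode can be toggled when
building, assuming SGX2/EDMM-capable hardware and that the variable is visible to
`gramine-manifest` (a sketch, not part of this commit):

```sh
# rebuild with EDMM enabled; the templates then set sgx.edmm_enable = true
make clean
EDMM=1 make SGX=1
gramine-sgx ./candle_matmul
```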

candle_matmul.manifest.template
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
  { path = "/candle_matmul", uri = "file:candle_matmul" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
  "file:candle_matmul",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
]
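For reference, `make SGX=1` expands this template and signs the result roughly as
follows (a sketch with the variables filled in for a typical x86_64 system; the
actual values come from the Makefile):

```sh
# render the Jinja template into a concrete manifest, then measure and sign the enclave
gramine-manifest \
    -Dlog_level=error \
    -Darch_libdir=/lib/x86_64-linux-gnu \
    candle_matmul.manifest.template > candle_matmul.manifest
gramine-sgx-sign \
    --manifest candle_matmul.manifest \
    --output candle_matmul.manifest.sgx
```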

candle_quantized.manifest.template
@@ -0,0 +1,37 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
loader.env.RAYON_NUM_THREADS = { passthrough = true }

loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]
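Because the ~4GB model file is listed in `sgx.trusted_files`, `gramine-sgx-sign`
hashes it into the signed manifest, so signing this example takes noticeably longer
than the matmul one. Once the signature exists, its enclave measurements can be
inspected, e.g. with Gramine's sigstruct viewer (a sketch; the tool ships with
upstream Gramine and is not part of this commit):

```sh
# print MRENCLAVE, MRSIGNER and other SIGSTRUCT fields recorded for the enclave
gramine-sgx-sigstruct-view candle_quantized.sig
```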

prepared_matmul_src/main.rs
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started

use candle_core::{Device, Tensor};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let device = Device::Cpu;

    let a = Tensor::randn(0f32, 1., (2, 3), &device)?; // random 2x3 tensor on the CPU
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?; // random 3x4 tensor on the CPU

    let c = a.matmul(&b)?; // matrix product: (2x3) x (3x4) = (2x4)
    println!("{c}");
    Ok(())
}
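The Makefile copies this file into a crate it creates under `src/candle_matmul`
(via `cargo new` plus `cargo add candle-core`), so after a `make` the example can
also be run natively through Cargo (a sketch, assuming the default build above has
already been performed):

```sh
# run the matmul example outside Gramine; prints the random 2x4 result tensor
cd src/candle_matmul
cargo run
```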