Skip to content

Commit 645c58d

Browse files
authored
local-apps: update llama.cpp snippet (#1103)
This change is related to these upstream PRs: - ggml-org/llama.cpp#11195 allows using a tag-based repo name like on ollama - ggml-org/llama.cpp#11214 automatically turns on `--conversation` mode for models that have a chat template Example: ```sh # for an "instruct" model, conversation mode is enabled automatically llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF # for a non-instruct model, it runs as completion llama-cli -hf TheBloke/Llama-2-7B-GGUF -p "Once upon a time," ```
1 parent 455f12c commit 645c58d

File tree

2 files changed

+11
-14
lines changed

2 files changed

+11
-14
lines changed

packages/tasks/src/local-apps.spec.ts

+2-8
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,7 @@ describe("local-apps", () => {
1313
const snippet = snippetFunc(model);
1414

1515
expect(snippet[0].content).toEqual(`# Load and run the model:
16-
llama-cli \\
17-
--hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\
18-
--hf-file {{GGUF_FILE}} \\
19-
-p "You are a helpful assistant" \\
20-
--conversation`);
16+
llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF`);
2117
});
2218

2319
it("llama.cpp non-conversational", async () => {
@@ -30,9 +26,7 @@ llama-cli \\
3026
const snippet = snippetFunc(model);
3127

3228
expect(snippet[0].content).toEqual(`# Load and run the model:
33-
llama-cli \\
34-
--hf-repo "mlabonne/gemma-2b-GGUF" \\
35-
--hf-file {{GGUF_FILE}} \\
29+
llama-cli -hf mlabonne/gemma-2b-GGUF \\
3630
-p "Once upon a time,"`);
3731
});
3832

packages/tasks/src/local-apps.ts

+9-6
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,20 @@ function isMlxModel(model: ModelData) {
9595
}
9696

9797
const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
98+
let tagName = "";
99+
if (filepath) {
100+
const quantLabel = parseGGUFQuantLabel(filepath);
101+
tagName = quantLabel ? `:${quantLabel}` : "";
102+
}
98103
const command = (binary: string) => {
99104
const snippet = [
100105
"# Load and run the model:",
101-
`${binary} \\`,
102-
` --hf-repo "${model.id}" \\`,
103-
` --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
104-
` -p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`,
106+
`${binary} -hf ${model.id}${tagName}`,
105107
];
106-
if (model.tags.includes("conversational")) {
108+
if (!model.tags.includes("conversational")) {
109+
// for non-conversational models, add a prompt
107110
snippet[snippet.length - 1] += " \\";
108-
snippet.push(" --conversation");
111+
snippet.push(" -p \"Once upon a time,\"");
109112
}
110113
return snippet.join("\n");
111114
};

0 commit comments

Comments
 (0)