Skip to content

Commit f0361f3

Browse files
giladgd and ngxson authored
local-apps: update node-llama-cpp snippet (#1169)
This PR updates the code snippet of `node-llama-cpp` to use a tag-based model URI, like in `llama.cpp` and Ollama. The implementation for this uses the Ollama support on Hugging Face, and is based on this PR: ggml-org/llama.cpp#11195 Examples: ```bash # chat with the model npx -y node-llama-cpp chat hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_M # estimate the model compatibility with the current machine hardware npx -y node-llama-cpp inspect estimate hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_M ``` Co-authored-by: Xuan-Son Nguyen <[email protected]>
1 parent 34edae3 commit f0361f3

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

packages/tasks/src/local-apps.ts

+9-6
Original file line numberDiff line numberDiff line change
@@ -138,18 +138,21 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
138138
};
139139

140140
const snippetNodeLlamaCppCli = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
141+
let tagName = "{{OLLAMA_TAG}}";
142+
143+
if (filepath) {
144+
const quantLabel = parseGGUFQuantLabel(filepath);
145+
tagName = quantLabel ? `:${quantLabel}` : tagName;
146+
}
147+
141148
return [
142149
{
143150
title: "Chat with the model",
144-
content: [
145-
`npx -y node-llama-cpp chat \\`,
146-
` --model "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}" \\`,
147-
` --prompt 'Hi there!'`,
148-
].join("\n"),
151+
content: `npx -y node-llama-cpp chat hf:${model.id}${tagName}`,
149152
},
150153
{
151154
title: "Estimate the model compatibility with your hardware",
152-
content: `npx -y node-llama-cpp inspect estimate "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}"`,
155+
content: `npx -y node-llama-cpp inspect estimate hf:${model.id}${tagName}`,
153156
},
154157
];
155158
};

0 commit comments

Comments
 (0)