73
73
#include < sys/syslimits.h>
74
74
#endif
75
75
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
76
+
77
+ //
78
+ // CURL utils
79
+ //
80
+
81
+ using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
82
+
83
+ // cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
84
+ struct curl_slist_ptr {
85
+ struct curl_slist * ptr = nullptr ;
86
+ ~curl_slist_ptr () {
87
+ if (ptr) {
88
+ curl_slist_free_all (ptr);
89
+ }
90
+ }
91
+ };
76
92
#endif // LLAMA_USE_CURL
77
93
78
94
using json = nlohmann::ordered_json;
@@ -1130,7 +1146,8 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
1130
1146
1131
1147
static bool common_download_file (const std::string & url, const std::string & path, const std::string & hf_token) {
1132
1148
// Initialize libcurl
1133
- std::unique_ptr<CURL, decltype (&curl_easy_cleanup)> curl (curl_easy_init (), &curl_easy_cleanup);
1149
+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
1150
+ curl_slist_ptr http_headers;
1134
1151
if (!curl) {
1135
1152
LOG_ERR (" %s: error initializing libcurl\n " , __func__);
1136
1153
return false ;
@@ -1144,11 +1161,9 @@ static bool common_download_file(const std::string & url, const std::string & pa
1144
1161
1145
1162
// Check if hf-token or bearer-token was specified
1146
1163
if (!hf_token.empty ()) {
1147
- std::string auth_header = " Authorization: Bearer " ;
1148
- auth_header += hf_token.c_str ();
1149
- struct curl_slist *http_headers = NULL ;
1150
- http_headers = curl_slist_append (http_headers, auth_header.c_str ());
1151
- curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers);
1164
+ std::string auth_header = " Authorization: Bearer " + hf_token;
1165
+ http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
1166
+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
1152
1167
}
1153
1168
1154
1169
#if defined(_WIN32)
@@ -1444,6 +1459,80 @@ struct llama_model * common_load_model_from_hf(
1444
1459
return common_load_model_from_url (model_url, local_path, hf_token, params);
1445
1460
}
1446
1461
1462
+ /* *
1463
+ * Allow getting the HF file from the HF repo with tag (like ollama), for example:
1464
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
1465
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
1466
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
1467
+ * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
1468
+ *
1469
+ * Return pair of <repo, file> (with "repo" already having tag removed)
1470
+ *
1471
+ * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
1472
+ */
1473
+ std::pair<std::string, std::string> common_get_hf_file (const std::string & hf_repo_with_tag, const std::string & hf_token) {
1474
+ auto parts = string_split<std::string>(hf_repo_with_tag, ' :' );
1475
+ std::string tag = parts.size () > 1 ? parts.back () : " latest" ;
1476
+ std::string hf_repo = parts[0 ];
1477
+ if (string_split<std::string>(hf_repo, ' /' ).size () != 2 ) {
1478
+ throw std::invalid_argument (" error: invalid HF repo format, expected <user>/<model>[:quant]\n " );
1479
+ }
1480
+
1481
+ // fetch model info from Hugging Face Hub API
1482
+ json model_info;
1483
+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
1484
+ curl_slist_ptr http_headers;
1485
+ std::string res_str;
1486
+ std::string url = " https://huggingface.co/v2/" + hf_repo + " /manifests/" + tag;
1487
+ curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
1488
+ curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L );
1489
+ typedef size_t (*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
1490
+ auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
1491
+ static_cast <std::string *>(data)->append ((char * ) ptr, size * nmemb);
1492
+ return size * nmemb;
1493
+ };
1494
+ curl_easy_setopt (curl.get (), CURLOPT_WRITEFUNCTION, static_cast <CURLOPT_WRITEFUNCTION_PTR>(write_callback));
1495
+ curl_easy_setopt (curl.get (), CURLOPT_WRITEDATA, &res_str);
1496
+ #if defined(_WIN32)
1497
+ curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
1498
+ #endif
1499
+ if (!hf_token.empty ()) {
1500
+ std::string auth_header = " Authorization: Bearer " + hf_token;
1501
+ http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
1502
+ }
1503
+ // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
1504
+ http_headers.ptr = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp" );
1505
+ http_headers.ptr = curl_slist_append (http_headers.ptr , " Accept: application/json" );
1506
+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
1507
+
1508
+ CURLcode res = curl_easy_perform (curl.get ());
1509
+
1510
+ if (res != CURLE_OK) {
1511
+ throw std::runtime_error (" error: cannot make GET request to HF API" );
1512
+ }
1513
+
1514
+ long res_code;
1515
+ curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &res_code);
1516
+ if (res_code == 200 ) {
1517
+ model_info = json::parse (res_str);
1518
+ } else if (res_code == 401 ) {
1519
+ throw std::runtime_error (" error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token" );
1520
+ } else {
1521
+ throw std::runtime_error (string_format (" error from HF API, response code: %ld, data: %s" , res_code, res_str.c_str ()));
1522
+ }
1523
+
1524
+ // check response
1525
+ if (!model_info.contains (" ggufFile" )) {
1526
+ throw std::runtime_error (" error: model does not have ggufFile" );
1527
+ }
1528
+ json & gguf_file = model_info.at (" ggufFile" );
1529
+ if (!gguf_file.contains (" rfilename" )) {
1530
+ throw std::runtime_error (" error: ggufFile does not have rfilename" );
1531
+ }
1532
+
1533
+ return std::make_pair (hf_repo, gguf_file.at (" rfilename" ));
1534
+ }
1535
+
1447
1536
#else
1448
1537
1449
1538
struct llama_model * common_load_model_from_url (
@@ -1465,6 +1554,11 @@ struct llama_model * common_load_model_from_hf(
1465
1554
return nullptr ;
1466
1555
}
1467
1556
1557
+ std::pair<std::string, std::string> common_get_hf_file (const std::string &, const std::string &) {
1558
+ LOG_WRN (" %s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n " , __func__);
1559
+ return std::make_pair (" " , " " );
1560
+ }
1561
+
1468
1562
#endif // LLAMA_USE_CURL
1469
1563
1470
1564
//
0 commit comments