From bfd2f21fb43525a8757a8c9e44032fd14bac222b Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Fri, 28 Jun 2024 20:38:12 -0400
Subject: [PATCH] bitnet : replace 1.58b with b1.58, as in the paper

---
 convert-hf-to-gguf.py          | 2 +-
 examples/quantize/quantize.cpp | 4 ++--
 ggml/src/ggml-common.h         | 2 +-
 ggml/src/ggml-quants.c         | 2 +-
 src/llama.cpp                  | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 2bf0967ce4f91..eb5aaebac63af 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -300,7 +300,7 @@ def write_tensors(self):
 
             if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                 # TODO: cleaner model-specific per-tensor types
-                # NOTE: Q1_3 is only relevant for BitNet 1.58b
+                # NOTE: Q1_3 is only relevant for BitNet b1.58
                 if (
                     self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
                     and gguf.can_quantize_to_q1_3(data)
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 43241df6087c7..aed39a4d00777 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -26,8 +26,8 @@ static const std::vector<quant_option> QUANT_OPTIONS = {
     { "IQ2_M",   LLAMA_FTYPE_MOSTLY_IQ2_M,   " 2.7  bpw quantization",           },
     { "IQ1_S",   LLAMA_FTYPE_MOSTLY_IQ1_S,   " 1.56 bpw quantization",           },
     { "IQ1_M",   LLAMA_FTYPE_MOSTLY_IQ1_M,   " 1.75 bpw quantization",           },
-    { "Q1_3",    LLAMA_FTYPE_MOSTLY_Q1_3,    " 1.63 bpw for BitNet 1.58b",       },
-    { "Q2_2",    LLAMA_FTYPE_MOSTLY_Q2_2,    " 2.00 bpw for BitNet 1.58b",       },
+    { "Q1_3",    LLAMA_FTYPE_MOSTLY_Q1_3,    " 1.63 bpw for BitNet b1.58",       },
+    { "Q2_2",    LLAMA_FTYPE_MOSTLY_Q2_2,    " 2.00 bpw for BitNet b1.58",       },
     { "Q2_K",    LLAMA_FTYPE_MOSTLY_Q2_K,    " 2.96G, +3.5199 ppl @ Llama-3-8B", },
     { "Q2_K_S",  LLAMA_FTYPE_MOSTLY_Q2_K_S,  " 2.96G, +3.1836 ppl @ Llama-3-8B", },
     { "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization",           },
diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
index 9c680e3b1c05f..71901565158fa 100644
--- a/ggml/src/ggml-common.h
+++ b/ggml/src/ggml-common.h
@@ -137,7 +137,7 @@ typedef sycl::half2 ggml_half2;
 
 #endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
 
-// 1.625 bpw for BitNet 1.58b models
+// 1.625 bpw for BitNet b1.58 models
 #define QK1_3 64
 typedef struct {
     uint8_t q[(QK1_3 - 4*QK1_3/64)/5]; // 5 elements per byte (3^5 = 243 < 256)
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 5dd682b602d56..e1197f4733b51 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -3366,7 +3366,7 @@ size_t quantize_q2_2(const float * restrict src, void * restrict dst, int64_t nr
     return nrow * row_size;
 }
 
-// ====================== 1.625 bpw (de)-quantization (BitNet 1.58b)
+// ====================== 1.625 bpw (de)-quantization (BitNet b1.58)
 
 void quantize_row_q1_3_reference(const float * restrict x, block_q1_3 * restrict y, int64_t k) {
     assert(k % QK1_3 == 0);
diff --git a/src/llama.cpp b/src/llama.cpp
index fa2d97e65d472..750455e33509f 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4186,8 +4186,8 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_ALL_F32:     return "all F32";
         case LLAMA_FTYPE_MOSTLY_F16:  return "F16";
         case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
-        case LLAMA_FTYPE_MOSTLY_Q1_3: return "Q1_3 - 1.625 bpw for BitNet 1.58b";
-        case LLAMA_FTYPE_MOSTLY_Q2_2: return "Q2_2 - 2.000 bpw for BitNet 1.58b";
+        case LLAMA_FTYPE_MOSTLY_Q1_3: return "Q1_3 - 1.625 bpw for BitNet b1.58";
+        case LLAMA_FTYPE_MOSTLY_Q2_2: return "Q2_2 - 2.000 bpw for BitNet b1.58";
         case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
         case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
         case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: