Skip to content

Commit

Permalink
imatrix: guard even more against low-bit quantization misuse
Browse files · Browse the repository at this point in the history
  • Loading branch information
Kawrakow committed Jan 12, 2024
1 parent d5598f7 commit f342143
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9240,6 +9240,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
}
}
}
// Guard: the very-low-bit quantization types (IQ2_XXS, IQ2_XS, and Q2_K when
// producing Q2_K_S — except for the token-embedding tensor) depend on an
// importance matrix to choose which weights to preserve. Without one the
// quantized output is unusable, so abort loudly instead of silently emitting
// a broken model. NOTE(review): presumably token_embd.weight is exempt because
// it tolerates plain Q2_K — confirm against the quantization scheme docs.
if ((new_type == GGML_TYPE_IQ2_XXS ||
new_type == GGML_TYPE_IQ2_XS ||
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
fprintf(stderr, "\n\n============================================================\n");
fprintf(stderr, "Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
fprintf(stderr, "The result will be garbage, so bailing out\n");
fprintf(stderr, "============================================================\n\n");
throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
}

float * f32_data;

Expand Down

0 comments on commit f342143

Please sign in to comment.