Address review comments
jart committed Apr 11, 2024
1 parent 316df55 commit 492b76d
Showing 4 changed files with 30 additions and 17 deletions.
5 changes: 5 additions & 0 deletions Makefile
@@ -219,6 +219,11 @@ ifdef LLAMA_DISABLE_LOGS
 MK_CPPFLAGS += -DLOG_DISABLE_LOGS
 endif # LLAMA_DISABLE_LOGS
 
+# disable ggml.c's use of sgemm.cpp
+ifdef LLAMA_NO_LLAMAFILE
+MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
+endif
+
 # warnings
 WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
 MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
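With this change, building with the LLAMA_NO_LLAMAFILE variable set (for example: make LLAMA_NO_LLAMAFILE=1) passes -DGGML_USE_LLAMAFILE=0 to the compiler, and the ggml.c hunk further down defaults the macro to 1 when it is not defined. A minimal, self-contained sketch of that opt-out pattern follows; the matmul() function and its messages are hypothetical placeholders, not code from this commit.

/* Compile-time opt-out pattern used by this commit: the macro defaults to 1
 * unless the build system passes -DGGML_USE_LLAMAFILE=0. */
#ifndef GGML_USE_LLAMAFILE
#define GGML_USE_LLAMAFILE 1   /* default: the sgemm.cpp path is compiled in */
#endif

#include <stdio.h>

static void matmul(void) {
#if GGML_USE_LLAMAFILE
    puts("llamafile_sgemm path compiled in; used when its preconditions hold");
#else
    puts("llamafile path compiled out; generic ggml kernels only");
#endif
}

int main(void) {
    matmul();
    return 0;
}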
22 changes: 13 additions & 9 deletions common/common.cpp
@@ -109,11 +109,11 @@ int32_t get_num_physical_cores() {
 
 static void cpuid(unsigned leaf, unsigned subleaf,
                   unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
-    __asm__("movq\t%%rbx,%%rsi\n\t"
-            "cpuid\n\t"
-            "xchgq\t%%rbx,%%rsi"
-            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
-            : "0"(leaf), "2"(subleaf));
+    __asm__("movq\t%%rbx,%%rsi\n\t"
+            "cpuid\n\t"
+            "xchgq\t%%rbx,%%rsi"
+            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
+            : "0"(leaf), "2"(subleaf));
 }
 
 static int pin_cpu(int cpu) {
@@ -140,10 +140,12 @@ static bool is_running_on_efficiency_core(void) {
 static int count_math_cpus(int cpu_count) {
     int result = 0;
     for (int cpu = 0; cpu < cpu_count; ++cpu) {
-        if (pin_cpu(cpu))
+        if (pin_cpu(cpu)) {
             return -1;
-        if (is_running_on_efficiency_core())
+        }
+        if (is_running_on_efficiency_core()) {
             continue; // efficiency cores harm lockstep threading
+        }
         ++cpu; // hyperthreading isn't useful for linear algebra
         ++result;
     }
@@ -158,15 +160,17 @@ static int count_math_cpus(int cpu_count) {
 int get_math_cpu_count() {
 #if defined(__x86_64__) && defined(__linux__)
     int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
-    if (cpu_count < 1)
+    if (cpu_count < 1) {
         return get_num_physical_cores();
+    }
     if (is_hybrid_cpu()) {
         cpu_set_t affinity;
         if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) {
             int result = count_math_cpus(cpu_count);
             pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
-            if (result > 0)
+            if (result > 0) {
                 return result;
+            }
         }
     }
 #endif
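The first hunk is a whitespace-only reformat of the cpuid() wrapper (the inline assembly parks RBX in RSI around the CPUID instruction because RBX can be reserved by the compiler, e.g. for position-independent code); the remaining hunks add braces around single-statement if bodies without changing behavior. For readers unfamiliar with the hybrid-CPU detection these helpers support, here is a hedged, self-contained sketch of how cpuid() can be used to classify the current core. The leaf and bit values (leaf 0x07 EDX bit 15 as the hybrid flag, leaf 0x1A core type 0x20 for an E-core) come from Intel's documentation, not from this diff, and the helper names below are illustrative rather than the ones in common.cpp.

/* Sketch: classify the current core on an Intel hybrid CPU (x86-64, GCC/Clang).
 * A production version would first verify that leaf 0x1A is supported. */
#include <stdbool.h>
#include <stdio.h>

static void cpuid(unsigned leaf, unsigned subleaf,
                  unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
    __asm__("movq\t%%rbx,%%rsi\n\t"   /* save rbx: may be reserved (PIC) */
            "cpuid\n\t"
            "xchgq\t%%rbx,%%rsi"
            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
            : "0"(leaf), "2"(subleaf));
}

static bool cpu_is_hybrid(void) {
    unsigned eax, ebx, ecx, edx;
    cpuid(7, 0, &eax, &ebx, &ecx, &edx);
    return !!(edx & (1u << 15));      /* documented hybrid flag */
}

static bool on_efficiency_core(void) {
    unsigned eax, ebx, ecx, edx;
    cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx);
    return (eax >> 24) == 0x20;       /* 0x20 = Intel Atom core type (E-core) */
}

int main(void) {
    printf("hybrid CPU: %d, currently on an E-core: %d\n",
           cpu_is_hybrid(), on_efficiency_core());
    return 0;
}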
12 changes: 10 additions & 2 deletions ggml.c
@@ -33,6 +33,10 @@
 #include <unistd.h>
 #endif
 
+#ifndef GGML_USE_LLAMAFILE
+#define GGML_USE_LLAMAFILE 1
+#endif
+
 #if defined(_MSC_VER)
 // disable "possible loss of data" to avoid hundreds of casts
 // we should just be careful :)
@@ -10811,7 +10815,8 @@ static void ggml_compute_forward_mul_mat(
     }
 #endif
 
-    if (src1_cont) {
+#if GGML_USE_LLAMAFILE
+    if (nb10 == ggml_type_size(src1->type)) {
         for (int64_t j = 0; j < ne13; j++)
             for (int64_t i = 0; i < ne12; i++)
                 if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
@@ -10830,6 +10835,7 @@ static void ggml_compute_forward_mul_mat(
         return;
     }
 UseGgmlGemm1:;
+#endif
 
     if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
@@ -10862,7 +10868,8 @@ UseGgmlGemm1:;
     const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
     const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
-    if (src1_cont) {
+#if GGML_USE_LLAMAFILE
+    if (nb10 == ggml_type_size(src1->type) || src1->type != vec_dot_type) {
         for (int64_t j = 0; j < ne13; j++)
             for (int64_t i = 0; i < ne12; i++)
                 if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
@@ -10882,6 +10889,7 @@ UseGgmlGemm1:;
         return;
     }
 UseGgmlGemm2:;
+#endif
 
     const int64_t nr0 = ne01; // src0 rows
     const int64_t nr1 = ne1*ne12*ne13; // src1 rows
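Two things change here besides the new compile-time guard: the fast path is skipped entirely when GGML_USE_LLAMAFILE is defined to 0, and the entry condition is relaxed from src1_cont (the whole tensor contiguous) to nb10 == ggml_type_size(src1->type), i.e. the elements along src1's first dimension are packed back to back, since nb10 is the byte stride between neighbouring elements of a row. A minimal sketch of the resulting dispatch shape follows; the tensor struct, fast_sgemm() and mul_mat() names are hypothetical stand-ins, not the real ggml API.

/* Sketch of the dispatch pattern this hunk introduces: try the external GEMM
 * only when the compile-time switch is on and the innermost stride shows the
 * rows are contiguous; otherwise fall through to the generic kernel via a
 * label, as ggml.c does with UseGgmlGemm1/UseGgmlGemm2. */
#include <stddef.h>
#include <stdio.h>

#ifndef GGML_USE_LLAMAFILE
#define GGML_USE_LLAMAFILE 1
#endif

struct tensor {           /* hypothetical stand-in for ggml_tensor */
    size_t type_size;     /* bytes per element (ggml_type_size)    */
    size_t nb0;           /* byte stride along dim 0 (like nb10)   */
    const void *data;
};

static int fast_sgemm(const struct tensor *t) {   /* stand-in for llamafile_sgemm */
    (void)t;
    return 1;             /* pretend the fast kernel accepted the shapes */
}

static void mul_mat(const struct tensor *src1) {
#if GGML_USE_LLAMAFILE
    if (src1->nb0 == src1->type_size) {   /* innermost dimension is contiguous */
        if (!fast_sgemm(src1))
            goto UseGenericGemm;
        puts("used fast path");
        return;
    }
UseGenericGemm:;
#endif
    puts("used generic ggml path");
}

int main(void) {
    struct tensor t = { sizeof(float), sizeof(float), NULL };
    mul_mat(&t);
    return 0;
}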
8 changes: 2 additions & 6 deletions sgemm.cpp
@@ -1079,10 +1079,8 @@ bool llamafile_sgemm(int m, int n, int k, const void *A, int lda, const void *B,
     }
 
     case GGML_TYPE_Q8_0: {
-        if (k % 32)
-            return false;
-        if (Btype != GGML_TYPE_Q8_0)
-            return false;
+        if (Btype != GGML_TYPE_Q8_0)
+            return false;
 #if defined(__AVX2__) || defined(__AVX512F__)
         tinyBLAS_Q0_AVX2<block_q8_0, block_q8_0, float> tb{
             k, (const block_q8_0 *)A, lda,
@@ -1105,8 +1103,6 @@ bool llamafile_sgemm(int m, int n, int k, const void *A, int lda, const void *B,
     }
 
     case GGML_TYPE_Q4_0: {
-        if (k % 32)
-            return false;
         if (Btype != GGML_TYPE_Q8_0)
             return false;
 #if defined(__AVX2__) || defined(__AVX512F__)
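For the quantized cases the k argument is already a block count rather than an element count: the ggml.c call sites above pass ne00 / ggml_blck_size(src0->type). The removed k % 32 guard therefore rejected row widths that are perfectly valid for these formats, which may be why review asked for it to go. A small sketch of the arithmetic, assuming the usual 32-element block size for Q8_0 (the QK8_0 constant is an assumption here, not shown in this diff):

/* Sketch: k as seen by llamafile_sgemm for a quantized src0.
 * QK8_0 = 32 is assumed (the standard ggml Q8_0 block size). */
#include <stdio.h>

#define QK8_0 32   /* assumed elements per Q8_0 block */

int main(void) {
    int ne00 = 1056;                  /* row width in elements (33 blocks)  */
    int k    = ne00 / QK8_0;          /* block count passed as k            */
    /* old guard "if (k % 32) return false;" would reject this valid shape */
    printf("k = %d blocks, k %% 32 = %d\n", k, k % 32);
    return 0;
}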
