Skip to content

Commit

Permalink
ggml : add loongarch lsx and lasx support (#6454)
Browse files Browse the repository at this point in the history
* add loongarch lsx and lasx optimize code

* Add loongarch compilation support to makefile

* revert stb_image.h

* opt bytes_from_nibbles_32 and sum_i16_pairs_float

* fix undeclared

* format code

* update

* update 2

---------

Co-authored-by: Jinyang He <[email protected]>
  • Loading branch information
junchao-loongson and MQ-mengqing authored May 20, 2024
1 parent 1cc0155 commit 65c5820
Show file tree
Hide file tree
Showing 5 changed files with 2,389 additions and 81 deletions.
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeli
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
option(LLAMA_LASX "llama: enable lasx" ON)
option(LLAMA_LSX "llama: enable lsx" ON)

# add perf arguments
option(LLAMA_PERF "llama: enable perf" OFF)
Expand Down Expand Up @@ -1108,6 +1110,17 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
message(STATUS "loongarch64 detected")

list(APPEND ARCH_FLAGS -march=loongarch64)
if (LLAMA_LASX)
list(APPEND ARCH_FLAGS -mlasx)
endif()
if (LLAMA_LSX)
list(APPEND ARCH_FLAGS -mlsx)
endif()

else()
message(STATUS "Unknown architecture")
endif()
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ ifneq ($(filter ppc64le%,$(UNAME_M)),)
CUDA_POWER_ARCH = 1
endif

ifneq ($(filter loongarch64%,$(UNAME_M)),)
MK_CFLAGS += -mlasx
MK_CXXFLAGS += -mlasx
endif

else
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
Expand Down
28 changes: 28 additions & 0 deletions ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,34 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
#include <riscv_vector.h>
#endif

#if defined(__loongarch64)
#if defined(__loongarch_asx)
#include <lasxintrin.h>
#endif
#if defined(__loongarch_sx)
#include <lsxintrin.h>
#endif
#endif

#if defined(__loongarch_asx)

typedef union {
int32_t i;
float f;
} ft_union;

/* float type data load instructions */
static __m128 __lsx_vreplfr2vr_s(float val) {
ft_union fi_tmpval = {.f = val};
return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
}

static __m256 __lasx_xvreplfr2vr_s(float val) {
ft_union fi_tmpval = {.f = val};
return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
}
#endif

#ifdef __F16C__

#ifdef _MSC_VER
Expand Down
Loading

0 comments on commit 65c5820

Please sign in to comment.