Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ling0322 committed Jun 23, 2024
1 parent 1b0842a commit 858da23
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ option(WITH_OPENMP "Build with OpenMP." ON)
option(WITH_CUTLASS "build MatMul operators with CUTLASS." OFF)
option(MKL_PREFIX "Prefix for MKL headers and libraries." "/opt/intel/mkl")

set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

if(WITH_CUDA)
add_definitions("-DLIBLLM_CUDA_ENABLED")
find_package(CUDAToolkit REQUIRED)
Expand Down
3 changes: 2 additions & 1 deletion go/llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

package llm

// #cgo LDFLAGS: -ldl
// #cgo linux LDFLAGS: -ldl
// #cgo darwin LDFLAGS: -ldl
// #include <stdlib.h>
// #include "llm_api.h"
import "C"
Expand Down
5 changes: 4 additions & 1 deletion src/libllm/benchmark_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "libllm/lut/flags.h"
#include "libllm/lut/random.h"
#include "libllm/lut/time.h"
#include "libllm/operators.h"
#include "libllm/model_for_generation.h"

constexpr int MagicNumber = 0x55aa;
Expand Down Expand Up @@ -165,7 +166,7 @@ void benchmarkLlama(std::shared_ptr<llama::LlamaModel> model, int ctxLength, DTy
}

int benchmarkMain(Device device) {
CHECK(llmInit(LLM_API_VERSION) == LLM_OK);
libllm::initOperators();

LlamaType llamaType = LlamaType::Llama2_7B;
DType weightType = libllm::DType::kQInt4x32;
Expand All @@ -182,6 +183,8 @@ int benchmarkMain(Device device) {
libllm::benchmarkLlama(model, 512, libllm::DType::kQInt4x32);

printf("----------------------------------------------------------\n");

libllm::destroyOperators();
return 0;
}

Expand Down
11 changes: 8 additions & 3 deletions src/libllm/cuda/gemm_cublas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ namespace libllm {
namespace op {
namespace cuda {

std::shared_ptr<Gemm> CublasGemm::create() {
std::shared_ptr<CublasGemm> mm = std::make_shared<CublasGemm>();
Gemm *CublasGemm::create() {
CublasGemm *mm = new CublasGemm();
mm->_handle = {nullptr, safeDestroyCublas};
if (CUBLAS_STATUS_SUCCESS != cublasCreate(mm->_handle.get_pp())) {
delete mm;
return nullptr;
} else {
return mm;
Expand Down Expand Up @@ -138,6 +139,10 @@ lut::ErrorCode CublasGemm::hgemmArray(
} // op
} // ly

std::shared_ptr<libllm::op::cuda::Gemm> llmCreateCudaOpExtGemm() {
libllm::op::cuda::Gemm *llmGemmExt_New() {
return libllm::op::cuda::CublasGemm::create();
}

// C-ABI counterpart to llmGemmExt_New(): destroys a Gemm instance inside
// the module that allocated it. A nullptr argument is a harmless no-op
// (identical to plain delete on nullptr).
void llmGemmExt_Delete(libllm::op::cuda::Gemm *gemm) {
  if (gemm != nullptr) {
    delete gemm;
  }
}
5 changes: 3 additions & 2 deletions src/libllm/cuda/gemm_cublas.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace cuda {
/// @brief Operators implemented by cuBLAS.
class CublasGemm : public Gemm {
public:
static std::shared_ptr<Gemm> create();
static Gemm *create();

lut::ErrorCode hgemm(
bool transA,
Expand Down Expand Up @@ -78,5 +78,6 @@ class CublasGemm : public Gemm {
} // ly

extern "C" {
// C ABI of the dynamically loaded cuBLAS GEMM extension; these symbols are
// resolved at runtime via SharedLibrary::getFunc (see matmul.cc). A raw
// pointer plus an explicit deleter keeps ownership transfer ABI-safe across
// the shared-library boundary (no std::shared_ptr in the exported surface).
EXTAPI libllm::op::cuda::Gemm *llmGemmExt_New();
EXTAPI void llmGemmExt_Delete(libllm::op::cuda::Gemm *gemm);
} // extern "C"
8 changes: 5 additions & 3 deletions src/libllm/cuda/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,12 @@ std::shared_ptr<MatMul> MatMul::createCublas() {

mm->_gemmExtLib = lut::SharedLibrary::open("llmextcublas");

std::function<std::shared_ptr<op::cuda::Gemm>()> factory;
factory = mm->_gemmExtLib->getFunc<std::shared_ptr<op::cuda::Gemm>()>("llmCreateCudaOpExtGemm");
std::function<op::cuda::Gemm *()> factory;
std::function<void(op::cuda::Gemm *)> deleter;
factory = mm->_gemmExtLib->getFunc<op::cuda::Gemm *()>("llmGemmExt_New");
deleter = mm->_gemmExtLib->getFunc<void(op::cuda::Gemm *)>("llmGemmExt_Delete");

mm->_gemm = factory();
mm->_gemm = std::shared_ptr<op::cuda::Gemm>(factory(), deleter);
if (!mm->_gemm) throw lut::AbortedError("unable to create MatMul operator.");

return mm;
Expand Down
9 changes: 4 additions & 5 deletions src/libllm/test_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,19 @@

#include "../../third_party/catch2/catch_amalgamated.hpp"
#include "libllm/cpu/kernel/interface.h"
#include "libllm/llm.h"
#include "libllm/operators.h"
#include "libllm/lut/error.h"
#include "libllm/lut/log.h"

// Test entry point: brings up libllm operator backends, runs the Catch2
// session, then tears the backends down.
//
// NOTE(review): the rendered diff interleaved the removed llmInit/llmDestroy
// calls with their initOperators/destroyOperators replacements; only the
// post-commit sequence is kept here, since this commit also dropped the
// libllm/llm.h include that declared llmInit/llmDestroy.
int main(int argc, char **argv) {
  // Initialize operator backends before any test allocates tensors.
  libllm::initOperators();

  // Enable some slow kernels for reference.
  libllm::op::cpu::kernel::setAllowSlowKernel(true);

  int result = Catch::Session().run(argc, argv);

  // Release operator backend resources after the whole session finishes.
  libllm::destroyOperators();

  return result;
}

0 comments on commit 858da23

Please sign in to comment.