Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support whisper models #80

Merged
merged 14 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update
  • Loading branch information
ling0322 committed Jul 12, 2024
commit 8d2a8490dd2111bfbd2b50768cfc8eb44df7cd1c
1 change: 1 addition & 0 deletions go/llmtasks/whisper.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func (w *Whisper) Transcribe(audio []byte, config TranscriptionConfig) (llm.Comp

compConfig := llm.NewCompletionConfig()
compConfig.SetTopK(1)
compConfig.SetTemperature(2.0)
compConfig.SupressControlToken("<|notimestamps|>")
comp, err := w.model.Complete(compConfig, prompt)
return comp, err
Expand Down
2 changes: 2 additions & 0 deletions src/libllm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ set(libllm_SOURCES
"cpu/copy.cc"
"cpu/cpu_operators.cc"
"cpu/cpu_tensor_data.cc"
"cpu/fill.cc"
"cpu/fingerprint.cc"
"cpu/gelu.cc"
"cpu/log_mel_spectrogram.cc"
Expand All @@ -41,6 +42,7 @@ set(libllm_SOURCES
"cpu/normalizations.cc"
"cpu/print.cc"
"cpu/rand.cc"
"cpu/reduce.cc"
"cpu/softmax.cc"
"cpu/swiglu.cc"
"cpu/tensor.cc"
Expand Down
14 changes: 14 additions & 0 deletions src/libllm/cpu/cpu_operators.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "libllm/cpu/common.h"
#include "libllm/cpu/copy.h"
#include "libllm/cpu/cpu_tensor_data.h"
#include "libllm/cpu/fill.h"
#include "libllm/cpu/gelu.h"
#include "libllm/cpu/kernel/interface.h"
#include "libllm/cpu/log_mel_spectrogram.h"
Expand All @@ -40,6 +41,7 @@
#include "libllm/cpu/normalizations.h"
#include "libllm/cpu/print.h"
#include "libllm/cpu/rand.h"
#include "libllm/cpu/reduce.h"
#include "libllm/cpu/softmax.h"
#include "libllm/cpu/swiglu.h"
#include "libllm/cpu/tensor.h"
Expand Down Expand Up @@ -114,6 +116,18 @@ Tensor CPUOperators::gelu(Tensor input) {
return cpu::gelu(input);
}

// Fills every element of `input` in-place with `value`, delegating to the CPU kernel.
void CPUOperators::fill(Tensor input, float value) {
  // `cpu::fill` returns void; forwarding it with `return` compiled but was misleading.
  cpu::fill(input, value);
}

// Sum reduction, delegated to the CPU reduce kernel (reduces the last dimension).
Tensor CPUOperators::sum(Tensor inputs) {
  Tensor reduced = cpu::reduce(inputs, MapReduceType::SUM);
  return reduced;
}

// Max reduction, delegated to the CPU reduce kernel (reduces the last dimension).
Tensor CPUOperators::max(Tensor inputs) {
  Tensor reduced = cpu::reduce(inputs, MapReduceType::MAX);
  return reduced;
}

Tensor CPUOperators::rmsNorm(Tensor input, Tensor weight, float eps) {
CHECK(input.getDType() == weight.getDType());

Expand Down
3 changes: 3 additions & 0 deletions src/libllm/cpu/cpu_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ class CPUOperators : public Operators {
Tensor mul(Tensor input, Tensor other) override;
Tensor softmax(Tensor input) override;
Tensor gelu(Tensor input) override;
void fill(Tensor input, float value) override;
Tensor add(Tensor a, Tensor b) override;
Tensor sum(Tensor inputs) override;
Tensor max(Tensor inputs) override;
Tensor tensor(lut::Span<const int> shape, DType dtype) override;
Tensor tensorLike(Tensor input) override;
Tensor zeros(lut::Span<const int> shape, DType dtype) override;
Expand Down
71 changes: 71 additions & 0 deletions src/libllm/cpu/fill.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include "libllm/cpu/fill.h"

#include "libllm/cpu/accessor.h"
#include "libllm/cpu/common.h"
#include "libllm/cpu/tensor.h"
#include "libllm/mp.h"
#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

template<typename T>
void fillKernel(Tensor A, float value) {
  // View A as a list of contiguous 1-D rows, then write `value` into every
  // element of each row; rows are distributed across worker threads.
  TensorList<T, 1> rows = TensorList<T, 1>::fromTensor(A);
  MP::parallelFor({rows.getLength()}, [&rows, value](MP::Partition partition) {
    for (int rowIdx : partition.getRange()) {
      TensorAccessor<T, 1> row = rows.getTensor(rowIdx);
      int rowLen = row.getShape(0);
      for (int elemIdx = 0; elemIdx < rowLen; ++elemIdx) {
        row[elemIdx] = value;
      }
    }
  });
}

// Fill `src` in-place with `value`. Supports kFloat on all platforms and
// kFloat16 on AArch64 builds only; any other dtype aborts via NOT_IMPL().
void fill(Tensor src, float value) {
  DType dtype = src.getDType();

  if (dtype == DType::kFloat) {
    if (src.getNumEl() == 1) {
      // Single-element tensor: write directly, skip the parallel kernel.
      *src.getData<float>() = value;
    } else {
      fillKernel<float>(src, value);
    }
    return;
  }
#if LUT_CPU_ARCH == LUT_AARCH64
  if (dtype == DType::kFloat16) {
    if (src.getNumEl() == 1) {
      *src.getData<Float16>() = value;
    } else {
      fillKernel<Float16>(src, value);
    }
    return;
  }
#endif

  NOT_IMPL();
}

} // namespace cpu
} // namespace op
} // namespace libllm
33 changes: 33 additions & 0 deletions src/libllm/cpu/fill.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#pragma once

#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

// Fill every element of `tensor` in-place with `value`.
// Supported dtypes: kFloat everywhere; kFloat16 on AArch64 builds only (see fill.cc).
void fill(Tensor tensor, float value);

} // namespace cpu
} // namespace op
} // namespace libllm
118 changes: 118 additions & 0 deletions src/libllm/cpu/reduce.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include "libllm/cpu/reduce.h"

#include "libllm/cpu/accessor.h"
#include "libllm/cpu/tensor.h"
#include "libllm/mp.h"
#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

// A fused map-reduce is decomposed into an element-wise "map" step followed by
// an accumulating "reduce" step.
enum class MapType { EXP_FP16_FP32, SQUARE_FP16_FP32, IDENTITY, UNKNOWN };
enum class ReduceType { SUM, MAX, UNKNOWN };

// Map (element-wise) step for the given fused operation. Plain SUM and MAX
// apply no element-wise transform, hence IDENTITY.
constexpr MapType getMapType(MapReduceType mapReduceType) {
  return (mapReduceType == MapReduceType::SUM || mapReduceType == MapReduceType::MAX)
      ? MapType::IDENTITY
      : MapType::UNKNOWN;
}

// Reduce (accumulation) step for the given fused operation.
constexpr ReduceType getReduceType(MapReduceType mapReduceType) {
  if (mapReduceType == MapReduceType::SUM) return ReduceType::SUM;
  if (mapReduceType == MapReduceType::MAX) return ReduceType::MAX;
  return ReduceType::UNKNOWN;
}

// Identity element for the reduction: 0 for SUM, -infinity for MAX.
// Note the value is computed in float and converted to T on return.
template<typename T, ReduceType REDUCE_TYPE>
T getReduceInitial() {
  if (REDUCE_TYPE == ReduceType::SUM) {
    return T(0);
  } else if (REDUCE_TYPE == ReduceType::MAX) {
    return -std::numeric_limits<float>::infinity();
  } else {
    NOT_IMPL();
  }
}

// Reduce tensor A along its last dimension with REDUCE_TYPE (SUM or MAX).
// Returns a tensor with the same rank as A whose last dimension has size 1.
template<typename T, ReduceType REDUCE_TYPE>
Tensor reduceKernel(Tensor A) {
  std::vector<int> shape = A.getShape();
  // BUG FIX: was `shape.back() == 1;` — a no-op comparison, so the output was
  // allocated with the full (un-reduced) last dimension. Assign instead.
  shape.back() = 1;
  Tensor C = tensor(shape, A.getDType());

  TensorList<const T, 1> vA = TensorList<const T, 1>::fromTensor(A);
  TensorList<T, 1> vC = TensorList<T, 1>::fromTensor(C);
  CHECK(vA.getLength() == vC.getLength());

  MP::parallelFor({vA.getLength()}, [&vA, &vC](MP::Partition partition) {
    for (int j : partition.getRange()) {
      TensorAccessor<const T, 1> a = vA.getTensor(j);
      TensorAccessor<T, 1> c = vC.getTensor(j);

      // Accumulate in float even when T is Float16 (see the float accumulator),
      // starting from the reduction's identity element.
      float accumulator = getReduceInitial<T, REDUCE_TYPE>();
      for (int i = 0; i < a.getShape(0); i++) {
        if (REDUCE_TYPE == ReduceType::SUM) {
          accumulator += a[i];
        } else if (REDUCE_TYPE == ReduceType::MAX) {
          if (a[i] > accumulator) accumulator = a[i];
        } else {
          NOT_IMPL();
        }
      }

      c[0] = accumulator;
    }
  });

  return C;
}

// Dispatch a last-dimension reduction to the kernel matching the tensor's
// dtype and the requested operation; unsupported combinations abort.
Tensor reduce(const Tensor &A, MapReduceType reduceType) {
  DType dtype = A.getDType();

  if (dtype == DType::kFloat) {
    if (reduceType == MapReduceType::SUM) return reduceKernel<float, ReduceType::SUM>(A);
    if (reduceType == MapReduceType::MAX) return reduceKernel<float, ReduceType::MAX>(A);
  }
#if LUT_CPU_ARCH == LUT_AARCH64
  // Half-precision kernels are only compiled for AArch64 targets.
  if (dtype == DType::kFloat16) {
    if (reduceType == MapReduceType::SUM) return reduceKernel<Float16, ReduceType::SUM>(A);
    if (reduceType == MapReduceType::MAX) return reduceKernel<Float16, ReduceType::MAX>(A);
  }
#endif

  NOT_IMPL();
}

} // namespace cpu
} // namespace op
} // namespace libllm
34 changes: 34 additions & 0 deletions src/libllm/cpu/reduce.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#pragma once

#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

// Reduction operations supported by cpu::reduce; the reduction is applied
// along the tensor's last dimension (see reduce.cc).
enum class MapReduceType { SUM, MAX };

// Reduce tensor `A` along its last dimension with the operation `reduceType`.
Tensor reduce(const Tensor &A, MapReduceType reduceType);

} // namespace cpu
} // namespace op
} // namespace libllm
14 changes: 14 additions & 0 deletions src/libllm/functional.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,20 @@ void copy(Tensor src, Tensor dest) {
}
}

// Sum over a dimension; only the last dimension (or dim == -1) is supported.
Tensor sum(Tensor tensor, int dim) {
  int lastDim = tensor.getDim() - 1;
  CHECK(dim == lastDim || dim == -1);
  return getOperators(tensor.getDevice().getType())->sum(tensor);
}

// Maximum over a dimension; only the last dimension (or dim == -1) is supported.
Tensor max(Tensor tensor, int dim) {
  int lastDim = tensor.getDim() - 1;
  CHECK(dim == lastDim || dim == -1);
  return getOperators(tensor.getDevice().getType())->max(tensor);
}

// In-place fill, dispatched to the operators backend of the tensor's device.
void fill(Tensor tensor, float value) {
  auto deviceType = tensor.getDevice().getType();
  getOperators(deviceType)->fill(tensor, value);
}

Tensor attention(Tensor q, Tensor k, Tensor v, Tensor mask) {
float dK = 1.0f / sqrtf(1.0f * q.getShape(-1));
q = F::mul(q, sqrtf(dK));
Expand Down
15 changes: 15 additions & 0 deletions src/libllm/functional.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,21 @@ Tensor swiglu(Tensor input);
/// @return <float>(..., D): the output tensor.
Tensor gelu(Tensor inputs);

/// @brief fill tensor with value.
/// @param tensor the tensor to fill.
/// @param value the value.
void fill(Tensor tensor, float value);

/// @brief Returns the sum of each row of the input tensor in the given dimension dim.
/// @param tensor <float>(d1, d2, ..., dn) the input tensor.
/// @return <float>(d1, d2, ..., dn-1, 1): the output tensor; the reduced last dimension has size 1.
Tensor sum(Tensor tensor, int dim = -1);

/// @brief Returns the maximum value of each row of the input tensor in the given dimension dim.
/// @param tensor <float>(d1, d2, ..., dn) the input tensor.
/// @return <float>(d1, d2, ..., dn-1, 1): the output tensor; the reduced last dimension has size 1.
Tensor max(Tensor tensor, int dim = -1);

/// @brief (im2col) Extracts sliding local blocks from the input tensor. To make
/// sure the input and output shape are the same after Conv, it will also pad the input tensor with
/// zero.
Expand Down
Loading