Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support whisper models #80

Merged
merged 14 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update
  • Loading branch information
ling0322 committed Jul 12, 2024
commit 8d2a8490dd2111bfbd2b50768cfc8eb44df7cd1c
1 change: 1 addition & 0 deletions go/llmtasks/whisper.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func (w *Whisper) Transcribe(audio []byte, config TranscriptionConfig) (llm.Comp

compConfig := llm.NewCompletionConfig()
compConfig.SetTopK(1)
compConfig.SetTemperature(2.0)
compConfig.SupressControlToken("<|notimestamps|>")
comp, err := w.model.Complete(compConfig, prompt)
return comp, err
Expand Down
2 changes: 2 additions & 0 deletions src/libllm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ set(libllm_SOURCES
"cpu/copy.cc"
"cpu/cpu_operators.cc"
"cpu/cpu_tensor_data.cc"
"cpu/fill.cc"
"cpu/fingerprint.cc"
"cpu/gelu.cc"
"cpu/log_mel_spectrogram.cc"
Expand All @@ -41,6 +42,7 @@ set(libllm_SOURCES
"cpu/normalizations.cc"
"cpu/print.cc"
"cpu/rand.cc"
"cpu/reduce.cc"
"cpu/softmax.cc"
"cpu/swiglu.cc"
"cpu/tensor.cc"
Expand Down
14 changes: 14 additions & 0 deletions src/libllm/cpu/cpu_operators.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "libllm/cpu/common.h"
#include "libllm/cpu/copy.h"
#include "libllm/cpu/cpu_tensor_data.h"
#include "libllm/cpu/fill.h"
#include "libllm/cpu/gelu.h"
#include "libllm/cpu/kernel/interface.h"
#include "libllm/cpu/log_mel_spectrogram.h"
Expand All @@ -40,6 +41,7 @@
#include "libllm/cpu/normalizations.h"
#include "libllm/cpu/print.h"
#include "libllm/cpu/rand.h"
#include "libllm/cpu/reduce.h"
#include "libllm/cpu/softmax.h"
#include "libllm/cpu/swiglu.h"
#include "libllm/cpu/tensor.h"
Expand Down Expand Up @@ -114,6 +116,18 @@ Tensor CPUOperators::gelu(Tensor input) {
return cpu::gelu(input);
}

// Fills every element of `input` in-place with `value`, delegating to the CPU kernel.
void CPUOperators::fill(Tensor input, float value) {
  // `cpu::fill` returns void; forwarding it with `return` compiled but was misleading.
  cpu::fill(input, value);
}

// Sum reduction, delegated to the CPU reduce kernel (reduces the last dimension).
Tensor CPUOperators::sum(Tensor inputs) {
  Tensor reduced = cpu::reduce(inputs, MapReduceType::SUM);
  return reduced;
}

// Max reduction, delegated to the CPU reduce kernel (reduces the last dimension).
Tensor CPUOperators::max(Tensor inputs) {
  Tensor reduced = cpu::reduce(inputs, MapReduceType::MAX);
  return reduced;
}

Tensor CPUOperators::rmsNorm(Tensor input, Tensor weight, float eps) {
CHECK(input.getDType() == weight.getDType());

Expand Down
3 changes: 3 additions & 0 deletions src/libllm/cpu/cpu_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ class CPUOperators : public Operators {
Tensor mul(Tensor input, Tensor other) override;
Tensor softmax(Tensor input) override;
Tensor gelu(Tensor input) override;
void fill(Tensor input, float value) override;
Tensor add(Tensor a, Tensor b) override;
Tensor sum(Tensor inputs) override;
Tensor max(Tensor inputs) override;
Tensor tensor(lut::Span<const int> shape, DType dtype) override;
Tensor tensorLike(Tensor input) override;
Tensor zeros(lut::Span<const int> shape, DType dtype) override;
Expand Down
71 changes: 71 additions & 0 deletions src/libllm/cpu/fill.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include "libllm/cpu/fill.h"

#include "libllm/cpu/accessor.h"
#include "libllm/cpu/common.h"
#include "libllm/cpu/tensor.h"
#include "libllm/mp.h"
#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

template<typename T>
void fillKernel(Tensor A, float value) {
  // View A as a list of contiguous 1-D rows, then write `value` into every
  // element of each row; rows are distributed across worker threads.
  TensorList<T, 1> rows = TensorList<T, 1>::fromTensor(A);
  MP::parallelFor({rows.getLength()}, [&rows, value](MP::Partition partition) {
    for (int rowIdx : partition.getRange()) {
      TensorAccessor<T, 1> row = rows.getTensor(rowIdx);
      int rowLen = row.getShape(0);
      for (int elemIdx = 0; elemIdx < rowLen; ++elemIdx) {
        row[elemIdx] = value;
      }
    }
  });
}

// Fill `src` in-place with `value`. Supports kFloat on all platforms and
// kFloat16 on AArch64 builds only; any other dtype aborts via NOT_IMPL().
void fill(Tensor src, float value) {
  DType dtype = src.getDType();

  if (dtype == DType::kFloat) {
    if (src.getNumEl() == 1) {
      // Single-element tensor: write directly, skip the parallel kernel.
      *src.getData<float>() = value;
    } else {
      fillKernel<float>(src, value);
    }
    return;
  }
#if LUT_CPU_ARCH == LUT_AARCH64
  if (dtype == DType::kFloat16) {
    if (src.getNumEl() == 1) {
      *src.getData<Float16>() = value;
    } else {
      fillKernel<Float16>(src, value);
    }
    return;
  }
#endif

  NOT_IMPL();
}

} // namespace cpu
} // namespace op
} // namespace libllm
33 changes: 33 additions & 0 deletions src/libllm/cpu/fill.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#pragma once

#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

// Fill every element of `tensor` in-place with `value`.
// Supported dtypes: kFloat everywhere; kFloat16 on AArch64 builds only (see fill.cc).
void fill(Tensor tensor, float value);

} // namespace cpu
} // namespace op
} // namespace libllm
118 changes: 118 additions & 0 deletions src/libllm/cpu/reduce.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include "libllm/cpu/reduce.h"

#include "libllm/cpu/accessor.h"
#include "libllm/cpu/tensor.h"
#include "libllm/mp.h"
#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

// A fused map-reduce is decomposed into an element-wise "map" step followed by
// an accumulating "reduce" step.
enum class MapType { EXP_FP16_FP32, SQUARE_FP16_FP32, IDENTITY, UNKNOWN };
enum class ReduceType { SUM, MAX, UNKNOWN };

// Map (element-wise) step for the given fused operation. Plain SUM and MAX
// apply no element-wise transform, hence IDENTITY.
constexpr MapType getMapType(MapReduceType mapReduceType) {
  return (mapReduceType == MapReduceType::SUM || mapReduceType == MapReduceType::MAX)
      ? MapType::IDENTITY
      : MapType::UNKNOWN;
}

// Reduce (accumulation) step for the given fused operation.
constexpr ReduceType getReduceType(MapReduceType mapReduceType) {
  if (mapReduceType == MapReduceType::SUM) return ReduceType::SUM;
  if (mapReduceType == MapReduceType::MAX) return ReduceType::MAX;
  return ReduceType::UNKNOWN;
}

// Identity element for the reduction: 0 for SUM, -infinity for MAX.
// Note the value is computed in float and converted to T on return.
template<typename T, ReduceType REDUCE_TYPE>
T getReduceInitial() {
  if (REDUCE_TYPE == ReduceType::SUM) {
    return T(0);
  } else if (REDUCE_TYPE == ReduceType::MAX) {
    return -std::numeric_limits<float>::infinity();
  } else {
    NOT_IMPL();
  }
}

// Reduce tensor A along its last dimension with REDUCE_TYPE (SUM or MAX).
// Returns a tensor with the same rank as A whose last dimension has size 1.
template<typename T, ReduceType REDUCE_TYPE>
Tensor reduceKernel(Tensor A) {
  std::vector<int> shape = A.getShape();
  // BUG FIX: was `shape.back() == 1;` — a no-op comparison, so the output was
  // allocated with the full (un-reduced) last dimension. Assign instead.
  shape.back() = 1;
  Tensor C = tensor(shape, A.getDType());

  TensorList<const T, 1> vA = TensorList<const T, 1>::fromTensor(A);
  TensorList<T, 1> vC = TensorList<T, 1>::fromTensor(C);
  CHECK(vA.getLength() == vC.getLength());

  MP::parallelFor({vA.getLength()}, [&vA, &vC](MP::Partition partition) {
    for (int j : partition.getRange()) {
      TensorAccessor<const T, 1> a = vA.getTensor(j);
      TensorAccessor<T, 1> c = vC.getTensor(j);

      // Accumulate in float even when T is Float16 (see the float accumulator),
      // starting from the reduction's identity element.
      float accumulator = getReduceInitial<T, REDUCE_TYPE>();
      for (int i = 0; i < a.getShape(0); i++) {
        if (REDUCE_TYPE == ReduceType::SUM) {
          accumulator += a[i];
        } else if (REDUCE_TYPE == ReduceType::MAX) {
          if (a[i] > accumulator) accumulator = a[i];
        } else {
          NOT_IMPL();
        }
      }

      c[0] = accumulator;
    }
  });

  return C;
}

// Dispatch a last-dimension reduction to the kernel matching the tensor's
// dtype and the requested operation; unsupported combinations abort.
Tensor reduce(const Tensor &A, MapReduceType reduceType) {
  DType dtype = A.getDType();

  if (dtype == DType::kFloat) {
    if (reduceType == MapReduceType::SUM) return reduceKernel<float, ReduceType::SUM>(A);
    if (reduceType == MapReduceType::MAX) return reduceKernel<float, ReduceType::MAX>(A);
  }
#if LUT_CPU_ARCH == LUT_AARCH64
  // Half-precision kernels are only compiled for AArch64 targets.
  if (dtype == DType::kFloat16) {
    if (reduceType == MapReduceType::SUM) return reduceKernel<Float16, ReduceType::SUM>(A);
    if (reduceType == MapReduceType::MAX) return reduceKernel<Float16, ReduceType::MAX>(A);
  }
#endif

  NOT_IMPL();
}

} // namespace cpu
} // namespace op
} // namespace libllm
34 changes: 34 additions & 0 deletions src/libllm/cpu/reduce.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// The MIT License (MIT)
//
// Copyright (c) 2024 Xiaoyang Chen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#pragma once

#include "libllm/tensor.h"

namespace libllm {
namespace op {
namespace cpu {

// Reduction operations supported by cpu::reduce; the reduction is applied
// along the tensor's last dimension (see reduce.cc).
enum class MapReduceType { SUM, MAX };

// Reduce tensor `A` along its last dimension with the operation `reduceType`.
Tensor reduce(const Tensor &A, MapReduceType reduceType);

} // namespace cpu
} // namespace op
} // namespace libllm
14 changes: 14 additions & 0 deletions src/libllm/functional.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,20 @@ void copy(Tensor src, Tensor dest) {
}
}

// Sum over a dimension; only the last dimension (or dim == -1) is supported.
Tensor sum(Tensor tensor, int dim) {
  int lastDim = tensor.getDim() - 1;
  CHECK(dim == lastDim || dim == -1);
  return getOperators(tensor.getDevice().getType())->sum(tensor);
}

// Maximum over a dimension; only the last dimension (or dim == -1) is supported.
Tensor max(Tensor tensor, int dim) {
  int lastDim = tensor.getDim() - 1;
  CHECK(dim == lastDim || dim == -1);
  return getOperators(tensor.getDevice().getType())->max(tensor);
}

// In-place fill, dispatched to the operators backend of the tensor's device.
void fill(Tensor tensor, float value) {
  auto deviceType = tensor.getDevice().getType();
  getOperators(deviceType)->fill(tensor, value);
}

Tensor attention(Tensor q, Tensor k, Tensor v, Tensor mask) {
float dK = 1.0f / sqrtf(1.0f * q.getShape(-1));
q = F::mul(q, sqrtf(dK));
Expand Down
15 changes: 15 additions & 0 deletions src/libllm/functional.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,21 @@ Tensor swiglu(Tensor input);
/// @return <float>(..., D): the output tensor.
Tensor gelu(Tensor inputs);

/// @brief fill tensor with value.
/// @param tensor the tensor to fill.
/// @param value the value.
void fill(Tensor tensor, float value);

/// @brief Returns the sum of each row of the input tensor in the given dimension dim.
/// @param tensor <float>(d1, d2, ..., dn) the input tensor.
/// @return <float>(d1, d2, ..., dn-1, 1): the output tensor; the reduced last dimension has size 1.
Tensor sum(Tensor tensor, int dim = -1);

/// @brief Returns the maximum value of each row of the input tensor in the given dimension dim.
/// @param tensor <float>(d1, d2, ..., dn) the input tensor.
/// @return <float>(d1, d2, ..., dn-1, 1): the output tensor; the reduced last dimension has size 1.
Tensor max(Tensor tensor, int dim = -1);

/// @brief (im2col) Extracts sliding local blocks from the input tensor. To make
/// sure the input and output shape are the same after Conv, it will also pad the input tensor with
/// zero.
Expand Down
Loading