Merge pull request #42 from thewh1teagle/feat/vulkan
feat: add vulkan support
absadiki authored Aug 29, 2024
2 parents 3ee9803 + b67f020 commit 4ddfa8c
Showing 5 changed files with 15 additions and 27 deletions.
15 changes: 13 additions & 2 deletions README.md
@@ -113,6 +113,18 @@ python -m build --wheel # in this repository to build the wheel. Assumes you hav
pip install dist/<generated>.whl
```

### Vulkan support

Thanks to [@thewh1teagle](https://github.com/thewh1teagle)

To build and install, clone the repository and run the following commands:

```shell
export CMAKE_ARGS="-DGGML_VULKAN=1"
python -m build --wheel # in this repository to build the wheel. Assumes you have installed build with pip install build
pip install dist/<generated>.whl
```
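
Not shown in this hunk: as a quick sanity check after installing the Vulkan-enabled wheel, the high-level API documented in `pywhispercpp/model.py` (see the docstring example later in this commit) can be used. The sketch below is illustrative only; `'base.en'` and `'file.mp3'` are placeholders for whichever ggml model and audio file you have.

```python
from pywhispercpp.model import Model

# Placeholders: any supported ggml model name and audio file will do.
# This only confirms the freshly built wheel imports and transcribes;
# the selected backend (Vulkan vs. CPU) is typically reported in
# whisper.cpp's startup log output.
model = Model('base.en', n_threads=6)
for segment in model.transcribe('file.mp3'):
    print(segment.text)
```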

Then download and convert the appropriate model using the original `whisper.cpp` repository, producing a `<model>.mlmodelc` directory.

You can now verify if everything's working:
@@ -180,7 +192,7 @@ usage: pwcpp [-h] [-m MODEL] [--version] [--processors PROCESSORS] [-otxt] [-ovt
[--translate TRANSLATE] [--no_context NO_CONTEXT] [--single_segment SINGLE_SEGMENT] [--print_special PRINT_SPECIAL]
[--print_progress PRINT_PROGRESS] [--print_realtime PRINT_REALTIME] [--print_timestamps PRINT_TIMESTAMPS]
[--token_timestamps TOKEN_TIMESTAMPS] [--thold_pt THOLD_PT] [--thold_ptsum THOLD_PTSUM] [--max_len MAX_LEN]
[--split_on_word SPLIT_ON_WORD] [--max_tokens MAX_TOKENS] [--speed_up SPEED_UP] [--audio_ctx AUDIO_CTX]
[--split_on_word SPLIT_ON_WORD] [--max_tokens MAX_TOKENS] [--audio_ctx AUDIO_CTX]
[--prompt_tokens PROMPT_TOKENS] [--prompt_n_tokens PROMPT_N_TOKENS] [--language LANGUAGE] [--suppress_blank SUPPRESS_BLANK]
[--suppress_non_speech_tokens SUPPRESS_NON_SPEECH_TOKENS] [--temperature TEMPERATURE] [--max_initial_ts MAX_INITIAL_TS]
[--length_penalty LENGTH_PENALTY] [--temperature_inc TEMPERATURE_INC] [--entropy_thold ENTROPY_THOLD]
@@ -234,7 +246,6 @@ options:
split on word rather than on token (when used with max_len)
--max_tokens MAX_TOKENS
max tokens per segment (0 = no limit)
--speed_up SPEED_UP speed-up the audio by 2x using Phase Vocoder
--audio_ctx AUDIO_CTX
overwrite the audio context size (0 = use default)
--prompt_tokens PROMPT_TOKENS
8 changes: 0 additions & 8 deletions pywhispercpp/constants.py
@@ -154,14 +154,6 @@
'options': None,
'default': 0
},
# [EXPERIMENTAL] speed-up techniques
# note: these can significantly reduce the quality of the output
'speed_up': {
'type': bool,
'description': "speed-up the audio by 2x using Phase Vocoder",
'options': None,
'default': False
},
'audio_ctx': {
'type': int,
'description': "overwrite the audio context size (0 = use default)",
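
With the experimental `speed_up` entry removed from the schema above, the remaining parameters (for example `audio_ctx` and `max_tokens`) are still passed as before. A minimal sketch, assuming they map to keyword arguments the same way the `Model` docstring example in `pywhispercpp/model.py` passes `n_threads`; the values shown are just the documented defaults.

```python
from pywhispercpp.model import Model

# Illustrative values: audio_ctx and max_tokens are entries of the schema
# above (0 = use default / no limit). Passing speed_up=... would no longer
# be a valid keyword after this commit.
model = Model('base.en', n_threads=6)
segments = model.transcribe('file.mp3', audio_ctx=0, max_tokens=0)
for segment in segments:
    print(segment.text)
```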
2 changes: 1 addition & 1 deletion pywhispercpp/model.py
Expand Up @@ -52,7 +52,7 @@ class Model:
Example usage.
```python
model = Model('base.en', n_threads=6)
segments = model.transcribe('file.mp3', speed_up=True)
segments = model.transcribe('file.mp3')
for segment in segments:
print(segment.text)
```
15 changes: 0 additions & 15 deletions src/main.cpp
@@ -90,17 +90,6 @@ int whisper_pcm_to_mel_wrapper(
return whisper_pcm_to_mel(ctx->ptr, samples_ptr, n_samples, n_threads);
};

int whisper_pcm_to_mel_phase_vocoder_wrapper(
struct whisper_context_wrapper* ctx,
py::array_t<float> samples,
int n_samples,
int n_threads){
py::buffer_info buf = samples.request();
float *samples_ptr = static_cast<float *>(buf.ptr);
return whisper_pcm_to_mel_phase_vocoder(ctx->ptr, samples_ptr, n_samples, n_threads);

};

int whisper_set_mel_wrapper(
struct whisper_context_wrapper * ctx,
py::array_t<float> data,
@@ -388,9 +377,6 @@ PYBIND11_MODULE(_pywhispercpp, m) {
m.def("whisper_pcm_to_mel", &whisper_pcm_to_mel_wrapper, "Convert RAW PCM audio to log mel spectrogram.\n"
"The resulting spectrogram is stored inside the provided whisper context.\n"
"Returns 0 on success");
m.def("whisper_pcm_to_mel_phase_vocoder", &whisper_pcm_to_mel_phase_vocoder_wrapper, "Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2. \n"
"The resulting spectrogram is stored inside the provided whisper context.\n"
"Returns 0 on success");

m.def("whisper_set_mel", &whisper_set_mel_wrapper, " This can be used to set a custom log mel spectrogram inside the provided whisper context.\n"
"Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.\n"
@@ -490,7 +476,6 @@ PYBIND11_MODULE(_pywhispercpp, m) {
.def_readwrite("max_len", &whisper_full_params::max_len)
.def_readwrite("split_on_word", &whisper_full_params::split_on_word)
.def_readwrite("max_tokens", &whisper_full_params::max_tokens)
.def_readwrite("speed_up", &whisper_full_params::speed_up)
.def_readwrite("audio_ctx", &whisper_full_params::audio_ctx)
.def_readwrite("initial_prompt", &whisper_full_params::initial_prompt)
.def_readwrite("prompt_tokens", &whisper_full_params::prompt_tokens)
2 changes: 1 addition & 1 deletion whisper.cpp
Submodule whisper.cpp updated 466 files
