diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 000000000..116c85937
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,37 @@
+name: Release
+
+on:
+  push:
+    branches:
+      - main
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions-ecosystem/action-regex-match@v2
+      id: regex-match
+      with:
+        text: ${{ github.event.head_commit.message }}
+        regex: '^Release ([^ ]+)'
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.8'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Release
+      if: ${{ steps.regex-match.outputs.match != '' }}
+      uses: softprops/action-gh-release@v1
+      with:
+        tag_name: v${{ steps.regex-match.outputs.group1 }}
+    - name: Build and publish
+      if: ${{ steps.regex-match.outputs.match != '' }}
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+      run: |
+        python setup.py sdist
+        twine upload dist/*
diff --git a/MANIFEST.in b/MANIFEST.in
index 838aa2d9c..f091bded7 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,6 @@
+include requirements.txt
+include README.md
+include LICENSE
 include whisper/assets/*
 include whisper/assets/gpt2/*
 include whisper/assets/multilingual/*
diff --git a/README.md b/README.md
index 281561b42..9d39f8202 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [[Blog]](https://openai.com/blog/whisper)
 [[Paper]](https://arxiv.org/abs/2212.04356)
-[[Model card]](model-card.md)
+[[Model card]](https://github.com/openai/whisper/blob/main/model-card.md)
 [[Colab example]](https://colab.research.google.com/github/openai/whisper/blob/master/notebooks/LibriSpeech.ipynb)
 
 Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
@@ -10,14 +10,14 @@ Whisper is a general-purpose speech recognition model. It is trained on a large
 
 ## Approach
 
-![Approach](approach.png)
+![Approach](https://raw.githubusercontent.com/openai/whisper/main/approach.png)
 
 A Transformer sequence-to-sequence model is trained on various speech processing tasks, including multilingual speech recognition, speech translation, spoken language identification, and voice activity detection. All of these tasks are jointly represented as a sequence of tokens to be predicted by the decoder, allowing for a single model to replace many different stages of a traditional speech processing pipeline. The multitask training format uses a set of special tokens that serve as task specifiers or classification targets.
 
 
 ## Setup
 
-We used Python 3.9.9 and [PyTorch](https://pytorch.org/) 1.10.1 to train and test our models, but the codebase is expected to be compatible with Python 3.7 or later and recent PyTorch versions. The codebase also depends on a few Python packages, most notably [HuggingFace Transformers](https://huggingface.co/docs/transformers/index) for their fast tokenizer implementation and [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) for reading audio files. The following command will pull and install the latest commit from this repository, along with its Python dependencies
+We used Python 3.9.9 and [PyTorch](https://pytorch.org/) 1.10.1 to train and test our models, but the codebase is expected to be compatible with Python 3.7 or later and recent PyTorch versions. The codebase also depends on a few Python packages, most notably [HuggingFace Transformers](https://huggingface.co/docs/transformers/index) for their fast tokenizer implementation and [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) for reading audio files. The following command will pull and install the latest commit from this repository, along with its Python dependencies:
 
     pip install git+https://github.com/openai/whisper.git
 
@@ -68,7 +68,7 @@ For English-only applications, the `.en` models tend to perform better, especial
 
 Whisper's performance varies widely depending on the language. The figure below shows a WER (Word Error Rate) breakdown by languages of Fleurs dataset, using the `large-v2` model. More WER and BLEU scores corresponding to the other models and datasets can be found in Appendix D in [the paper](https://arxiv.org/abs/2212.04356). The smaller is better.
 
-![WER breakdown by language](language-breakdown.svg)
+![WER breakdown by language](https://raw.githubusercontent.com/openai/whisper/main/language-breakdown.svg)
 
 
 
@@ -90,7 +90,7 @@ Run the following to view all available options:
 
     whisper --help
 
-See [tokenizer.py](whisper/tokenizer.py) for the list of all available languages.
+See [tokenizer.py](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py) for the list of all available languages.
 
 
 ## Python usage
@@ -140,4 +140,4 @@ Please use the [🙌 Show and tell](https://github.com/openai/whisper/discussion
 
 ## License
 
-The code and the model weights of Whisper are released under the MIT License. See [LICENSE](LICENSE) for further details.
+The code and the model weights of Whisper are released under the MIT License. See [LICENSE](https://github.com/openai/whisper/blob/main/LICENSE) for further details.
diff --git a/setup.py b/setup.py
index 13f341ea6..0e822ab9e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,11 +3,19 @@
 import pkg_resources
 from setuptools import setup, find_packages
 
+
+def read_version(fname="whisper/version.py"):
+    exec(compile(open(fname, encoding="utf-8").read(), fname, "exec"))
+    return locals()["__version__"]
+
+
 setup(
-    name="whisper",
+    name="openai-whisper",
     py_modules=["whisper"],
-    version="1.0",
+    version=read_version(),
     description="Robust Speech Recognition via Large-Scale Weak Supervision",
+    long_description=open("README.md", encoding="utf-8").read(),
+    long_description_content_type="text/markdown",
     readme="README.md",
     python_requires=">=3.7",
     author="OpenAI",
@@ -20,9 +28,9 @@
             open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
         )
     ],
-    entry_points = {
-        'console_scripts': ['whisper=whisper.transcribe:cli'],
+    entry_points={
+        "console_scripts": ["whisper=whisper.transcribe:cli"],
     },
     include_package_data=True,
-    extras_require={'dev': ['pytest']},
+    extras_require={"dev": ["pytest"]},
 )
diff --git a/whisper/__init__.py b/whisper/__init__.py
index f80fe721b..2a1fb4ec6 100644
--- a/whisper/__init__.py
+++ b/whisper/__init__.py
@@ -12,6 +12,7 @@
 from .decoding import DecodingOptions, DecodingResult, decode, detect_language
 from .model import Whisper, ModelDimensions
 from .transcribe import transcribe
+from .version import __version__
 
 
 _MODELS = {
diff --git a/whisper/version.py b/whisper/version.py
new file mode 100644
index 000000000..8c605f4d5
--- /dev/null
+++ b/whisper/version.py
@@ -0,0 +1 @@
+__version__ = "20230117"
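
Note (not part of the patch): the new workflow gates releases on the head commit message of a push to `main` using the pattern `^Release ([^ ]+)`; only when it matches does it create the `v<version>` tag and upload an sdist to PyPI. A minimal Python sketch of that gate, using hypothetical commit messages:

```python
# Sketch of the release gate in .github/workflows/python-publish.yml:
# the same regex applied to example (hypothetical) commit messages.
import re

RELEASE_PATTERN = re.compile(r"^Release ([^ ]+)")

for message in ["Release 20230117", "Fix typo in README"]:
    match = RELEASE_PATTERN.match(message)
    if match:
        # The workflow would create tag v<version> and upload an sdist to PyPI.
        print(f"release build: tag v{match.group(1)}")
    else:
        print(f"no release for: {message!r}")
```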
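
The version string now lives in one place, `whisper/version.py`: `setup.py` reads it via `read_version()` by exec'ing that file (so the build does not have to import the package and its heavy dependencies), and the new re-export in `whisper/__init__.py` makes it available at runtime. A small usage check, assuming the package built from this commit is installed:

```python
# Runtime check of the packaged version (assumes openai-whisper from this commit is installed).
import whisper

print(whisper.__version__)  # "20230117" for this release
```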