From d37773e18241a90a9bd9d8249927c9d77676fbf3 Mon Sep 17 00:00:00 2001 From: Yuta Hayashibe Date: Thu, 29 Sep 2022 15:39:42 +0900 Subject: [PATCH 1/2] Fix typos --- README.md | 4 ++-- docs/index.md | 2 +- ginza/compound_splitter.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3797772..913a110 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ If you need to install `ja_ginza_electra` along with `pytorch_model.bin` at the $ pip install -U ginza https://github.com/megagonlabs/ginza/releases/download/latest/ja_ginza_electra-latest-with-model.tar.gz ``` -If you hope to accelarate the transformers-based models by using GPUs with CUDA support, you can install `spacy` by specifying the CUDA version as follows: +If you hope to accelerate the transformers-based models by using GPUs with CUDA support, you can install `spacy` by specifying the CUDA version as follows: ```console pip install -U "spacy[cuda110]" ``` @@ -287,7 +287,7 @@ Please read the official documents to compile user dictionaries with `sudachipy` - Important changes - Upgrade spaCy to v3 - Release transformer-based `ja-ginza-electra` model - - Improve UPOS accuracy of the standard `ja-ginza` model by adding `morphologizer` to the tail of spaCy pipleline + - Improve UPOS accuracy of the standard `ja-ginza` model by adding `morphologizer` to the tail of spaCy pipeline - Need to insrtall analysis model along with `ginza` package - High accuracy model (>=16GB memory needed) - `pip install -U ginza ja-ginza-electra` diff --git a/docs/index.md b/docs/index.md index 3a59c5d..942acb6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -315,7 +315,7 @@ Contains information from mC4 which is made available under the ODC Attribution - 重要な変更 - プラットフォームをspaCy v3に変更 - transformersモデルを採用して飛躍的に精度を向上した解析モデルパッケージ`ja-ginza-electra`をリリースしました。 - - 従来型の解析モデルパッケージ`ja-ginza`のpiplelineに`morphologizer`を追加し、UD品詞解析精度を向上しました。 + - 従来型の解析モデルパッケージ`ja-ginza`のpipelineに`morphologizer`を追加し、UD品詞解析精度を向上しました。 - transformersモデルの追加に伴いGiNZA v5インストール時は`ginza`パッケージとともに解析モデルパッケージを明示的に指定する必要があります - 解析精度重視モデル (メモリ容量16GB以上を推奨) - `pip install -U ginza ja-ginza-electra` diff --git a/ginza/compound_splitter.py b/ginza/compound_splitter.py index aa3e8a9..a474a35 100644 --- a/ginza/compound_splitter.py +++ b/ginza/compound_splitter.py @@ -127,7 +127,7 @@ def morph(dtoken): print(list(enumerate(doc.user_data["sub_tokens"])), file=sys.stderr) raise e - # work-around: retokenize() does not consider the head of the splitted tokens + # work-around: retokenize() does not consider the head of the split tokens if not compounds: for t in doc: if t.i < token_i or token_i + len(sub_tokens) <= t.i: From 63d412b7cbf05617d02b4517a18736211f971af9 Mon Sep 17 00:00:00 2001 From: Yuta Hayashibe Date: Thu, 29 Sep 2022 15:41:38 +0900 Subject: [PATCH 2/2] Add a typo checker --- .github/workflows/typos.yml | 21 +++++++++++++++++++++ _typos.toml | 9 +++++++++ 2 files changed, 30 insertions(+) create mode 100644 .github/workflows/typos.yml create mode 100644 _typos.toml diff --git a/.github/workflows/typos.yml b/.github/workflows/typos.yml new file mode 100644 index 0000000..1efee47 --- /dev/null +++ b/.github/workflows/typos.yml @@ -0,0 +1,21 @@ +--- +# yamllint disable rule:line-length +name: Typos + +on: # yamllint disable-line rule:truthy + push: + pull_request: + types: + - opened + - synchronize + - reopened + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: typos-action + uses: crate-ci/typos@v1.12.7 diff --git a/_typos.toml b/_typos.toml new file mode 100644 index 0000000..8c646be --- /dev/null +++ b/_typos.toml @@ -0,0 +1,9 @@ +# Files for typos +# Instruction: https://github.com/marketplace/actions/typos-action#getting-started + +[default.extend-identifiers] + +[default.extend-words] + +[files] +extend-exclude = ["requirements.txt"]