Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/v4' into store-activations
Browse files Browse the repository at this point in the history
  • Loading branch information
danieldk committed Aug 1, 2022
2 parents 403b1f1 + e581eea commit 288d27e
Show file tree
Hide file tree
Showing 77 changed files with 1,472 additions and 676 deletions.
37 changes: 18 additions & 19 deletions .github/azure-steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ steps:
- script: python -m mypy spacy
displayName: 'Run mypy'
condition: ne(variables['python_version'], '3.10')

- task: DeleteFiles@1
inputs:
Expand All @@ -41,7 +40,7 @@ steps:
- bash: |
${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
${{ parameters.prefix }} python -m pip install dist/$SDIST
${{ parameters.prefix }} SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
displayName: "Install from sdist"
- script: |
Expand All @@ -64,12 +63,12 @@ steps:
displayName: "Run GPU tests"
condition: eq(${{ parameters.gpu }}, true)
# - script: |
# python -m spacy download ca_core_news_sm
# python -m spacy download ca_core_news_md
# python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
# displayName: 'Test download CLI'
# condition: eq(variables['python_version'], '3.8')
- script: |
python -m spacy download ca_core_news_sm
python -m spacy download ca_core_news_md
python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
displayName: 'Test download CLI'
condition: eq(variables['python_version'], '3.8')
- script: |
python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
Expand All @@ -93,17 +92,17 @@ steps:
displayName: 'Test train CLI'
condition: eq(variables['python_version'], '3.8')
# - script: |
# python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
# PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
# displayName: 'Test assemble CLI'
# condition: eq(variables['python_version'], '3.8')
#
# - script: |
# python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
# python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
# displayName: 'Test assemble CLI vectors warning'
# condition: eq(variables['python_version'], '3.8')
- script: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
displayName: 'Test assemble CLI'
condition: eq(variables['python_version'], '3.8')
- script: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
displayName: 'Test assemble CLI vectors warning'
condition: eq(variables['python_version'], '3.8')
- script: |
python .github/validate_universe_json.py website/meta/universe.json
Expand Down
67 changes: 67 additions & 0 deletions .github/spacy_universe_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import os
import sys
import json
from datetime import datetime

from slack_sdk.web.client import WebClient

CHANNEL = "#alerts-universe"
SLACK_TOKEN = os.environ.get("SLACK_BOT_TOKEN", "ENV VAR not available!")
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

client = WebClient(SLACK_TOKEN)
github_context = json.loads(sys.argv[1])

event = github_context['event']
pr_title = event['pull_request']["title"]
pr_link = event['pull_request']["patch_url"].replace(".patch", "")
pr_author_url = event['sender']["html_url"]
pr_author_name = pr_author_url.rsplit('/')[-1]
pr_created_at_dt = datetime.strptime(
event['pull_request']["created_at"],
DATETIME_FORMAT
)
pr_created_at = pr_created_at_dt.strftime("%c")
pr_updated_at_dt = datetime.strptime(
event['pull_request']["updated_at"],
DATETIME_FORMAT
)
pr_updated_at = pr_updated_at_dt.strftime("%c")

blocks = [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "📣 New spaCy Universe Project Alert ✨"
}
},
{
"type": "section",
"fields": [
{
"type": "mrkdwn",
"text": f"*Pull Request:*\n<{pr_link}|{pr_title}>"
},
{
"type": "mrkdwn",
"text": f"*Author:*\n<{pr_author_url}|{pr_author_name}>"
},
{
"type": "mrkdwn",
"text": f"*Created at:*\n {pr_created_at}"
},
{
"type": "mrkdwn",
"text": f"*Last Updated:*\n {pr_updated_at}"
}
]
}
]


client.chat_postMessage(
channel=CHANNEL,
text="spaCy universe project PR alert",
blocks=blocks
)
30 changes: 30 additions & 0 deletions .github/workflows/spacy_universe_alert.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: spaCy universe project alert

on:
pull_request_target:
paths:
- "website/meta/universe.json"

jobs:
build:
runs-on: ubuntu-latest

steps:
- name: Dump GitHub context
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
PR_NUMBER: ${{github.event.number}}
run: |
echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v1
- uses: actions/setup-python@v1
- name: Install Bernadette app dependency and send an alert
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GITHUB_CONTEXT: ${{ toJson(github) }}
CHANNEL: "#alerts-universe"
run: |
pip install slack-sdk==3.17.2 aiohttp==3.8.1
echo "$CHANNEL"
python .github/spacy_universe_alert.py "$GITHUB_CONTEXT"
3 changes: 2 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ except: # noqa: E722

### Python conventions

All Python code must be written **compatible with Python 3.6+**.
All Python code must be written **compatible with Python 3.6+**. More detailed
code conventions can be found in the [developer docs](https://github.com/explosion/spaCy/blob/master/extra/DEVELOPER_DOCS/Code%20Conventions.md).

#### I/O and handling paths

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ production-ready [**training system**](https://spacy.io/usage/training) and easy
model packaging, deployment and workflow management. spaCy is commercial
open-source software, released under the MIT license.

💫 **Version 3.3.1 out now!**
💫 **Version 3.4.0 out now!**
[Check out the release notes here.](https://github.com/explosion/spaCy/releases)

[![Azure Pipelines](https://img.shields.io/azure-devops/build/explosion-ai/public/8/master.svg?logo=azure-pipelines&style=flat-square&label=build)](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
Expand Down
6 changes: 4 additions & 2 deletions build-constraints.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# build version constraints for use with wheelwright + multibuild
numpy==1.15.0; python_version<='3.7'
numpy==1.17.3; python_version=='3.8'
numpy==1.15.0; python_version<='3.7' and platform_machine!='aarch64'
numpy==1.19.2; python_version<='3.7' and platform_machine=='aarch64'
numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
numpy==1.19.3; python_version=='3.9'
numpy==1.21.3; python_version=='3.10'
numpy; python_version>='3.11'
44 changes: 32 additions & 12 deletions extra/DEVELOPER_DOCS/ExplosionBot.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,41 @@ To summon the robot, write a github comment on the issue/PR you wish to test. Th

Some things to note:

* The `@explosion-bot please` must be the beginning of the command - you cannot add anything in front of this or else the robot won't know how to parse it. Adding anything at the end aside from the test name will also confuse the robot, so keep it simple!
* The command name (such as `test_gpu`) must be one of the tests that the bot knows how to run. The available commands are documented in the bot's [workflow config](https://github.com/explosion/spaCy/blob/master/.github/workflows/explosionbot.yml#L26) and must match exactly one of the commands listed there.
* The robot can't do multiple things at once, so if you want it to run multiple tests, you'll have to summon it with one comment per test.
* For the `test_gpu` command, you can specify an optional thinc branch (from the spaCy repo) or a spaCy branch (from the thinc repo) with either the `--thinc-branch` or `--spacy-branch` flags. By default, the bot will pull in the PR branch from the repo where the command was issued, and the main branch of the other repository. However, if you need to run against another branch, you can say (for example):
- The `@explosion-bot please` must be the beginning of the command - you cannot add anything in front of this or else the robot won't know how to parse it. Adding anything at the end aside from the test name will also confuse the robot, so keep it simple!
- The command name (such as `test_gpu`) must be one of the tests that the bot knows how to run. The available commands are documented in the bot's [workflow config](https://github.com/explosion/spaCy/blob/master/.github/workflows/explosionbot.yml#L26) and must match exactly one of the commands listed there.
- The robot can't do multiple things at once, so if you want it to run multiple tests, you'll have to summon it with one comment per test.

```
@explosion-bot please test_gpu --thinc-branch develop
```
You can also specify a branch from an unmerged PR:
```
@explosion-bot please test_gpu --thinc-branch refs/pull/633/head
```
### Examples

- Execute spaCy slow GPU tests with a custom thinc branch from a spaCy PR:

```
@explosion-bot please test_slow_gpu --thinc-branch <branch_name>
```

`branch_name` can either be a named branch, e.g: `develop`, or an unmerged PR, e.g: `refs/pull/<pr_number>/head`.

- Execute spaCy Transformers GPU tests from a spaCy PR:

```
@explosion-bot please test_gpu --run-on spacy-transformers --run-on-branch master --spacy-branch current_pr
```

This will launch the GPU pipeline for the `spacy-transformers` repo on its `master` branch, using the current spaCy PR's branch to build spaCy. The name of the repository passed to `--run-on` is case-sensitive, e.g: use `spaCy` instead of `spacy`.

- General info about supported commands.

```
@explosion-bot please info
```

- Help text for a specific command
```
@explosion-bot please <command> --help
```

## Troubleshooting

If the robot isn't responding to commands as expected, you can check its logs in the [Github Action](https://github.com/explosion/spaCy/actions/workflows/explosionbot.yml).
If the robot isn't responding to commands as expected, you can check its logs in the [Github Action](https://github.com/explosion/spaCy/actions/workflows/explosionbot.yml).

For each command sent to the bot, there should be a run of the `explosion-bot` workflow. In the `Install and run explosion-bot` step, towards the ends of the logs you should see info about the configuration that the bot was run with, as well as any errors that the bot encountered.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ requires = [
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
"thinc>=8.1.0.dev3,<8.2.0",
"thinc>=8.1.0,<8.2.0",
"pathy",
"numpy>=1.15.0",
]
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ spacy-legacy>=3.0.9,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.1.0.dev3,<8.2.0
thinc>=8.1.0,<8.2.0
ml_datasets>=0.2.0,<0.3.0
murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.1.0
Expand All @@ -30,7 +30,7 @@ pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0
flake8>=3.8.0,<3.10.0
hypothesis>=3.27.0,<7.0.0
mypy>=0.910,<=0.960
mypy>=0.910,<0.970; platform_machine!='aarch64'
types-dataclasses>=0.1.3; python_version < "3.7"
types-mock>=0.1.1
types-requests
Expand Down
10 changes: 7 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ setup_requires =
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
thinc>=8.1.0.dev3,<8.2.0
thinc>=8.1.0,<8.2.0
install_requires =
# Our libraries
spacy-legacy>=3.0.9,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.1.0.dev3,<8.2.0
thinc>=8.1.0,<8.2.0
wasabi>=0.9.1,<1.1.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
Expand Down Expand Up @@ -103,14 +103,18 @@ cuda114 =
cupy-cuda114>=5.0.0b4,<11.0.0
cuda115 =
cupy-cuda115>=5.0.0b4,<11.0.0
cuda116 =
cupy-cuda116>=5.0.0b4,<11.0.0
cuda117 =
cupy-cuda117>=5.0.0b4,<11.0.0
apple =
thinc-apple-ops>=0.1.0.dev0,<1.0.0
# Language tokenizers with external dependencies
ja =
sudachipy>=0.5.2,!=0.6.1
sudachidict_core>=20211220
ko =
natto-py==0.9.0
natto-py>=0.9.0
th =
pythainlp>=2.0

Expand Down
8 changes: 7 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def build_options(self):

class build_ext_subclass(build_ext, build_ext_options):
def build_extensions(self):
if self.parallel is None and os.environ.get("SPACY_NUM_BUILD_JOBS") is not None:
self.parallel = int(os.environ.get("SPACY_NUM_BUILD_JOBS"))
build_ext_options.build_options(self)
build_ext.build_extensions(self)

Expand Down Expand Up @@ -206,7 +208,11 @@ def setup_package():
for name in MOD_NAMES:
mod_path = name.replace(".", "/") + ".pyx"
ext = Extension(
name, [mod_path], language="c++", include_dirs=include_dirs, extra_compile_args=["-std=c++11"]
name,
[mod_path],
language="c++",
include_dirs=include_dirs,
extra_compile_args=["-std=c++11"],
)
ext_modules.append(ext)
print("Cythonizing sources")
Expand Down
2 changes: 1 addition & 1 deletion spacy/about.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.4.0"
__version__ = "3.4.1"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
__projects__ = "https://github.com/explosion/projects"
Expand Down
17 changes: 17 additions & 0 deletions spacy/cli/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,23 @@ def git_sparse_checkout(repo, subpath, dest, branch):
shutil.move(str(source_path), str(dest))


def git_repo_branch_exists(repo: str, branch: str) -> bool:
"""Uses 'git ls-remote' to check if a repository and branch exists
repo (str): URL to get repo.
branch (str): Branch on repo to check.
RETURNS (bool): True if repo:branch exists.
"""
get_git_version()
cmd = f"git ls-remote {repo} {branch}"
# We might be tempted to use `--exit-code` with `git ls-remote`, but
# `run_command` handles the `returncode` for us, so we'll rely on
# the fact that stdout returns '' if the requested branch doesn't exist
ret = run_command(cmd, capture=True)
exists = ret.stdout != ""
return exists


def get_git_version(
error: str = "Could not run 'git'. Make sure it's installed and the executable is available.",
) -> Tuple[int, int]:
Expand Down
2 changes: 1 addition & 1 deletion spacy/cli/pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def pretrain_cli(
# TODO: What's the solution here? How do we handle optional blocks?
msg.fail("The [pretraining] block in your config is empty", exits=1)
if not output_dir.exists():
output_dir.mkdir()
output_dir.mkdir(parents=True)
msg.good(f"Created output directory: {output_dir}")
# Save non-interpolated config
raw_config.to_disk(output_dir / "config.cfg")
Expand Down
Loading

0 comments on commit 288d27e

Please sign in to comment.