Skip to content

Commit

Permalink
Merge branch 'master' into feat/update_v4
Browse files Browse the repository at this point in the history
  • Loading branch information
svlandeg committed May 14, 2024
2 parents 287deee + c195ca4 commit c27679f
Show file tree
Hide file tree
Showing 47 changed files with 1,115 additions and 758 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/explosionbot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
- name: Install and run explosion-bot
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gputests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
branch: [master, main]
branch: [master, v4]
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lock.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- uses: dessant/lock-threads@v4
- uses: dessant/lock-threads@v5
with:
process-only: 'issues'
issue-inactive-days: '30'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/slowtests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ jobs:
strategy:
fail-fast: false
matrix:
branch: [master, main]
branch: [master, v4]
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
ref: ${{ matrix.branch }}
- name: Get commits from past 24 hours
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/spacy_universe_alert.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
run: |
echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: '3.10'
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.9"
architecture: x64

- name: black
run: |
Expand Down Expand Up @@ -71,13 +70,12 @@ jobs:

steps:
- name: Check out repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
architecture: x64

- name: Install dependencies
run: |
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/universe_validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.9"
architecture: x64

- name: Validate website/meta/universe.json
run: |
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (C) 2016-2023 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
Copyright (C) 2016-2024 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
5 changes: 2 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.10.0
smart-open>=5.2.1,<7.0.0
weasel>=0.1.0,<0.4.0
typer>=0.3.0,<1.0.0
weasel>=0.1.0,<0.5.0
# Third party dependencies
numpy>=1.15.0; python_version < "3.9"
numpy>=1.19.0; python_version >= "3.9"
Expand Down
5 changes: 2 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,9 @@ install_requires =
wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
weasel>=0.1.0,<0.4.0
weasel>=0.1.0,<0.5.0
# Third-party dependencies
typer>=0.3.0,<0.10.0
smart-open>=5.2.1,<7.0.0
typer>=0.3.0,<1.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0; python_version < "3.9"
numpy>=1.19.0; python_version >= "3.9"
Expand Down
2 changes: 2 additions & 0 deletions spacy/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from wasabi import msg

# Needed for testing
from . import download as download_module # noqa: F401
from ._util import app, setup_cli # noqa: F401
from .apply import apply # noqa: F401
from .assemble import assemble_cli # noqa: F401
Expand Down
19 changes: 18 additions & 1 deletion spacy/cli/download.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
from typing import Optional, Sequence
from urllib.parse import urljoin

import requests
import typer
Expand Down Expand Up @@ -64,6 +65,13 @@ def download(
)
pip_args = pip_args + ("--no-deps",)
if direct:
# Reject model names with '/', in order to prevent shenanigans.
if "/" in model:
msg.fail(
title="Model download rejected",
text=f"Cannot download model '{model}'. Models are expected to be file names, not URLs or fragments",
exits=True,
)
components = model.split("-")
model_name = "".join(components[:-1])
version = components[-1]
Expand Down Expand Up @@ -156,7 +164,16 @@ def get_latest_version(model: str) -> str:
def download_model(
    filename: str, user_pip_args: Optional[Sequence[str]] = None
) -> None:
    """Install a model package from the project's download location via pip.

    filename (str): Location of the package file, resolved relative to
        `about.__download_url__`.
    user_pip_args (Optional[Sequence[str]]): Extra arguments inserted into
        the `pip install` command ahead of the download URL.
    RAISES (ValueError): If the resolved URL does not lie below the download
        location.
    """
    # Construct the download URL carefully. We need to make sure we don't
    # allow relative paths or other shenanigans to trick us into downloading
    # from outside our own repo.
    base_url = about.__download_url__
    # urljoin requires that the path ends with /, or the last path part will be dropped
    if not base_url.endswith("/"):
        base_url += "/"
    download_url = urljoin(base_url, filename)
    # Compare against the slash-terminated base. Checking the bare prefix
    # would also accept sibling paths that merely share it, e.g. a filename
    # of "../download-other/pkg" resolving to ".../download-other/pkg".
    if not download_url.startswith(base_url):
        raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
    pip_args = list(user_pip_args) if user_pip_args is not None else []
    cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
    run_command(cmd)
4 changes: 2 additions & 2 deletions spacy/cli/find_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def find_threshold_cli(
# fmt: on
):
"""
Runs prediction trials for a trained model with varying tresholds to maximize
Runs prediction trials for a trained model with varying thresholds to maximize
the specified metric. The search space for the threshold is traversed linearly
from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
(the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
Expand Down Expand Up @@ -81,7 +81,7 @@ def find_threshold(
silent: bool = True,
) -> Tuple[float, float, Dict[float, float]]:
"""
Runs prediction trials for models with varying tresholds to maximize the specified metric.
Runs prediction trials for models with varying thresholds to maximize the specified metric.
model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory.
data_path (Path): Path to file with DocBin with docs to use for threshold search.
pipe_name (str): Name of pipe to examine thresholds for.
Expand Down
1 change: 1 addition & 0 deletions spacy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ class Warnings(metaclass=ErrorsWithCodes):
"key attribute for vectors, configure it through Vectors(attr=) or "
"'spacy init vectors --attr'")
W126 = ("These keys are unsupported: {unsupported}")
W127 = ("Not all `Language.pipe` worker processes completed successfully")

# v4 warning strings
W401 = ("`incl_prior is True`, but the selected knowledge base type {kb_type} doesn't support prior probability "
Expand Down
5 changes: 5 additions & 0 deletions spacy/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -1844,6 +1844,9 @@ def prepare_input(
for proc in procs:
proc.join()

if not all(proc.exitcode == 0 for proc in procs):
warnings.warn(Warnings.W127)

def _link_components(self) -> None:
"""Register 'listeners' within pipeline components, to allow them to
effectively share weights.
Expand Down Expand Up @@ -2467,6 +2470,7 @@ def _apply_pipes(
if isinstance(texts_with_ctx, _WorkDoneSentinel):
sender.close()
receiver.close()
return

docs = (
ensure_doc(doc_like, context) for doc_like, context in texts_with_ctx
Expand All @@ -2492,6 +2496,7 @@ def _apply_pipes(
# stop processing.
sender.close()
receiver.close()
return


class _Sender:
Expand Down
Loading

0 comments on commit c27679f

Please sign in to comment.