diff --git a/pyproject.toml b/pyproject.toml
index 317c5fdbeaa..7abd7a96f3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,6 @@ requires = [
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
"thinc>=8.1.0,<8.2.0",
- "pathy",
"numpy>=1.15.0",
]
build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
index 437dd415a93..3e8501b2f3c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
# Our libraries
-spacy-legacy>=3.0.9,<3.1.0
+spacy-legacy>=3.0.10,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
@@ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64'
types-dataclasses>=0.1.3; python_version < "3.7"
types-mock>=0.1.1
types-requests
+types-setuptools>=57.0.0
black>=22.0,<23.0
diff --git a/setup.cfg b/setup.cfg
index 708300b0450..5fd820a96bc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,7 @@ setup_requires =
thinc>=8.1.0,<8.2.0
install_requires =
# Our libraries
- spacy-legacy>=3.0.9,<3.1.0
+ spacy-legacy>=3.0.10,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
@@ -50,9 +50,9 @@ install_requires =
wasabi>=0.9.1,<1.1.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
+ # Third-party dependencies
typer>=0.3.0,<0.5.0
pathy>=0.3.5
- # Third-party dependencies
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0
diff --git a/spacy/__init__.py b/spacy/__init__.py
index 069215fda77..d60f46b96a1 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -31,21 +31,21 @@ def load(
name: Union[str, Path],
*,
vocab: Union[Vocab, bool] = True,
- disable: Iterable[str] = util.SimpleFrozenList(),
- enable: Iterable[str] = util.SimpleFrozenList(),
- exclude: Iterable[str] = util.SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = util.SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
) -> Language:
"""Load a spaCy model from an installed package or a local path.
name (str): Package name or model path.
vocab (Vocab): A Vocab object. If True, a vocab is created.
- disable (Iterable[str]): Names of pipeline components to disable. Disabled
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
- enable (Iterable[str]): Names of pipeline components to enable. All other
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (but can be enabled later using nlp.enable_pipe).
- exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
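With this change, `spacy.load` accepts a bare component name anywhere a list of names was previously required. A minimal sketch of the new calling convention (assumes the `en_core_web_sm` pipeline is installed):

```python
import spacy

# disable="ner" is now equivalent to disable=["ner"]
nlp = spacy.load("en_core_web_sm", disable="ner")
assert "ner" in nlp.component_names   # still loaded ...
assert "ner" not in nlp.pipe_names    # ... but not run
```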
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index b7de8872975..0c9a32b933c 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -20,7 +20,7 @@ def download_cli(
ctx: typer.Context,
model: str = Arg(..., help="Name of pipeline package to download"),
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
- sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
+ sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
# fmt: on
):
"""
@@ -36,7 +36,12 @@ def download_cli(
download(model, direct, sdist, *ctx.args)
-def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
+def download(
+ model: str,
+ direct: bool = False,
+ sdist: bool = False,
+ *pip_args,
+) -> None:
if (
not (is_package("spacy") or is_package("spacy-nightly"))
and "--no-deps" not in pip_args
@@ -50,13 +55,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
"dependencies, you'll have to install them manually."
)
pip_args = pip_args + ("--no-deps",)
- suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
- dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}"
if direct:
components = model.split("-")
model_name = "".join(components[:-1])
version = components[-1]
- download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
else:
model_name = model
if model in OLD_MODEL_SHORTCUTS:
@@ -67,13 +69,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
model_name = OLD_MODEL_SHORTCUTS[model]
compatibility = get_compatibility()
version = get_version(model_name, compatibility)
- download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
+
+ filename = get_model_filename(model_name, version, sdist)
+
+ download_model(filename, pip_args)
msg.good(
"Download and installation successful",
f"You can now load the package via spacy.load('{model_name}')",
)
+def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
+ dl_tpl = "{m}-{v}/{m}-{v}{s}"
+ egg_tpl = "#egg={m}=={v}"
+ suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
+ filename = dl_tpl.format(m=model_name, v=version, s=suffix)
+ if sdist:
+ filename += egg_tpl.format(m=model_name, v=version)
+ return filename
+
+
def get_compatibility() -> dict:
if is_prerelease_version(about.__version__):
version: Optional[str] = about.__version__
@@ -105,6 +120,11 @@ def get_version(model: str, comp: dict) -> str:
return comp[model][0]
+def get_latest_version(model: str) -> str:
+ comp = get_compatibility()
+ return get_version(model, comp)
+
+
def download_model(
filename: str, user_pip_args: Optional[Sequence[str]] = None
) -> None:
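`get_model_filename` now builds the relative download path in one place; the `#egg=` fragment is appended only for sdists, since pip can parse the name and version from a wheel filename but needs the egg hint for a plain `.tar.gz` URL. A sketch of the resulting paths, assuming the current suffix constants (`WHEEL_SUFFIX = "-py3-none-any.whl"`, `SDIST_SUFFIX = ".tar.gz"`) and an illustrative version:

```python
from spacy.cli.download import get_model_filename

get_model_filename("en_core_web_sm", "3.4.0")
# 'en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl'

get_model_filename("en_core_web_sm", "3.4.0", sdist=True)
# 'en_core_web_sm-3.4.0/en_core_web_sm-3.4.0.tar.gz#egg=en_core_web_sm==3.4.0'
```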
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index e6a1cb616b7..e6ac4270f5e 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -1,10 +1,13 @@
from typing import Optional, Dict, Any, Union, List
import platform
+import pkg_resources
+import json
from pathlib import Path
from wasabi import Printer, MarkdownRenderer
import srsly
from ._util import app, Arg, Opt, string_to_list
+from .download import get_model_filename, get_latest_version
from .. import util
from .. import about
@@ -16,6 +19,7 @@ def info_cli(
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
+ url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
# fmt: on
):
"""
@@ -23,10 +27,19 @@ def info_cli(
print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues.
+ Flag --url prints only the download URL of the most recent compatible
+ version of the pipeline.
+
DOCS: https://spacy.io/api/cli#info
"""
exclude = string_to_list(exclude)
- info(model, markdown=markdown, silent=silent, exclude=exclude)
+ info(
+ model,
+ markdown=markdown,
+ silent=silent,
+ exclude=exclude,
+ url=url,
+ )
def info(
@@ -35,11 +48,20 @@ def info(
markdown: bool = False,
silent: bool = True,
exclude: Optional[List[str]] = None,
+ url: bool = False,
) -> Union[str, dict]:
msg = Printer(no_print=silent, pretty=not silent)
if not exclude:
exclude = []
- if model:
+ if url:
+ if model is not None:
+ title = f"Download info for pipeline '{model}'"
+ data = info_model_url(model)
+ print(data["download_url"])
+ return data
+ else:
+ msg.fail("--url option requires a pipeline name", exits=1)
+ elif model:
title = f"Info about pipeline '{model}'"
data = info_model(model, silent=silent)
else:
@@ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
meta["source"] = str(model_path.resolve())
else:
meta["source"] = str(model_path)
+ download_url = info_installed_model_url(model)
+ if download_url:
+ meta["download_url"] = download_url
return {
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
}
+def info_installed_model_url(model: str) -> Optional[str]:
+ """Given a pipeline name, get the download URL if available, otherwise
+ return None.
+
+    This only works for pipelines installed as packages that include
+    dist-info metadata.
+ """
+ try:
+ dist = pkg_resources.get_distribution(model)
+ data = json.loads(dist.get_metadata("direct_url.json"))
+ return data["url"]
+ except pkg_resources.DistributionNotFound:
+ # no such package
+ return None
+ except Exception:
+ # something else, like no file or invalid JSON
+ return None
+
+def info_model_url(model: str) -> Dict[str, Any]:
+ """Return the download URL for the latest version of a pipeline."""
+ version = get_latest_version(model)
+
+ filename = get_model_filename(model, version)
+ download_url = about.__download_url__ + "/" + filename
+ release_tpl = "https://github.com/explosion/spacy-models/releases/tag/{m}-{v}"
+ release_url = release_tpl.format(m=model, v=version)
+ return {"download_url": download_url, "release_url": release_url}
+
+
def get_markdown(
data: Dict[str, Any],
title: Optional[str] = None,
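The two URLs that `info_model_url` returns are assembled from `about.__download_url__` and the release-tag template. A sketch of the output; the resolved version, and therefore both URLs, depend on the compatibility table at runtime:

```python
from spacy.cli.info import info_model_url

data = info_model_url("en_core_web_sm")
# data["download_url"], e.g.:
#   https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
# data["release_url"], e.g.:
#   https://github.com/explosion/spacy-models/releases/tag/en_core_web_sm-3.4.0
```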
diff --git a/spacy/errors.py b/spacy/errors.py
index 608305a064e..5ee1476c2ce 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -230,8 +230,9 @@ class Errors(metaclass=ErrorsWithCodes):
"initialized component.")
E004 = ("Can't set up pipeline component: a factory for '{name}' already "
"exists. Existing factory: {func}. New factory: {new_func}")
- E005 = ("Pipeline component '{name}' returned None. If you're using a "
- "custom component, maybe you forgot to return the processed Doc?")
+ E005 = ("Pipeline component '{name}' returned {returned_type} instead of a "
+ "Doc. If you're using a custom component, maybe you forgot to "
+ "return the processed Doc?")
E006 = ("Invalid constraints for adding pipeline component. You can only "
"set one of the following: before (component name or index), "
"after (component name or index), first (True) or last (True). "
diff --git a/spacy/lang/la/__init__.py b/spacy/lang/la/__init__.py
new file mode 100644
index 00000000000..15b87c5b9ee
--- /dev/null
+++ b/spacy/lang/la/__init__.py
@@ -0,0 +1,18 @@
+from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
+
+
+class LatinDefaults(BaseDefaults):
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+ stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
+
+
+class Latin(Language):
+ lang = "la"
+ Defaults = LatinDefaults
+
+
+__all__ = ["Latin"]
diff --git a/spacy/lang/la/lex_attrs.py b/spacy/lang/la/lex_attrs.py
new file mode 100644
index 00000000000..9efb4dd3c58
--- /dev/null
+++ b/spacy/lang/la/lex_attrs.py
@@ -0,0 +1,34 @@
+from ...attrs import LIKE_NUM
+import re
+
+# cf. Goyvaerts/Levithan 2009; case-insensitive, allows up to 4 repetitions (e.g. IIII)
+roman_numerals_compile = re.compile(
+ r"(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$"
+)
+
+_num_words = set(
+ """
+unus una unum duo duae tres tria quattuor quinque sex septem octo novem decem
+""".split()
+)
+
+_ordinal_words = set(
+ """
+primus prima primum secundus secunda secundum tertius tertia tertium
+""".split()
+)
+
+
+def like_num(text):
+ if text.isdigit():
+ return True
+ if roman_numerals_compile.match(text):
+ return True
+ if text.lower() in _num_words:
+ return True
+ if text.lower() in _ordinal_words:
+ return True
+ return False
+
+
+LEX_ATTRS = {LIKE_NUM: like_num}
diff --git a/spacy/lang/la/stop_words.py b/spacy/lang/la/stop_words.py
new file mode 100644
index 00000000000..8b590bb67b3
--- /dev/null
+++ b/spacy/lang/la/stop_words.py
@@ -0,0 +1,37 @@
+# Corrected Perseus list, cf. https://wiki.digitalclassicist.org/Stopwords_for_Greek_and_Latin
+
+STOP_WORDS = set(
+ """
+ab ac ad adhuc aliqui aliquis an ante apud at atque aut autem
+
+cum cur
+
+de deinde dum
+
+ego enim ergo es est et etiam etsi ex
+
+fio
+
+haud hic
+
+iam idem igitur ille in infra inter interim ipse is ita
+
+magis modo mox
+
+nam ne nec necque neque nisi non nos
+
+o ob
+
+per possum post pro
+
+quae quam quare qui quia quicumque quidem quilibet quis quisnam quisquam quisque quisquis quo quoniam
+
+sed si sic sive sub sui sum super suus
+
+tam tamen trans tu tum
+
+ubi uel uero
+
+vel vero
+""".split()
+)
diff --git a/spacy/lang/la/tokenizer_exceptions.py b/spacy/lang/la/tokenizer_exceptions.py
new file mode 100644
index 00000000000..060f6e085af
--- /dev/null
+++ b/spacy/lang/la/tokenizer_exceptions.py
@@ -0,0 +1,76 @@
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH
+from ...util import update_exc
+
+
+# TODO: Look into systematically handling u/v
+_exc = {
+ "mecum": [{ORTH: "me"}, {ORTH: "cum"}],
+ "tecum": [{ORTH: "te"}, {ORTH: "cum"}],
+ "nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}],
+ "vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}],
+ "uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}],
+}
+
+for orth in [
+ "A.",
+ "Agr.",
+ "Ap.",
+ "C.",
+ "Cn.",
+ "D.",
+ "F.",
+ "K.",
+ "L.",
+ "M'.",
+ "M.",
+ "Mam.",
+ "N.",
+ "Oct.",
+ "Opet.",
+ "P.",
+ "Paul.",
+ "Post.",
+ "Pro.",
+ "Q.",
+ "S.",
+ "Ser.",
+ "Sert.",
+ "Sex.",
+ "St.",
+ "Sta.",
+ "T.",
+ "Ti.",
+ "V.",
+ "Vol.",
+ "Vop.",
+ "U.",
+ "Uol.",
+ "Uop.",
+ "Ian.",
+ "Febr.",
+ "Mart.",
+ "Apr.",
+ "Mai.",
+ "Iun.",
+ "Iul.",
+ "Aug.",
+ "Sept.",
+ "Oct.",
+ "Nov.",
+ "Nou.",
+ "Dec.",
+ "Non.",
+ "Id.",
+ "A.D.",
+ "Coll.",
+ "Cos.",
+ "Ord.",
+ "Pl.",
+ "S.C.",
+ "Suff.",
+ "Trib.",
+]:
+ _exc[orth] = [{ORTH: orth}]
+
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
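A short sketch of the exceptions in action: fused pronoun + preposition forms are split into two tokens, while abbreviations such as `M.` survive as single tokens:

```python
import spacy

nlp = spacy.blank("la")
assert [t.text for t in nlp("mecum")] == ["me", "cum"]
assert [t.text for t in nlp("M. Tullius Cicero")][0] == "M."
```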
diff --git a/spacy/language.py b/spacy/language.py
index e89ae142b82..34a06e57665 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1028,8 +1028,8 @@ def __call__(
raise ValueError(Errors.E109.format(name=name)) from e
except Exception as e:
error_handler(name, proc, [doc], e)
- if doc is None:
- raise ValueError(Errors.E005.format(name=name))
+ if not isinstance(doc, Doc):
+ raise ValueError(Errors.E005.format(name=name, returned_type=type(doc)))
return doc
def disable_pipes(self, *names) -> "DisabledPipes":
@@ -1063,7 +1063,7 @@ def select_pipes(
"""
if enable is None and disable is None:
raise ValueError(Errors.E991)
- if disable is not None and isinstance(disable, str):
+ if isinstance(disable, str):
disable = [disable]
if enable is not None:
if isinstance(enable, str):
@@ -1698,9 +1698,9 @@ def from_config(
config: Union[Dict[str, Any], Config] = {},
*,
vocab: Union[Vocab, bool] = True,
- disable: Iterable[str] = SimpleFrozenList(),
- enable: Iterable[str] = SimpleFrozenList(),
- exclude: Iterable[str] = SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
meta: Dict[str, Any] = SimpleFrozenDict(),
auto_fill: bool = True,
validate: bool = True,
@@ -1711,12 +1711,12 @@ def from_config(
config (Dict[str, Any] / Config): The loaded config.
vocab (Vocab): A Vocab object. If True, a vocab is created.
- disable (Iterable[str]): Names of pipeline components to disable.
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
Disabled pipes will be loaded but they won't be run unless you
explicitly enable them by calling nlp.enable_pipe.
- enable (Iterable[str]): Names of pipeline components to enable. All other
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
- exclude (Iterable[str]): Names of pipeline components to exclude.
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
Excluded components won't be loaded.
meta (Dict[str, Any]): Meta overrides for nlp.meta.
auto_fill (bool): Automatically fill in missing values in config based
@@ -1727,6 +1727,12 @@ def from_config(
DOCS: https://spacy.io/api/language#from_config
"""
+ if isinstance(disable, str):
+ disable = [disable]
+ if isinstance(enable, str):
+ enable = [enable]
+ if isinstance(exclude, str):
+ exclude = [exclude]
if auto_fill:
config = Config(
cls.default_config, section_order=CONFIG_SECTION_ORDER
@@ -2031,25 +2037,29 @@ def to_disk(
@staticmethod
def _resolve_component_status(
- disable: Iterable[str], enable: Iterable[str], pipe_names: Collection[str]
+ disable: Union[str, Iterable[str]],
+ enable: Union[str, Iterable[str]],
+ pipe_names: Iterable[str],
) -> Tuple[str, ...]:
"""Derives whether (1) `disable` and `enable` values are consistent and (2)
resolves those to a single set of disabled components. Raises an error in
case of inconsistency.
- disable (Iterable[str]): Names of components or serialization fields to disable.
- enable (Iterable[str]): Names of pipeline components to enable.
+ disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable.
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable.
pipe_names (Iterable[str]): Names of all pipeline components.
RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
specified includes and excludes.
"""
- if disable is not None and isinstance(disable, str):
+ if isinstance(disable, str):
disable = [disable]
to_disable = disable
if enable:
+ if isinstance(enable, str):
+ enable = [enable]
to_disable = [
pipe_name for pipe_name in pipe_names if pipe_name not in enable
]
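`Language.from_config` now normalizes string-valued `disable`/`enable`/`exclude` before resolving component status. A sketch of the new string form; the config here is just a stand-in built from a blank pipeline:

```python
import spacy
from spacy.lang.en import English

base = spacy.blank("en")
base.add_pipe("tagger")
base.add_pipe("parser")

nlp = English.from_config(base.config, disable="tagger")  # same as ["tagger"]
assert "tagger" in nlp.component_names   # loaded, but disabled
assert "tagger" not in nlp.pipe_names
```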
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index 5105f69edb8..e1dba01a2ca 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -1,5 +1,5 @@
# cython: infer_types=True, cython: profile=True
-from typing import List
+from typing import List, Iterable
from libcpp.vector cimport vector
from libc.stdint cimport int32_t, int8_t
@@ -867,20 +867,27 @@ class _SetPredicate:
def __call__(self, Token token):
if self.is_extension:
- value = get_string_id(token._.get(self.attr))
+ value = token._.get(self.attr)
else:
value = get_token_attr_for_matcher(token.c, self.attr)
- if self.predicate in ("IS_SUBSET", "IS_SUPERSET", "INTERSECTS"):
+ if self.predicate in ("IN", "NOT_IN"):
+ if isinstance(value, (str, int)):
+ value = get_string_id(value)
+ else:
+ return False
+ elif self.predicate in ("IS_SUBSET", "IS_SUPERSET", "INTERSECTS"):
+ # ensure that all values are enclosed in a set
if self.attr == MORPH:
# break up MORPH into individual Feat=Val values
value = set(get_string_id(v) for v in MorphAnalysis.from_id(self.vocab, value))
+ elif isinstance(value, (str, int)):
+ value = set((get_string_id(value),))
+ elif isinstance(value, Iterable) and all(isinstance(v, (str, int)) for v in value):
+ value = set(get_string_id(v) for v in value)
else:
- # treat a single value as a list
- if isinstance(value, (str, int)):
- value = set([get_string_id(value)])
- else:
- value = set(get_string_id(v) for v in value)
+ return False
+
if self.predicate == "IN":
return value in self.value
elif self.predicate == "NOT_IN":
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index e70fcd6dd7b..3c1c1333a01 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -256,6 +256,11 @@ def ko_tokenizer_tokenizer():
return nlp.tokenizer
+@pytest.fixture(scope="module")
+def la_tokenizer():
+ return get_lang_class("la")().tokenizer
+
+
@pytest.fixture(scope="session")
def lb_tokenizer():
return get_lang_class("lb")().tokenizer
diff --git a/spacy/tests/lang/la/__init__.py b/spacy/tests/lang/la/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/spacy/tests/lang/la/test_exception.py b/spacy/tests/lang/la/test_exception.py
new file mode 100644
index 00000000000..966ae22cfec
--- /dev/null
+++ b/spacy/tests/lang/la/test_exception.py
@@ -0,0 +1,8 @@
+import pytest
+
+
+def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
+ text = "scio te omnia facturum, ut nobiscum quam primum sis"
+ tokens = la_tokenizer(text)
+ assert len(tokens) == 11
+ assert tokens[6].text == "nobis"
diff --git a/spacy/tests/lang/la/test_text.py b/spacy/tests/lang/la/test_text.py
new file mode 100644
index 00000000000..48e7359a438
--- /dev/null
+++ b/spacy/tests/lang/la/test_text.py
@@ -0,0 +1,35 @@
+import pytest
+from spacy.lang.la.lex_attrs import like_num
+
+
+@pytest.mark.parametrize(
+ "text,match",
+ [
+ ("IIII", True),
+ ("VI", True),
+ ("vi", True),
+ ("IV", True),
+ ("iv", True),
+ ("IX", True),
+ ("ix", True),
+ ("MMXXII", True),
+ ("0", True),
+ ("1", True),
+ ("quattuor", True),
+ ("decem", True),
+ ("tertius", True),
+ ("canis", False),
+ ("MMXX11", False),
+ (",", False),
+ ],
+)
+def test_lex_attrs_like_number(la_tokenizer, text, match):
+ tokens = la_tokenizer(text)
+ assert len(tokens) == 1
+ assert tokens[0].like_num == match
+
+
+@pytest.mark.parametrize("word", ["quinque"])
+def test_la_lex_attrs_capitals(word):
+ assert like_num(word)
+ assert like_num(word.upper())
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index 7c16da9f84e..ac905eeb452 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -368,6 +368,16 @@ def test_matcher_intersect_value_operator(en_vocab):
doc[0]._.ext = ["A", "B"]
assert len(matcher(doc)) == 1
+ # INTERSECTS matches nothing for iterables that aren't all str or int
+ matcher = Matcher(en_vocab)
+ pattern = [{"_": {"ext": {"INTERSECTS": ["Abx", "C"]}}}]
+ matcher.add("M", [pattern])
+ doc = Doc(en_vocab, words=["a", "b", "c"])
+ doc[0]._.ext = [["Abx"], "B"]
+ assert len(matcher(doc)) == 0
+ doc[0]._.ext = ["Abx", "B"]
+ assert len(matcher(doc)) == 1
+
# INTERSECTS with an empty pattern list matches nothing
matcher = Matcher(en_vocab)
pattern = [{"_": {"ext": {"INTERSECTS": []}}}]
@@ -476,14 +486,22 @@ def test_matcher_extension_set_membership(en_vocab):
assert len(matches) == 0
-@pytest.mark.xfail(reason="IN predicate must handle sequence values in extensions")
def test_matcher_extension_in_set_predicate(en_vocab):
matcher = Matcher(en_vocab)
Token.set_extension("ext", default=[])
pattern = [{"_": {"ext": {"IN": ["A", "C"]}}}]
matcher.add("M", [pattern])
doc = Doc(en_vocab, words=["a", "b", "c"])
+
+    # The IN predicate matches only if the extension value itself is a str
+    # or int equal to one of the pattern's values; list values never match.
doc[0]._.ext = ["A", "B"]
+ assert len(matcher(doc)) == 0
+
+ doc[0]._.ext = ["A"]
+ assert len(matcher(doc)) == 0
+
+ doc[0]._.ext = "A"
assert len(matcher(doc)) == 1
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index e20227455f5..b403f274f17 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -17,6 +17,7 @@ def test_build_dependencies():
"types-dataclasses",
"types-mock",
"types-requests",
+ "types-setuptools",
]
# ignore language-specific packages that shouldn't be installed by all
libs_ignore_setup = [
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index 6f00a1cd97a..b946061f608 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -618,6 +618,7 @@ def test_load_disable_enable() -> None:
base_nlp.to_disk(tmp_dir)
to_disable = ["parser", "tagger"]
to_enable = ["tagger", "parser"]
+ single_str = "tagger"
# Setting only `disable`.
nlp = spacy.load(tmp_dir, disable=to_disable)
@@ -632,6 +633,16 @@ def test_load_disable_enable() -> None:
]
)
+ # Loading with a string representing one component
+ nlp = spacy.load(tmp_dir, exclude=single_str)
+ assert single_str not in nlp.component_names
+
+ nlp = spacy.load(tmp_dir, disable=single_str)
+ assert single_str in nlp.component_names
+ assert single_str not in nlp.pipe_names
+ assert nlp._disabled == {single_str}
+ assert nlp.disabled == [single_str]
+
# Testing consistent enable/disable combination.
nlp = spacy.load(
tmp_dir,
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 6f3ba8acc93..03a98d32fbf 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -670,3 +670,25 @@ def test_dot_in_factory_names(nlp):
with pytest.raises(ValueError, match="not permitted"):
Language.factory("my.evil.component.v1", func=evil_component)
+
+
+def test_component_return():
+ """Test that an error is raised if components return a type other than a
+ doc."""
+ nlp = English()
+
+ @Language.component("test_component_good_pipe")
+ def good_pipe(doc):
+ return doc
+
+ nlp.add_pipe("test_component_good_pipe")
+ nlp("text")
+ nlp.remove_pipe("test_component_good_pipe")
+
+ @Language.component("test_component_bad_pipe")
+ def bad_pipe(doc):
+ return doc.text
+
+ nlp.add_pipe("test_component_bad_pipe")
+ with pytest.raises(ValueError, match="instead of a Doc"):
+ nlp("text")
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index d8743d32275..1c9b045ac08 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -10,7 +10,8 @@
from spacy.util import dot_to_object, SimpleFrozenList, import_file
from spacy.util import to_ternary_int
from thinc.api import Config, Optimizer, ConfigValidationError
-from thinc.api import set_current_ops
+from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps
+from thinc.compat import has_cupy_gpu, has_torch_mps_gpu
from spacy.training.batchers import minibatch_by_words
from spacy.lang.en import English
from spacy.lang.nl import Dutch
@@ -18,7 +19,6 @@
from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema
from pydantic import ValidationError
-from thinc.api import get_current_ops, NumpyOps, CupyOps
from .util import get_random_doc, make_tempdir
@@ -111,26 +111,25 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
def test_prefer_gpu():
current_ops = get_current_ops()
- try:
- import cupy # noqa: F401
-
- prefer_gpu()
+ if has_cupy_gpu:
+ assert prefer_gpu()
assert isinstance(get_current_ops(), CupyOps)
- except ImportError:
+ elif has_torch_mps_gpu:
+ assert prefer_gpu()
+ assert isinstance(get_current_ops(), MPSOps)
+ else:
assert not prefer_gpu()
set_current_ops(current_ops)
def test_require_gpu():
current_ops = get_current_ops()
- try:
- import cupy # noqa: F401
-
+ if has_cupy_gpu:
require_gpu()
assert isinstance(get_current_ops(), CupyOps)
- except ImportError:
- with pytest.raises(ValueError):
- require_gpu()
+ elif has_torch_mps_gpu:
+ require_gpu()
+ assert isinstance(get_current_ops(), MPSOps)
set_current_ops(current_ops)
diff --git a/spacy/tests/training/test_logger.py b/spacy/tests/training/test_logger.py
new file mode 100644
index 00000000000..0dfd0cbf416
--- /dev/null
+++ b/spacy/tests/training/test_logger.py
@@ -0,0 +1,30 @@
+import pytest
+import spacy
+
+from spacy.training import loggers
+
+
+@pytest.fixture()
+def nlp():
+ nlp = spacy.blank("en")
+ nlp.add_pipe("ner")
+ return nlp
+
+
+@pytest.fixture()
+def info():
+ return {
+ "losses": {"ner": 100},
+ "other_scores": {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80},
+ "epoch": 100,
+ "step": 125,
+ "score": 85,
+ }
+
+
+def test_console_logger(nlp, info):
+ console_logger = loggers.console_logger(
+ progress_bar=True, console_output=True, output_file=None
+ )
+ log_step, finalize = console_logger(nlp)
+ log_step(info)
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index edd0f1959cb..408ea71405f 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -1,10 +1,13 @@
-from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
+from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
from wasabi import Printer
+from pathlib import Path
import tqdm
import sys
+import srsly
from ..util import registry
from ..errors import Errors
+from .. import util
if TYPE_CHECKING:
from ..language import Language # noqa: F401
@@ -23,13 +26,44 @@ def setup_table(
return final_cols, final_widths, ["r" for _ in final_widths]
-@registry.loggers("spacy.ConsoleLogger.v1")
-def console_logger(progress_bar: bool = False):
+@registry.loggers("spacy.ConsoleLogger.v2")
+def console_logger(
+ progress_bar: bool = False,
+ console_output: bool = True,
+ output_file: Optional[Union[str, Path]] = None,
+):
+ """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
+ progress_bar (bool): Whether the logger should print the progress bar.
+ console_output (bool): Whether the logger should print the logs on the console.
+ output_file (Optional[Union[str, Path]]): The file to save the training logs to.
+ """
+ _log_exist = False
+ if output_file:
+ output_file = util.ensure_path(output_file) # type: ignore
+ if output_file.exists(): # type: ignore
+ _log_exist = True
+ if not output_file.parents[0].exists(): # type: ignore
+ output_file.parents[0].mkdir(parents=True) # type: ignore
+
def setup_printer(
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
write = lambda text: print(text, file=stdout, flush=True)
msg = Printer(no_print=True)
+
+ nonlocal output_file
+ output_stream = None
+ if _log_exist:
+ write(
+ msg.warn(
+ f"Saving logs is disabled because {output_file} already exists."
+ )
+ )
+ output_file = None
+ elif output_file:
+ write(msg.info(f"Saving results to {output_file}"))
+ output_stream = open(output_file, "w", encoding="utf-8")
+
# ensure that only trainable components are logged
logged_pipes = [
name
@@ -40,13 +74,15 @@ def setup_printer(
score_weights = nlp.config["training"]["score_weights"]
score_cols = [col for col, value in score_weights.items() if value is not None]
loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
- spacing = 2
- table_header, table_widths, table_aligns = setup_table(
- cols=["E", "#"] + loss_cols + score_cols + ["Score"],
- widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
- )
- write(msg.row(table_header, widths=table_widths, spacing=spacing))
- write(msg.row(["-" * width for width in table_widths], spacing=spacing))
+
+ if console_output:
+ spacing = 2
+ table_header, table_widths, table_aligns = setup_table(
+ cols=["E", "#"] + loss_cols + score_cols + ["Score"],
+ widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
+ )
+ write(msg.row(table_header, widths=table_widths, spacing=spacing))
+ write(msg.row(["-" * width for width in table_widths], spacing=spacing))
progress = None
def log_step(info: Optional[Dict[str, Any]]) -> None:
@@ -57,12 +93,15 @@ def log_step(info: Optional[Dict[str, Any]]) -> None:
if progress is not None:
progress.update(1)
return
- losses = [
- "{0:.2f}".format(float(info["losses"][pipe_name]))
- for pipe_name in logged_pipes
- ]
+
+ losses = []
+ log_losses = {}
+ for pipe_name in logged_pipes:
+ losses.append("{0:.2f}".format(float(info["losses"][pipe_name])))
+ log_losses[pipe_name] = float(info["losses"][pipe_name])
scores = []
+ log_scores = {}
for col in score_cols:
score = info["other_scores"].get(col, 0.0)
try:
@@ -73,6 +112,7 @@ def log_step(info: Optional[Dict[str, Any]]) -> None:
if col != "speed":
score *= 100
scores.append("{0:.2f}".format(score))
+ log_scores[str(col)] = score
data = (
[info["epoch"], info["step"]]
@@ -80,20 +120,36 @@ def log_step(info: Optional[Dict[str, Any]]) -> None:
+ scores
+ ["{0:.2f}".format(float(info["score"]))]
)
+
+ if output_stream:
+ # Write to log file per log_step
+ log_data = {
+ "epoch": info["epoch"],
+ "step": info["step"],
+ "losses": log_losses,
+ "scores": log_scores,
+ "score": float(info["score"]),
+ }
+ output_stream.write(srsly.json_dumps(log_data) + "\n")
+
if progress is not None:
progress.close()
- write(
- msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)
- )
- if progress_bar:
- # Set disable=None, so that it disables on non-TTY
- progress = tqdm.tqdm(
- total=eval_frequency, disable=None, leave=False, file=stderr
+ if console_output:
+ write(
+ msg.row(
+ data, widths=table_widths, aligns=table_aligns, spacing=spacing
+ )
)
- progress.set_description(f"Epoch {info['epoch']+1}")
+ if progress_bar:
+ # Set disable=None, so that it disables on non-TTY
+ progress = tqdm.tqdm(
+ total=eval_frequency, disable=None, leave=False, file=stderr
+ )
+ progress.set_description(f"Epoch {info['epoch']+1}")
def finalize() -> None:
- pass
+ if output_stream:
+ output_stream.close()
return log_step, finalize
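Each logging step appends one JSON record to the output file. The shape of a single line, with illustrative values:

```python
{
    "epoch": 0,
    "step": 200,
    "losses": {"tok2vec": 3.08, "tagger": 18968.78},
    "scores": {"tag_acc": 34.0},
    "score": 0.34,
}
```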
diff --git a/spacy/util.py b/spacy/util.py
index d170fc15b0c..4e1a62d053b 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -398,9 +398,9 @@ def load_model(
name: Union[str, Path],
*,
vocab: Union["Vocab", bool] = True,
- disable: Iterable[str] = SimpleFrozenList(),
- enable: Iterable[str] = SimpleFrozenList(),
- exclude: Iterable[str] = SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a package or data path.
@@ -408,9 +408,9 @@ def load_model(
name (str): Package name or model path.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
- disable (Iterable[str]): Names of pipeline components to disable.
- enable (Iterable[str]): Names of pipeline components to enable. All others will be disabled.
- exclude (Iterable[str]): Names of pipeline components to exclude.
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
@@ -440,9 +440,9 @@ def load_model_from_package(
name: str,
*,
vocab: Union["Vocab", bool] = True,
- disable: Iterable[str] = SimpleFrozenList(),
- enable: Iterable[str] = SimpleFrozenList(),
- exclude: Iterable[str] = SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from an installed package.
@@ -450,12 +450,12 @@ def load_model_from_package(
name (str): The package name.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
- disable (Iterable[str]): Names of pipeline components to disable. Disabled
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
- enable (Iterable[str]): Names of pipeline components to enable. All other
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
- exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
@@ -470,9 +470,9 @@ def load_model_from_path(
*,
meta: Optional[Dict[str, Any]] = None,
vocab: Union["Vocab", bool] = True,
- disable: Iterable[str] = SimpleFrozenList(),
- enable: Iterable[str] = SimpleFrozenList(),
- exclude: Iterable[str] = SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Load a model from a data directory path. Creates Language class with
@@ -482,12 +482,12 @@ def load_model_from_path(
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
- disable (Iterable[str]): Names of pipeline components to disable. Disabled
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
- enable (Iterable[str]): Names of pipeline components to enable. All other
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
- exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
@@ -516,9 +516,9 @@ def load_model_from_config(
*,
meta: Dict[str, Any] = SimpleFrozenDict(),
vocab: Union["Vocab", bool] = True,
- disable: Iterable[str] = SimpleFrozenList(),
- enable: Iterable[str] = SimpleFrozenList(),
- exclude: Iterable[str] = SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
auto_fill: bool = False,
validate: bool = True,
) -> "Language":
@@ -529,12 +529,12 @@ def load_model_from_config(
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
- disable (Iterable[str]): Names of pipeline components to disable. Disabled
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
- enable (Iterable[str]): Names of pipeline components to enable. All other
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
- exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
auto_fill (bool): Whether to auto-fill config with missing defaults.
validate (bool): Whether to show config validation errors.
@@ -616,9 +616,9 @@ def load_model_from_init_py(
init_file: Union[Path, str],
*,
vocab: Union["Vocab", bool] = True,
- disable: Iterable[str] = SimpleFrozenList(),
- enable: Iterable[str] = SimpleFrozenList(),
- exclude: Iterable[str] = SimpleFrozenList(),
+ disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+ exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
) -> "Language":
"""Helper function to use in the `load()` method of a model package's
@@ -626,12 +626,12 @@ def load_model_from_init_py(
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
- disable (Iterable[str]): Names of pipeline components to disable. Disabled
+ disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
- enable (Iterable[str]): Names of pipeline components to enable. All other
+ enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
- exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+ exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index cbd1f794a33..e5cd3089b35 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -77,14 +77,15 @@ $ python -m spacy info [--markdown] [--silent] [--exclude]
$ python -m spacy info [model] [--markdown] [--silent] [--exclude]
```
-| Name | Description |
-| ------------------------------------------------ | --------------------------------------------------------------------------------------------- |
-| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
-| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
-| `--silent`, `-s` 2.0.12 | Don't print anything, just return the values. ~~bool (flag)~~ |
-| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
-| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
-| **PRINTS** | Information about your spaCy installation. |
+| Name | Description |
+| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
+| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
+| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
+| `--silent`, `-s` 2.0.12 | Don't print anything, just return the values. ~~bool (flag)~~ |
+| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
+| `--url`, `-u` 3.5.0 | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ |
+| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
+| **PRINTS** | Information about your spaCy installation. |
## validate {#validate new="2" tag="command"}
diff --git a/website/docs/api/language.md b/website/docs/api/language.md
index 9a413efaf80..ed763e36a28 100644
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -63,17 +63,18 @@ spaCy loads a model under the hood based on its
> nlp = Language.from_config(config)
> ```
-| Name | Description |
-| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
-| _keyword-only_ | |
-| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
-| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
-| `exclude` | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
-| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
-| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
-| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
-| **RETURNS** | The initialized object. ~~Language~~ |
+| Name | Description |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
+| _keyword-only_ | |
+| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
+| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` 3.4 | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
+| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
+| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
+| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
+| **RETURNS** | The initialized object. ~~Language~~ |
## Language.component {#component tag="classmethod" new="3"}
@@ -695,8 +696,8 @@ As of spaCy v3.0, the `disable_pipes` method has been renamed to `select_pipes`:
| Name | Description |
| -------------- | ------------------------------------------------------------------------------------------------------ |
| _keyword-only_ | |
-| `disable` | Name(s) of pipeline components to disable. ~~Optional[Union[str, Iterable[str]]]~~ |
-| `enable` | Name(s) of pipeline components that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~ |
+| `disable` | Name(s) of pipeline component(s) to disable. ~~Optional[Union[str, Iterable[str]]]~~ |
+| `enable` | Name(s) of pipeline component(s) that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~ |
| **RETURNS** | The disabled pipes that can be restored by calling the object's `.restore()` method. ~~DisabledPipes~~ |
## Language.get_factory_meta {#get_factory_meta tag="classmethod" new="3"}
diff --git a/website/docs/api/legacy.md b/website/docs/api/legacy.md
index 31d178b6779..d9167c76f6a 100644
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@@ -248,6 +248,59 @@ added to an existing vectors table. See more details in
## Loggers {#loggers}
+These functions are available from `@spacy.registry.loggers`.
+
+### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1}
+
+> #### Example config
+>
+> ```ini
+> [training.logger]
+> @loggers = "spacy.ConsoleLogger.v1"
+> progress_bar = true
+> ```
+
+Writes the results of a training step to the console in a tabular format.
+
+
+
+```cli
+$ python -m spacy train config.cfg
+```
+
+```
+ℹ Using CPU
+ℹ Loading config and nlp from: config.cfg
+ℹ Pipeline: ['tok2vec', 'tagger']
+ℹ Start training
+ℹ Training. Initial learn rate: 0.0
+
+E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
+--- ------ ------------ ----------- ------- ------
+ 0 0 0.00 86.20 0.22 0.00
+ 0 200 3.08 18968.78 34.00 0.34
+ 0 400 31.81 22539.06 33.64 0.34
+ 0 600 92.13 22794.91 43.80 0.44
+ 0 800 183.62 21541.39 56.05 0.56
+ 0 1000 352.49 25461.82 65.15 0.65
+ 0 1200 422.87 23708.82 71.84 0.72
+ 0 1400 601.92 24994.79 76.57 0.77
+ 0 1600 662.57 22268.02 80.20 0.80
+ 0 1800 1101.50 28413.77 82.56 0.83
+ 0 2000 1253.43 28736.36 85.00 0.85
+ 0 2200 1411.02 28237.53 87.42 0.87
+ 0 2400 1605.35 28439.95 88.70 0.89
+```
+
+Note that the cumulative loss keeps increasing within one epoch, but should
+start decreasing across epochs.
+
+
+
+| Name | Description |
+| -------------- | --------------------------------------------------------- |
+| `progress_bar` | Whether the logger should print the progress bar. ~~bool~~ |
+
Logging utilities for spaCy are implemented in the
[`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
functions are typically available from `@spacy.registry.loggers`.
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 1e19254420a..220b2d6e92c 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -45,16 +45,16 @@ specified separately using the new `exclude` keyword argument.
> nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger"])
> ```
-| Name | Description |
-| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
-| _keyword-only_ | |
-| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
-| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
-| `enable` | Names of pipeline components to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~List[str]~~ |
-| `exclude` 3 | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
-| `config` 3 | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
-| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
+| Name | Description |
+| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
+| _keyword-only_ | |
+| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
+| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` 3.4 | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
+| `exclude` 3 | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
+| `config` 3 | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
+| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
Essentially, `spacy.load()` is a convenience wrapper that reads the pipeline's
[`config.cfg`](/api/data-formats#config), uses the language and pipeline
@@ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization.
### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}
-Generate dependency parse in `{'words': [], 'arcs': []}` format.
-For use with the `manual=True` argument in `displacy.render`.
+Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with
+the `manual=True` argument in `displacy.render`.
> #### Example
>
@@ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`.
### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}
-Generate named entities in `[{start: i, end: i, label: 'label'}]` format.
-For use with the `manual=True` argument in `displacy.render`.
+Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For
+use with the `manual=True` argument in `displacy.render`.
> #### Example
>
@@ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`.
### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}
-Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format.
-For use with the `manual=True` argument in `displacy.render`.
+Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For
+use with the `manual=True` argument in `displacy.render`.
> #### Example
>
@@ -451,7 +451,7 @@ factories.
| Registry name | Description |
| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `architectures` | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`. |
-| `augmenters` | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators. |
+| `augmenters` | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators. |
| `batchers` | Registry for training and evaluation [data batchers](#batchers). |
| `callbacks` | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training. |
| `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points). |
@@ -505,7 +505,7 @@ finished. To log each training step, a
and the accuracy scores on the development set.
The built-in, default logger is the ConsoleLogger, which prints results to the
-console in tabular format. The
+console in tabular format and can optionally save them to a `jsonl` file. The
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
a dependency of spaCy, enables other loggers, such as one that sends results to
a [Weights & Biases](https://www.wandb.com/) dashboard.
@@ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
Instead of using one of the built-in loggers, you can
[implement your own](/usage/training#custom-logging).
-#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"}
+#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}
> #### Example config
>
> ```ini
> [training.logger]
-> @loggers = "spacy.ConsoleLogger.v1"
+> @loggers = "spacy.ConsoleLogger.v2"
+> progress_bar = true
+> console_output = true
+> output_file = "training_log.jsonl"
> ```
-Writes the results of a training step to the console in a tabular format.
+Writes the results of a training step to the console in a tabular format and,
+optionally, saves them to a `jsonl` file.
@@ -536,22 +540,23 @@ $ python -m spacy train config.cfg
ℹ Pipeline: ['tok2vec', 'tagger']
ℹ Start training
ℹ Training. Initial learn rate: 0.0
+ℹ Saving results to training_log.jsonl
E # LOSS TOK2VEC LOSS TAGGER TAG_ACC SCORE
--- ------ ------------ ----------- ------- ------
- 1 0 0.00 86.20 0.22 0.00
- 1 200 3.08 18968.78 34.00 0.34
- 1 400 31.81 22539.06 33.64 0.34
- 1 600 92.13 22794.91 43.80 0.44
- 1 800 183.62 21541.39 56.05 0.56
- 1 1000 352.49 25461.82 65.15 0.65
- 1 1200 422.87 23708.82 71.84 0.72
- 1 1400 601.92 24994.79 76.57 0.77
- 1 1600 662.57 22268.02 80.20 0.80
- 1 1800 1101.50 28413.77 82.56 0.83
- 1 2000 1253.43 28736.36 85.00 0.85
- 1 2200 1411.02 28237.53 87.42 0.87
- 1 2400 1605.35 28439.95 88.70 0.89
+ 0 0 0.00 86.20 0.22 0.00
+ 0 200 3.08 18968.78 34.00 0.34
+ 0 400 31.81 22539.06 33.64 0.34
+ 0 600 92.13 22794.91 43.80 0.44
+ 0 800 183.62 21541.39 56.05 0.56
+ 0 1000 352.49 25461.82 65.15 0.65
+ 0 1200 422.87 23708.82 71.84 0.72
+ 0 1400 601.92 24994.79 76.57 0.77
+ 0 1600 662.57 22268.02 80.20 0.80
+ 0 1800 1101.50 28413.77 82.56 0.83
+ 0 2000 1253.43 28736.36 85.00 0.85
+ 0 2200 1411.02 28237.53 87.42 0.87
+ 0 2400 1605.35 28439.95 88.70 0.89
```
Note that the cumulative loss keeps increasing within one epoch, but should
@@ -559,6 +564,12 @@ start decreasing across epochs.
+| Name | Description |
+| ---------------- | --------------------------------------------------------------------- |
+| `progress_bar`   | Whether the logger should print the progress bar. ~~bool~~             |
+| `console_output` | Whether the logger should print the logs to the console. ~~bool~~      |
+| `output_file` | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
+
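Because the log is written as `jsonl` (one JSON object per line), it is easy to
post-process. A minimal sketch, assuming training ran with the `output_file`
from the example config; the exact keys of each entry aren't specified here, so
the snippet simply inspects the last one:

```python
import json

# Load the training log written by spacy.ConsoleLogger.v2
with open("training_log.jsonl", encoding="utf8") as f:
    steps = [json.loads(line) for line in f if line.strip()]

# One entry per logged training step; print the last entry to see
# which fields (losses, scores, etc.) the logger records
print(steps[-1])
```
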
## Readers {#readers}
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
@@ -1038,15 +1049,16 @@ and create a `Language` object. The model data will then be loaded in via
> nlp = util.load_model("/path/to/data")
> ```
-| Name | Description |
-| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `name` | Package name or path. ~~str~~ |
-| _keyword-only_ | |
-| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
-| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
-| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
-| `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
-| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
+| Name | Description |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `name` | Package name or path. ~~str~~ |
+| _keyword-only_ | |
+| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
+| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` <Tag variant="new">3.4</Tag>  | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `exclude`                              | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
+| `config` <Tag variant="new">3</Tag>    | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
+| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
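
As a quick illustration of the widened `Union[str, Iterable[str]]` types in the
table above, a minimal sketch (assumes an installed `en_core_web_sm` package;
any pipeline works):

```python
from spacy import util

# A single component name can now be passed as a plain string
nlp = util.load_model("en_core_web_sm", disable="parser")

# Iterables still work; with `enable`, every other pipe is disabled
# but can be turned back on later via nlp.enable_pipe
nlp = util.load_model("en_core_web_sm", enable=["ner"])
```
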
### util.load_model_from_init_py {#util.load_model_from_init_py tag="function" new="2"}
@@ -1062,15 +1074,16 @@ A helper function to use in the `load()` method of a pipeline package's
> return load_model_from_init_py(__file__, **overrides)
> ```
-| Name | Description |
-| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `init_file` | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~ |
-| _keyword-only_ | |
-| `vocab` <Tag variant="new">3</Tag>   | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
-| `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
-| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~ |
-| `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
-| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
+| Name | Description |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `init_file` | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~ |
+| _keyword-only_ | |
+| `vocab` <Tag variant="new">3</Tag>     | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
+| `disable`                              | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` <Tag variant="new">3.4</Tag>  | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `exclude` <Tag variant="new">3</Tag>   | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
+| `config` <Tag variant="new">3</Tag>    | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ |
+| **RETURNS** | `Language` class with the loaded pipeline. ~~Language~~ |
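
For context, the `return load_model_from_init_py(__file__, **overrides)` line
in the example above lives in a pipeline package's `__init__.py`; a complete
minimal version of that file looks roughly like this:

```python
from spacy.util import load_model_from_init_py

def load(**overrides):
    # Resolve the pipeline data shipped next to this __init__.py
    return load_model_from_init_py(__file__, **overrides)
```
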
### util.load_config {#util.load_config tag="function" new="3"}
diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md
index 56992e7e3b6..6971ac8b49f 100644
--- a/website/docs/usage/models.md
+++ b/website/docs/usage/models.md
@@ -365,15 +365,32 @@ pipeline package can be found.
To download a trained pipeline directly using
[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
path of the wheel file or archive. Installing the wheel is usually more
-efficient. To find the direct link to a package, head over to the
-[releases](https://github.com/explosion/spacy-models/releases), right click on
-the archive link and copy it to your clipboard.
+efficient.
+
+> #### Pipeline Package URLs {#pipeline-urls}
+>
+> Pretrained pipeline distributions are hosted on
+> [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you
+> can find download links there, as well as on the model page. You can also get
+> URLs directly from the command line by using `spacy info` with the `--url`
+> flag, which may be useful for automation.
+>
+> ```bash
+> spacy info en_core_web_sm --url
+> ```
+>
+> This command prints the URL for the latest version of a pipeline compatible
+> with the version of spaCy you're using. Note that an internet connection is
+> required to look up the compatibility information.
```bash
# With external URL
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
+# Using spacy info to get the external URL
+$ pip install $(spacy info en_core_web_sm --url)
+
# With local file
$ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
$ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
```
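
The `--url` lookup also makes installs scriptable beyond shell substitution. A
hedged Python sketch of the same automation, using only the behavior documented
above (the package name is just an example):

```python
import subprocess
import sys

# Resolve the direct download URL for the latest compatible version,
# as `spacy info <package> --url` does on the command line.
url = subprocess.check_output(
    [sys.executable, "-m", "spacy", "info", "en_core_web_sm", "--url"],
    text=True,
).strip()

# Equivalent to: pip install $(spacy info en_core_web_sm --url)
subprocess.check_call([sys.executable, "-m", "pip", "install", url])
```
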
@@ -514,21 +531,16 @@ should be specifying them directly.
Because pipeline packages are valid Python packages, you can add them to your
application's `requirements.txt`. If you're running your own internal PyPI
installation, you can upload the pipeline packages there. pip's
-[requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
-supports both package names to download via a PyPi server, as well as direct
-URLs.
+[requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/)
+supports both package names to download from a PyPI server and
+[direct URLs](#pipeline-urls).
```text
### requirements.txt
spacy>=3.0.0,<4.0.0
-https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
```
-Specifying `#egg=` with the package name tells pip which package to expect from
-the download URL. This way, the package won't be re-downloaded and overwritten
-if it's already installed - just like when you're downloading a package from
-PyPi.
-
All pipeline packages are versioned and specify their spaCy dependency. This
ensures cross-compatibility and lets you specify exact version requirements for
each pipeline. If you've [trained](/usage/training) your own pipeline, you can
diff --git a/website/src/templates/models.js b/website/src/templates/models.js
index df53f8c3c38..16a2360d525 100644
--- a/website/src/templates/models.js
+++ b/website/src/templates/models.js
@@ -76,6 +76,7 @@ const MODEL_META = {
benchmark_ner: 'NER accuracy',
benchmark_speed: 'Speed',
compat: 'Latest compatible package version for your spaCy installation',
+ download_link: 'Download link for the pipeline',
}
const LABEL_SCHEME_META = {
@@ -138,6 +139,13 @@ function formatAccuracy(data, lang) {
.filter(item => item)
}
+function formatDownloadLink(lang, name, version) {
+    const fullName = `${lang}_${name}-${version}`
+    const filename = `${fullName}-py3-none-any.whl`
+    const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}`
+    // Link the wheel's filename to its download URL on GitHub Releases
+    // (Link is the site's link component, imported at the top of this file)
+    return <Link to={url} hideIcon>{filename}</Link>
+}
+
function formatModelMeta(data) {
return {
fullName: `${data.lang}_${data.name}-${data.version}`,
@@ -154,6 +162,7 @@ function formatModelMeta(data) {
labels: isEmptyObj(data.labels) ? null : data.labels,
vectors: formatVectors(data.vectors),
accuracy: formatAccuracy(data.performance, data.lang),
+ download_link: formatDownloadLink(data.lang, data.name, data.version),
}
}
@@ -244,6 +253,7 @@ const Model = ({
{ label: 'Components', content: components, help: MODEL_META.components },
{ label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
{ label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
+ { label: 'Download Link', content: meta.download_link, help: MODEL_META.download_link },
{ label: 'Sources', content: sources, help: MODEL_META.sources },
{ label: 'Author', content: author },
{ label: 'License', content: license },