diff --git a/flair/embeddings.py b/flair/embeddings.py
index bc5cec02b6..4cf5b67bd8 100644
--- a/flair/embeddings.py
+++ b/flair/embeddings.py
@@ -1852,6 +1852,7 @@ def __init__(self, model, fine_tune: bool = False, chars_per_chunk: int = 512):
         cache_dir = Path("embeddings")
 
         aws_path: str = "https://s3.eu-central-1.amazonaws.com/alan-nlp/resources"
+        hu_path: str = "https://flair.informatik.hu-berlin.de/resources"
 
         self.PRETRAINED_MODEL_ARCHIVE_MAP = {
             # multilingual models
@@ -1895,6 +1896,8 @@ def __init__(self, model, fine_tune: bool = False, chars_per_chunk: int = 512):
             "de-historic-ha-backward": f"{aws_path}/embeddings-stefan-it/lm-historic-hamburger-anzeiger-backward-v0.1.pt",
             "de-historic-wz-forward": f"{aws_path}/embeddings-stefan-it/lm-historic-wiener-zeitung-forward-v0.1.pt",
             "de-historic-wz-backward": f"{aws_path}/embeddings-stefan-it/lm-historic-wiener-zeitung-backward-v0.1.pt",
+            "de-historic-rw-forward": f"{hu_path}/embeddings/redewiedergabe_lm_forward.pt",
+            "de-historic-rw-backward": f"{hu_path}/embeddings/redewiedergabe_lm_backward.pt",
             # Spanish
             "es-forward": f"{aws_path}/embeddings-v0.4/language_model_es_forward_long/lm-es-forward.pt",
             "es-backward": f"{aws_path}/embeddings-v0.4/language_model_es_backward_long/lm-es-backward.pt",
diff --git a/resources/docs/embeddings/FLAIR_EMBEDDINGS.md b/resources/docs/embeddings/FLAIR_EMBEDDINGS.md
index 19a58491e2..0bf7e2ee01 100644
--- a/resources/docs/embeddings/FLAIR_EMBEDDINGS.md
+++ b/resources/docs/embeddings/FLAIR_EMBEDDINGS.md
@@ -40,6 +40,7 @@ Currently, the following contextual string embeddings are provided (note: replac
 | 'de-X' | German | Trained with mixed corpus (Web, Wikipedia, Subtitles) |
 | 'de-historic-ha-X' | German (historical) | Added by [@stefan-it](https://github.com/stefan-it/flair-lms): Historical German trained over *Hamburger Anzeiger* |
 | 'de-historic-wz-X' | German (historical) | Added by [@stefan-it](https://github.com/stefan-it/flair-lms): Historical German trained over *Wiener Zeitung* |
+| 'de-historic-rw-X' | German (historical) | Added by [@redewiedergabe](https://github.com/redewiedergabe): Historical German trained over 100 million tokens |
 | 'es-X' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): Trained with Wikipedia |
 | 'es-X-fast' | Spanish | Added by [@iamyihwa](https://github.com/zalandoresearch/flair/issues/80): Trained with Wikipediam CPU-friendly |
 | 'eu-X' | Basque | Added by [@stefan-it](https://github.com/zalandoresearch/flair/issues/614): Trained with Wikipedia/OPUS |
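
For context, a minimal usage sketch (not part of the diff): with this patch applied, the new `de-historic-rw-forward` / `de-historic-rw-backward` IDs should load through the existing `FlairEmbeddings` interface like any other entry in `PRETRAINED_MODEL_ARCHIVE_MAP`. The example sentence and the stacking of the forward and backward models below are illustrative assumptions, not part of this change.

```python
from flair.data import Sentence
from flair.embeddings import FlairEmbeddings, StackedEmbeddings

# the new IDs resolve to the redewiedergabe language models hosted under hu_path
rw_forward = FlairEmbeddings('de-historic-rw-forward')
rw_backward = FlairEmbeddings('de-historic-rw-backward')

# forward and backward models are typically stacked for downstream tasks
stacked_embeddings = StackedEmbeddings([rw_forward, rw_backward])

# embed an example sentence (illustrative text, chosen for this sketch)
sentence = Sentence('Er sagte , er wolle morgen kommen .')
stacked_embeddings.embed(sentence)

for token in sentence:
    print(token.text, token.embedding.size())
```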