Skip to content

Commit

Permalink
- explicitly constructing the path for stanza_resources
Browse files: browse the repository at this point in the history
  • Loading branch information
mircealungu committed Feb 10, 2025
1 parent 6d255d7 commit cd2e804
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 6 deletions.
8 changes: 6 additions & 2 deletions install_stanza_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from zeeguu.core.model.language import Language
from os import getenv
from os import getenv, path
import stanza


Expand All @@ -8,7 +8,11 @@ def stanza_model_installation():
ZEEGUU_DATA_FOLDER = getenv("ZEEGUU_DATA_FOLDER")

for l_code in Language.CODES_OF_LANGUAGES_THAT_CAN_BE_LEARNED:
stanza.download(l_code, processors="tokenize,pos", model_dir=ZEEGUU_DATA_FOLDER)
stanza.download(
l_code,
processors="tokenize,pos",
model_dir=path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"),
)


stanza_model_installation()
3 changes: 3 additions & 0 deletions zeeguu/api/endpoints/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def find_or_create_article():

capture_exception(e)
flask.abort(500)
import traceback

traceback.print_stack()


# ---------------------------------------------------------------------------
Expand Down
13 changes: 10 additions & 3 deletions zeeguu/api/test/test_article.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from zeeguu.core.test.mocking_the_web import URL_SPIEGEL_VENEZUELA, URL_FAZ_LEIGHTATHLETIK
from zeeguu.core.test.mocking_the_web import (
URL_SPIEGEL_VENEZUELA,
URL_FAZ_LEIGHTATHLETIK,
)
from fixtures import logged_in_client as client


def test_create_article(client):
response_data = client.post("/find_or_create_article", data=dict(url=URL_SPIEGEL_VENEZUELA))
response_data = client.post(
"/find_or_create_article", data=dict(url=URL_SPIEGEL_VENEZUELA)
)

assert response_data
assert response_data["id"] == 1
Expand All @@ -16,7 +21,9 @@ def test_starred_or_liked(client):
assert len(result) == 0

# Star article
article_id = client.post("/find_or_create_article", dict(url=URL_FAZ_LEIGHTATHLETIK))["id"]
article_id = client.post(
"/find_or_create_article", dict(url=URL_FAZ_LEIGHTATHLETIK)
)["id"]
client.post(f"/user_article", data=dict(starred="True", article_id=article_id))

# One article is starred eventually
Expand Down
3 changes: 2 additions & 1 deletion zeeguu/core/tokenization/stanza_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from os import getenv

import stanza
import os

STANZA_PARAGRAPH_DELIMITER = re.compile(r"((\s?)+\\n+)")
APOSTROPHE_BEFORE_WORD = re.compile(r" (')([\w]+)")
Expand Down Expand Up @@ -43,7 +44,7 @@ def __init__(self, language: Language, model: TokenizerModel):
lang=self.language.code,
processors=StanzaTokenizer._get_processor(model),
download_method=None,
dir=ZEEGUU_DATA_FOLDER,
model_dir=os.path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"),
)
StanzaTokenizer.CACHED_NLP_PIPELINES[key] = pipeline
self.nlp_pipeline = StanzaTokenizer.CACHED_NLP_PIPELINES[key]
Expand Down

0 comments on commit cd2e804

Please sign in to comment.