From cd2e8045cad8b31a56887af6c6f1dd75a80ef940 Mon Sep 17 00:00:00 2001 From: Mircea Filip Lungu Date: Mon, 10 Feb 2025 12:12:53 +0100 Subject: [PATCH] - explicitly constructing the path for stanza_resources --- install_stanza_models.py | 8 ++++++-- zeeguu/api/endpoints/article.py | 3 +++ zeeguu/api/test/test_article.py | 13 ++++++++++--- zeeguu/core/tokenization/stanza_tokenizer.py | 3 ++- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/install_stanza_models.py b/install_stanza_models.py index acf06a4a..8663c67a 100644 --- a/install_stanza_models.py +++ b/install_stanza_models.py @@ -1,5 +1,5 @@ from zeeguu.core.model.language import Language -from os import getenv +from os import getenv, path import stanza @@ -8,7 +8,11 @@ def stanza_model_installation(): ZEEGUU_DATA_FOLDER = getenv("ZEEGUU_DATA_FOLDER") for l_code in Language.CODES_OF_LANGUAGES_THAT_CAN_BE_LEARNED: - stanza.download(l_code, processors="tokenize,pos", model_dir=ZEEGUU_DATA_FOLDER) + stanza.download( + l_code, + processors="tokenize,pos", + model_dir=path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"), + ) stanza_model_installation() diff --git a/zeeguu/api/endpoints/article.py b/zeeguu/api/endpoints/article.py index a8621be8..f4c053ff 100644 --- a/zeeguu/api/endpoints/article.py +++ b/zeeguu/api/endpoints/article.py @@ -46,6 +46,9 @@ def find_or_create_article(): capture_exception(e) flask.abort(500) + import traceback + + traceback.print_stack() # --------------------------------------------------------------------------- diff --git a/zeeguu/api/test/test_article.py b/zeeguu/api/test/test_article.py index f246cf41..17fa5e60 100644 --- a/zeeguu/api/test/test_article.py +++ b/zeeguu/api/test/test_article.py @@ -1,9 +1,14 @@ -from zeeguu.core.test.mocking_the_web import URL_SPIEGEL_VENEZUELA, URL_FAZ_LEIGHTATHLETIK +from zeeguu.core.test.mocking_the_web import ( + URL_SPIEGEL_VENEZUELA, + URL_FAZ_LEIGHTATHLETIK, +) from fixtures import logged_in_client as client def test_create_article(client): - response_data = client.post("/find_or_create_article", data=dict(url=URL_SPIEGEL_VENEZUELA)) + response_data = client.post( + "/find_or_create_article", data=dict(url=URL_SPIEGEL_VENEZUELA) + ) assert response_data assert response_data["id"] == 1 @@ -16,7 +21,9 @@ def test_starred_or_liked(client): assert len(result) == 0 # Star article - article_id = client.post("/find_or_create_article", dict(url=URL_FAZ_LEIGHTATHLETIK))["id"] + article_id = client.post( + "/find_or_create_article", dict(url=URL_FAZ_LEIGHTATHLETIK) + )["id"] client.post(f"/user_article", data=dict(starred="True", article_id=article_id)) # One article is starred eventually diff --git a/zeeguu/core/tokenization/stanza_tokenizer.py b/zeeguu/core/tokenization/stanza_tokenizer.py index 672e4394..95b42967 100644 --- a/zeeguu/core/tokenization/stanza_tokenizer.py +++ b/zeeguu/core/tokenization/stanza_tokenizer.py @@ -5,6 +5,7 @@ from os import getenv import stanza +import os STANZA_PARAGRAPH_DELIMITER = re.compile(r"((\s?)+\\n+)") APOSTROPHE_BEFORE_WORD = re.compile(r" (')([\w]+)") @@ -43,7 +44,7 @@ def __init__(self, language: Language, model: TokenizerModel): lang=self.language.code, processors=StanzaTokenizer._get_processor(model), download_method=None, - dir=ZEEGUU_DATA_FOLDER, + model_dir=os.path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"), ) StanzaTokenizer.CACHED_NLP_PIPELINES[key] = pipeline self.nlp_pipeline = StanzaTokenizer.CACHED_NLP_PIPELINES[key]