- explicitly constructing the path for stanza_resources

zeeguu · Feb 10, 2025 · cd2e804 · cd2e804
1 parent 6d255d7
commit cd2e804
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 6 deletions.
diff --git a/install_stanza_models.py b/install_stanza_models.py
@@ -1,5 +1,5 @@
 from zeeguu.core.model.language import Language
-from os import getenv
+from os import getenv, path
 import stanza
 
 
@@ -8,7 +8,11 @@ def stanza_model_installation():
     ZEEGUU_DATA_FOLDER = getenv("ZEEGUU_DATA_FOLDER")
 
     for l_code in Language.CODES_OF_LANGUAGES_THAT_CAN_BE_LEARNED:
-        stanza.download(l_code, processors="tokenize,pos", model_dir=ZEEGUU_DATA_FOLDER)
+        stanza.download(
+            l_code,
+            processors="tokenize,pos",
+            model_dir=path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"),
+        )
 
 
 stanza_model_installation()
diff --git a/zeeguu/api/endpoints/article.py b/zeeguu/api/endpoints/article.py
@@ -46,6 +46,9 @@ def find_or_create_article():
 
         capture_exception(e)
+        import traceback
+
+        traceback.print_stack()  # must run before abort(): abort() raises and nothing after it executes
         flask.abort(500)
 
 
 # ---------------------------------------------------------------------------

diff --git a/zeeguu/api/test/test_article.py b/zeeguu/api/test/test_article.py
@@ -1,9 +1,14 @@
-from zeeguu.core.test.mocking_the_web import URL_SPIEGEL_VENEZUELA, URL_FAZ_LEIGHTATHLETIK
+from zeeguu.core.test.mocking_the_web import (
+    URL_SPIEGEL_VENEZUELA,
+    URL_FAZ_LEIGHTATHLETIK,
+)
 from fixtures import logged_in_client as client
 
 
 def test_create_article(client):
-    response_data = client.post("/find_or_create_article", data=dict(url=URL_SPIEGEL_VENEZUELA))
+    response_data = client.post(
+        "/find_or_create_article", data=dict(url=URL_SPIEGEL_VENEZUELA)
+    )
 
     assert response_data
     assert response_data["id"] == 1
@@ -16,7 +21,9 @@ def test_starred_or_liked(client):
     assert len(result) == 0
 
     # Star article
-    article_id = client.post("/find_or_create_article", dict(url=URL_FAZ_LEIGHTATHLETIK))["id"]
+    article_id = client.post(
+        "/find_or_create_article", dict(url=URL_FAZ_LEIGHTATHLETIK)
+    )["id"]
     client.post(f"/user_article", data=dict(starred="True", article_id=article_id))
 
     # One article is starred eventually

diff --git a/zeeguu/core/tokenization/stanza_tokenizer.py b/zeeguu/core/tokenization/stanza_tokenizer.py
@@ -5,6 +5,7 @@
 from os import getenv
 
 import stanza
+import os
 
 STANZA_PARAGRAPH_DELIMITER = re.compile(r"((\s?)+\\n+)")
 APOSTROPHE_BEFORE_WORD = re.compile(r" (')([\w]+)")
@@ -43,7 +44,7 @@ def __init__(self, language: Language, model: TokenizerModel):
                     lang=self.language.code,
                     processors=StanzaTokenizer._get_processor(model),
                     download_method=None,
-                    dir=ZEEGUU_DATA_FOLDER,
+                    model_dir=os.path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"),
                 )
                 StanzaTokenizer.CACHED_NLP_PIPELINES[key] = pipeline
         self.nlp_pipeline = StanzaTokenizer.CACHED_NLP_PIPELINES[key]