- unified the dependency on the stanza_resources folder (both files now use the shared STANZA_RESOURCE_DIR constant)

zeeguu · Feb 10, 2025 · 5f3d228
1 parent 50f142f
commit 5f3d228
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 5 deletions.
diff --git a/install_stanza_models.py b/install_stanza_models.py
@@ -1,17 +1,17 @@
 from zeeguu.core.model.language import Language
-from os import getenv, path
+
 import stanza
 
+from zeeguu.core.tokenization.stanza_tokenizer import STANZA_RESOURCE_DIR
 
-def stanza_model_installation():
 
-    ZEEGUU_DATA_FOLDER = getenv("ZEEGUU_DATA_FOLDER")
+def stanza_model_installation():
 
     for l_code in Language.CODES_OF_LANGUAGES_THAT_CAN_BE_LEARNED:
         stanza.download(
             l_code,
             processors="tokenize,pos",
-            model_dir=path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"),
+            model_dir=STANZA_RESOURCE_DIR,
         )
 
 

diff --git a/zeeguu/core/tokenization/stanza_tokenizer.py b/zeeguu/core/tokenization/stanza_tokenizer.py
@@ -19,6 +19,13 @@
 EMAIL_PLACEHOLDER = "#EMAIL#"
 
 
+STANZA_RESOURCE_DIR = (
+    os.path.join(ZEEGUU_DATA_FOLDER, "stanza_resources")
+    if ZEEGUU_DATA_FOLDER
+    else os.path.join(os.path.expanduser("~"), "stanza_resources")
+)
+
+
 class StanzaTokenizer(ZeeguuTokenizer):
     STANZA_MODELS = set(
         [TokenizerModel.STANZA_TOKEN_ONLY, TokenizerModel.STANZA_TOKEN_POS]
@@ -44,7 +51,7 @@ def __init__(self, language: Language, model: TokenizerModel):
                     lang=self.language.code,
                     processors=StanzaTokenizer._get_processor(model),
                     download_method=None,
-                    model_dir=os.path.join(ZEEGUU_DATA_FOLDER, "stanza_resources"),
+                    model_dir=STANZA_RESOURCE_DIR,
                 )
                 StanzaTokenizer.CACHED_NLP_PIPELINES[key] = pipeline
         self.nlp_pipeline = StanzaTokenizer.CACHED_NLP_PIPELINES[key]