diff --git a/poetry.lock b/poetry.lock index 24301f079..4d6d699d1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1151,6 +1151,17 @@ files = [ {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"}, ] +[[package]] +name = "einops" +version = "0.8.0" +description = "A new flavour of deep learning operations" +optional = true +python-versions = ">=3.8" +files = [ + {file = "einops-0.8.0-py3-none-any.whl", hash = "sha256:9572fb63046264a862693b0a87088af3bdc8c068fde03de63453cbbde245465f"}, + {file = "einops-0.8.0.tar.gz", hash = "sha256:63486517fed345712a8385c100cb279108d9d47e6ae59099b07657e983deae85"}, +] + [[package]] name = "email-validator" version = "2.2.0" @@ -6659,7 +6670,7 @@ cffi = ["cffi (>=1.11)"] [extras] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-gemini = ["llama-index-embeddings-gemini"] -embeddings-huggingface = ["llama-index-embeddings-huggingface"] +embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"] embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] @@ -6682,4 +6693,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "dce5b88d92bcfa047bf1e4c9fe2dbb9c63eb864d6bbca2340801ac0a2f02a8d4" +content-hash = "25abbb45bc462dbf056b83c0925b505ad1232484a18e50f07c5e7f517dd84e6f" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index 89a577b2a..5d3e99749 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -31,6 +31,7 @@ def __init__(self, settings: Settings) -> None: self.embedding_model = HuggingFaceEmbedding( model_name=settings.huggingface.embedding_hf_model_name, cache_folder=str(models_cache_path), + 
trust_remote_code=settings.huggingface.trust_remote_code, ) case "sagemaker": try: diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index c968f8088..4cf192a3e 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -189,6 +189,10 @@ class HuggingFaceSettings(BaseModel): None, description="Huggingface access token, required to download some models", ) + trust_remote_code: bool = Field( + False, + description="If set to True, the code from the remote model will be trusted and executed.", + ) class EmbeddingSettings(BaseModel): diff --git a/pyproject.toml b/pyproject.toml index 5c6429b11..2ebe5f94c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,9 @@ google-generativeai = {version ="^0.5.4", optional = true} # Optional Ollama client ollama = {version ="^0.3.0", optional = true} +# Optional HF Transformers +einops = {version = "^0.8.0", optional = true} + [tool.poetry.extras] ui = ["gradio", "ffmpy"] llms-llama-cpp = ["llama-index-llms-llama-cpp"] @@ -75,7 +78,7 @@ llms-sagemaker = ["boto3"] llms-azopenai = ["llama-index-llms-azure-openai"] llms-gemini = ["llama-index-llms-gemini", "google-generativeai"] embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"] -embeddings-huggingface = ["llama-index-embeddings-huggingface"] +embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] diff --git a/settings.yaml b/settings.yaml index a890733e8..f030604a3 100644 --- a/settings.yaml +++ b/settings.yaml @@ -91,6 +91,9 @@ embedding: huggingface: embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5 access_token: ${HF_TOKEN:} + # Warning: Enabling this option will allow the model to download and execute code from the internet. 
+ # Nomic AI requires this option to be enabled to use the model, be aware if you are using a different model. + trust_remote_code: true vectorstore: database: qdrant