class InstructorEmbeddingFunction(EmbeddingFunction):
    """Embedding function that encodes documents with a local INSTRUCTOR model.

    The model is loaded once in ``__init__`` through the optional
    ``InstructorEmbedding`` package and reused for every call.

    Args:
        model_name: Name of the INSTRUCTOR checkpoint to load.
        device: Device string handed to the model constructor (e.g. ``"cpu"``
            or ``"cuda"``).
        instruction: Optional instruction string. When given, every document
            is encoded as an ``[instruction, document]`` pair; when ``None``
            (the default) the documents are passed to the model unchanged.

    Raises:
        ValueError: If the ``InstructorEmbedding`` package cannot be imported.
    """

    # If you have a GPU with at least 6GB try model_name = "hkunlp/instructor-xl" and device = "cuda"
    # for a full list of options: https://github.com/HKUNLP/instructor-embedding#model-list
    def __init__(
        self,
        model_name: str = "hkunlp/instructor-base",
        device: str = "cpu",
        instruction=None,
    ):
        # Imported lazily so the package stays an optional dependency.
        try:
            from InstructorEmbedding import INSTRUCTOR
        except ImportError as err:
            # Same exception type and message as before; the original
            # ImportError is attached as the explicit cause for debugging.
            raise ValueError(
                "The InstructorEmbedding python package is not installed. Please install it with `pip install InstructorEmbedding`"
            ) from err
        self._model = INSTRUCTOR(model_name, device=device)
        self._instruction = instruction

    def __call__(self, texts: Documents) -> Embeddings:
        """Encode ``texts`` and return the model output converted via ``.tolist()``."""
        if not texts:
            # Nothing to embed; skip the model call entirely.
            return []
        if self._instruction is None:
            return self._model.encode(texts).tolist()
        # INSTRUCTOR takes [instruction, text] pairs when an instruction is used.
        paired = [[self._instruction, text] for text in texts]
        return self._model.encode(paired).tolist()