Skip to content

Commit

Permalink
added embedding function for the Instructor models.
Browse files Browse the repository at this point in the history
  • Loading branch information
dbasch committed Mar 24, 2023
1 parent 4463d13 commit e6a7aee
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions chromadb/utils/embedding_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,19 @@ def __init__(self, api_key: str, model_name: str = "sentence-transformers/all-Mi
def __call__(self, texts: Documents) -> Embeddings:
    """Embed ``texts`` through the configured HuggingFace API endpoint.

    All documents are sent together in a single POST request, and the
    decoded JSON body of the response is returned unchanged.
    """
    # The whole batch goes out as one payload; the "wait_for_model"
    # option is forwarded to the API as part of the request options.
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    response = self._session.post(self._api_url, json=payload)
    return response.json()


class InstructorEmbeddingFunction(EmbeddingFunction):
    """Embedding function backed by a locally loaded INSTRUCTOR model.

    The ``InstructorEmbedding`` package is imported lazily in ``__init__`` so
    that it is only required when this embedding function is actually used.
    """

    # If you have a GPU with at least 6GB try model_name = "hkunlp/instructor-xl" and device = "cuda"
    # for a full list of options: https://github.com/HKUNLP/instructor-embedding#model-list
    def __init__(
        self,
        model_name: str = "hkunlp/instructor-base",
        device: str = "cpu",
        instruction=None,
    ):
        """Load the INSTRUCTOR model.

        Args:
            model_name: Name of the INSTRUCTOR model to load.
            device: Device passed to the model constructor (e.g. "cpu" or
                "cuda").
            instruction: Optional instruction string (str or None). When
                given, every document is encoded as an
                ``[instruction, text]`` pair; when None, documents are
                encoded as plain strings.

        Raises:
            ValueError: If the ``InstructorEmbedding`` package is not
                installed.
        """
        try:
            from InstructorEmbedding import INSTRUCTOR
        except ImportError as err:
            # ValueError is kept as the raised type so existing callers keep
            # working; the original ImportError is attached as the cause.
            raise ValueError(
                "The InstructorEmbedding python package is not installed. Please install it with `pip install InstructorEmbedding`"
            ) from err
        self._model = INSTRUCTOR(model_name, device=device)
        self._instruction = instruction

    def __call__(self, texts: Documents) -> Embeddings:
        """Encode ``texts`` with the model and return the result of ``.tolist()``."""
        if self._instruction is None:
            return self._model.encode(texts).tolist()

        # Pair each document with the shared instruction before encoding.
        texts_with_instructions = [[self._instruction, text] for text in texts]
        return self._model.encode(texts_with_instructions).tolist()

0 comments on commit e6a7aee

Please sign in to comment.