feat: autopull ollama models #2019

Merged · 7 commits · Jul 29, 2024
16 changes: 10 additions & 6 deletions fern/docs/pages/installation/installation.mdx
@@ -130,16 +130,20 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama.

After the installation, make sure the Ollama desktop app is closed.

Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):

```bash
ollama serve
```

Install the models to be used; the default settings-ollama.yaml is configured to use the `mistral 7b` LLM (~4GB) and the `nomic-embed-text` Embeddings model (~275MB).

By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.

In any case, if you want to manually pull models, run the following commands:

```bash
ollama pull mistral
ollama pull nomic-embed-text
```
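The pull step can also be scripted with the `ollama` Python client (the same client this PR adds as a dependency); a minimal sketch, assuming Ollama is serving on the default port:

```python
from ollama import Client

# Talk to the local Ollama server started by `ollama serve`.
client = Client(host="http://localhost:11434")

# Pull the default models from settings-ollama.yaml; pulling is
# effectively idempotent (already-downloaded layers are skipped).
for model in ("mistral", "nomic-embed-text"):
    client.pull(model)
```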

Once done, on a different terminal, you can install PrivateGPT with the following command:
33 changes: 24 additions & 9 deletions poetry.lock


32 changes: 31 additions & 1 deletion private_gpt/components/embedding/embedding_component.py
@@ -71,16 +71,46 @@ def __init__(self, settings: Settings) -> None:
from llama_index.embeddings.ollama import ( # type: ignore
OllamaEmbedding,
)
from ollama import Client # type: ignore
except ImportError as e:
raise ImportError(
"Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
) from e

ollama_settings = settings.ollama

# Calculate embedding model name. If no tag is provided, ":latest" is used.
model_name = (
ollama_settings.embedding_model + ":latest"
if ":" not in ollama_settings.embedding_model
else ollama_settings.embedding_model
)

self.embedding_model = OllamaEmbedding(
model_name=ollama_settings.embedding_model,
model_name=model_name,
base_url=ollama_settings.embedding_api_base,
)

if ollama_settings.autopull_models:
from private_gpt.utils.ollama import (
check_connection,
pull_model,
)

# TODO: Reuse llama-index client when llama-index is updated
client = Client(
host=ollama_settings.embedding_api_base,
timeout=ollama_settings.request_timeout,
)

if not check_connection(client):
raise ValueError(
f"Failed to connect to Ollama, "
f"check if Ollama server is running on {ollama_settings.api_base}"
)
pull_model(client, model_name)

case "azopenai":
try:
from llama_index.embeddings.azure_openai import ( # type: ignore
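For reference, the tag normalization above (shared with the LLM component below) can be captured as a tiny helper; `ensure_tag` is a hypothetical name, not part of the PR:

```python
def ensure_tag(model: str) -> str:
    # Ollama model references default to ":latest" when no tag is given.
    return model if ":" in model else model + ":latest"

assert ensure_tag("nomic-embed-text") == "nomic-embed-text:latest"
assert ensure_tag("mistral:7b-instruct") == "mistral:7b-instruct"
```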
23 changes: 21 additions & 2 deletions private_gpt/components/llm/llm_component.py
@@ -146,15 +146,32 @@ def __init__(self, settings: Settings) -> None:
"repeat_penalty": ollama_settings.repeat_penalty, # ollama llama-cpp
}

self.llm = Ollama(
model=ollama_settings.llm_model,
# Calculate LLM model name. If no tag is provided, ":latest" is used.
model_name = (
ollama_settings.llm_model + ":latest"
if ":" not in ollama_settings.llm_model
else ollama_settings.llm_model
)

llm = Ollama(
model=model_name,
base_url=ollama_settings.api_base,
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
additional_kwargs=settings_kwargs,
request_timeout=ollama_settings.request_timeout,
)

if ollama_settings.autopull_models:
from private_gpt.utils.ollama import check_connection, pull_model

if not check_connection(llm.client):
raise ValueError(
f"Failed to connect to Ollama, "
f"check if Ollama server is running on {ollama_settings.api_base}"
)
pull_model(llm.client, model_name)

if (
ollama_settings.keep_alive
!= ollama_settings.model_fields["keep_alive"].default
@@ -172,6 +189,8 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
Ollama.complete = add_keep_alive(Ollama.complete)
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)

self.llm = llm

case "azopenai":
try:
from llama_index.llms.azure_openai import ( # type: ignore
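Both components follow the same autopull flow: normalize the model name, check that the server is reachable, then pull. A condensed, standalone sketch of that flow (`ensure_model` is a hypothetical helper built on the PR's utilities):

```python
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model


def ensure_model(host: str, model: str, timeout: float = 120.0) -> None:
    # Normalize the name, verify connectivity, then pull if missing.
    name = model if ":" in model else model + ":latest"
    client = Client(host=host, timeout=timeout)
    if not check_connection(client):
        raise ValueError(f"Failed to connect to Ollama on {host}")
    pull_model(client, name)


ensure_model("http://localhost:11434", "mistral")
```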
4 changes: 4 additions & 0 deletions private_gpt/settings/settings.py
@@ -290,6 +290,10 @@ class OllamaSettings(BaseModel):
120.0,
description="Time elapsed until ollama times out the request. Default is 120s. Format is float. ",
)
autopull_models: bool = Field(
False,
description="If set to True, the Ollama will automatically pull the models from the API base.",
)


class AzureOpenAISettings(BaseModel):
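With the flag in place, callers can gate pull behavior on it; a sketch, assuming the project's `settings()` accessor from this module:

```python
from private_gpt.settings.settings import settings

# autopull_models defaults to False in code; the bundled settings.yaml
# (see below) sets it to true.
if settings().ollama.autopull_models:
    print("Ollama models will be pulled automatically on first use")
```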
32 changes: 32 additions & 0 deletions private_gpt/utils/ollama.py
@@ -0,0 +1,32 @@
import logging

try:
from ollama import Client # type: ignore
except ImportError as e:
raise ImportError(
"Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
) from e

logger = logging.getLogger(__name__)


def check_connection(client: Client) -> bool:
try:
client.list()
return True
except Exception as e:
logger.error(f"Failed to connect to Ollama: {e!s}")
return False


def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
try:
installed_models = [model["name"] for model in client.list().get("models", [])]
if model_name not in installed_models:
logger.info(f"Pulling model {model_name}. Please wait...")
client.pull(model_name)
logger.info(f"Model {model_name} pulled successfully")
except Exception as e:
logger.error(f"Failed to pull model {model_name}: {e!s}")
if raise_error:
raise e
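Note the `raise_error` flag: callers can opt into best-effort pulling instead of failing hard. A usage sketch against a local server:

```python
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model

client = Client(host="http://localhost:11434")  # default Ollama port
if check_connection(client):
    # Best-effort pull: failures are logged but not raised.
    pull_model(client, "nomic-embed-text:latest", raise_error=False)
```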
9 changes: 6 additions & 3 deletions pyproject.toml
@@ -22,7 +22,7 @@ llama-index-readers-file = "^0.1.27"
llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
llama-index-llms-openai = {version = "^0.1.25", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.1.5", optional = true}
llama-index-llms-ollama = {version ="^0.2.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
llama-index-llms-gemini = {version ="^0.1.11", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
@@ -62,16 +62,19 @@ ffmpy = {git = "https://github.com/EuDs63/ffmpy.git", rev = "333a19ee4d21f32537c
# Optional Google Gemini dependency
google-generativeai = {version ="^0.5.4", optional = true}

# Optional Ollama client
ollama = {version ="^0.3.0", optional = true}

[tool.poetry.extras]
ui = ["gradio", "ffmpy"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama"]
llms-ollama = ["llama-index-llms-ollama", "ollama"]
llms-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
embeddings-ollama = ["llama-index-embeddings-ollama"]
embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
embeddings-huggingface = ["llama-index-embeddings-huggingface"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
1 change: 1 addition & 0 deletions settings.yaml
@@ -117,6 +117,7 @@ ollama:
embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
keep_alive: 5m
request_timeout: 120.0
autopull_models: true

azopenai:
api_key: ${AZ_OPENAI_API_KEY:}