Skip to content

Commit

Permalink
Merge pull request #471 from julep-ai/x/fix-dploy-v0.4
Browse files Browse the repository at this point in the history
  • Loading branch information
creatorrr authored Aug 28, 2024
2 parents ea74887 + 0cc7371 commit 9f5f0cb
Show file tree
Hide file tree
Showing 21 changed files with 273 additions and 324 deletions.
121 changes: 68 additions & 53 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,54 +1,69 @@
AGENTS_API_KEY=myauthkey
AGENTS_API_KEY_HEADER_NAME=Authorization
AGENTS_API_URL=http://agents-api:8080
COZO_AUTH_TOKEN=myauthkey
COZO_HOST=http://memory-store:9070
COZO_PORT=9070
COZO_ROCKSDB_DIR=cozo.db
DTYPE=float16
EMBEDDING_SERVICE_BASE=http://text-embeddings-inference
EMBEDDING_SERVICE_URL=${EMBEDDING_SERVICE_BASE}/embed
GATEWAY_PORT=80
GPU_MEMORY_UTILIZATION=0.90

HF_TOKEN=
HUGGING_FACE_HUB_TOKEN=
JWT_SHARED_KEY=

MAX_MODEL_LEN=8192
MAX_NUM_SEQS=1
MNT_DIR=/data

# Security
# --------
# Every value below is a `<...>` placeholder, not a usable secret;
# replace each one with a real value in your own .env file.
JWT_SHARED_KEY=<your_jwt_shared_key>
AGENTS_API_KEY=<your_agents_api_key>
COZO_AUTH_TOKEN=<your_cozo_auth_token>
TEMPORAL_POSTGRES_PASSWORD=<your_temporal_postgres_password>
LITELLM_POSTGRES_PASSWORD=<your_litellm_postgres_password>
LITELLM_MASTER_KEY=<your_litellm_master_key>
LITELLM_REDIS_PASSWORD=<your_litellm_redis_password>
SKIP_CHECK_DEVELOPER_HEADERS=true
SUMMARIZATION_TOKENS_THRESHOLD=2048
TEMPERATURE_SCALING_FACTOR=0.9
TEMPERATURE_SCALING_POWER=0.9
TEMPORAL_ENDPOINT=temporal:7233
TEMPORAL_NAMESPACE=default
TEMPORAL_WORKER_URL=temporal:7233
TP_SIZE=1
TRUNCATE_EMBED_TEXT=true
TRAEFIK_LOG_LEVEL=DEBUG
WORKER_URL=temporal:7233

AGENTS_API_DEBUG=false
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GROQ_API_KEY=
CLOUDFLARE_API_KEY=
CLOUDFLARE_ACCOUNT_ID=
NVIDIA_NIM_API_KEY=
GITHUB_API_KEY=
VOYAGE_API_KEY=
GOOGLE_APPLICATION_CREDENTIALS=

LITELLM_URL=http://litellm:4000
POSTGRES_DB=litellm
POSTGRES_USER=llmproxy
POSTGRES_PASSWORD=
LITELLM_DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@litellm-db:5432/${POSTGRES_DB}
LITELLM_MASTER_KEY=
LITELLM_REDIS_HOST=litellm-redis
LITELLM_REDIS_PORT=6379
LITELLM_REDIS_PASSWORD=
REDIS_ARGS="--requirepass ${LITELLM_REDIS_PASSWORD}"
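# Replace <gpu|cpu> with either `cpu` or `gpu` to match the embedding service you run; the `gpu` service is only started under the 'gpu' profile.
EMBEDDING_SERVICE_BASE=http://text-embeddings-inference-<gpu|cpu>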

# Memory Store
# -----------

# COZO_HOST=http://memory-store:9070
# COZO_PORT=9070
# COZO_ROCKSDB_DIR=cozo.db
# COZO_BACKUP_DIR=backup
# COZO_MNT_DIR=/data

# Gateway
# ------

# GATEWAY_PORT=80
# TRAEFIK_LOG_LEVEL=INFO

# Agents API
# ---------

# AGENTS_API_KEY_HEADER_NAME=Authorization
# AGENTS_API_URL=http://agents-api:8080
# TRUNCATE_EMBED_TEXT=true
# WORKER_URL=temporal:7233
# AGENTS_API_DEBUG=false
# EMBEDDING_MODEL_ID=Alibaba-NLP/gte-large-en-v1.5
# NUM_GPUS=1

# Temporal
# --------

# TEMPORAL_ENDPOINT=temporal:7233
# TEMPORAL_NAMESPACE=default
# TEMPORAL_WORKER_URL=temporal:7233
# TEMPORAL_POSTGRES_DB=temporal
# TEMPORAL_POSTGRES_USER=temporal

# LiteLLM
# -------

# LITELLM_URL=http://litellm:4000
# LITELLM_POSTGRES_DB=litellm
# LITELLM_POSTGRES_USER=llmproxy
# LITELLM_REDIS_HOST=litellm-redis
# LITELLM_REDIS_PORT=6379

# LLM Providers
# --------------

# OPENAI_API_KEY=<your_openai_api_key>
# HUGGING_FACE_HUB_TOKEN=<your_hugging_face_hub_token>
# ANTHROPIC_API_KEY=<your_anthropic_api_key>
# GROQ_API_KEY=<your_groq_api_key>
# CLOUDFLARE_API_KEY=<your_cloudflare_api_key>
# CLOUDFLARE_ACCOUNT_ID=<your_cloudflare_account_id>
# NVIDIA_NIM_API_KEY=<your_nvidia_nim_api_key>
# GITHUB_API_KEY=<your_github_api_key>
# VOYAGE_API_KEY=<your_voyage_api_key>
# GOOGLE_APPLICATION_CREDENTIALS=.keys/julep-vertexai-svc.json
4 changes: 2 additions & 2 deletions agents-api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ENV PYTHONUNBUFFERED True
ENV POETRY_CACHE_DIR=/tmp/poetry_cache
Expand All @@ -16,4 +16,4 @@ COPY . ./

RUN poetry install --no-dev

ENTRYPOINT ["python", "agents_api/web.py", "--host", "0.0.0.0", "--port", "8080"]
ENTRYPOINT ["python", "-m", "agents_api.web", "--host", "0.0.0.0", "--port", "8080"]
4 changes: 0 additions & 4 deletions agents-api/Dockerfile.temporal

This file was deleted.

2 changes: 1 addition & 1 deletion agents-api/Dockerfile.worker
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ENV PYTHONUNBUFFERED True
ENV POETRY_CACHE_DIR=/tmp/poetry_cache
Expand Down
96 changes: 56 additions & 40 deletions agents-api/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,49 @@
name: julep-agents-api
version: "3"

# Base for embedding service
# Common settings for the embedding containers. The
# `text-embeddings-inference-cpu` and `text-embeddings-inference-gpu` services
# pull this mapping in with `<<: *text-embeddings-inference`.
x--text-embeddings-inference: &text-embeddings-inference
container_name: text-embeddings-inference
environment:
# Model to serve; EMBEDDING_MODEL_ID overrides the default after `:-`.
- MODEL_ID=${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}

# Image tagged `cpu-1.5`; the gpu service declares its own `image` key.
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
ports:
- "8082:80"
volumes:
# Host Hugging Face hub cache directory mounted at /data in the container.
- ~/.cache/huggingface/hub:/data

# Shared environment variables
# Merged into the `agents-api` and `worker` services through
# `<<: *shared-environment`. `${VAR:-default}` falls back to `default` when
# VAR is unset or empty.
x-shared-environment: &shared-environment
# Secrets: no fallback is defined for these three, so they must be supplied.
AGENTS_API_KEY: ${AGENTS_API_KEY}
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY}
COZO_AUTH_TOKEN: ${COZO_AUTH_TOKEN}
# Agents API settings.
SKIP_CHECK_DEVELOPER_HEADERS: ${SKIP_CHECK_DEVELOPER_HEADERS:-True}
AGENTS_API_KEY_HEADER_NAME: ${AGENTS_API_KEY_HEADER_NAME:-Authorization}
AGENTS_API_URL: ${AGENTS_API_URL:-http://agents-api:8080}
TRUNCATE_EMBED_TEXT: ${TRUNCATE_EMBED_TEXT:-False}
WORKER_URL: ${WORKER_URL:-temporal:7233}
# Note: the container sees DEBUG, but it is driven by AGENTS_API_DEBUG.
DEBUG: ${AGENTS_API_DEBUG:-False}
# Endpoints of the embedding, LiteLLM and memory-store services.
EMBEDDING_SERVICE_BASE: ${EMBEDDING_SERVICE_BASE:-http://text-embeddings-inference}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}
LITELLM_URL: ${LITELLM_URL:-http://litellm:4000}
COZO_HOST: ${COZO_HOST:-http://memory-store:9070}
SUMMARIZATION_MODEL_NAME: ${SUMMARIZATION_MODEL_NAME:-gpt-4-turbo}
# Temporal connection settings.
TEMPORAL_WORKER_URL: ${TEMPORAL_WORKER_URL:-temporal:7233}
TEMPORAL_NAMESPACE: ${TEMPORAL_NAMESPACE:-default}
TEMPORAL_ENDPOINT: ${TEMPORAL_ENDPOINT:-temporal:7233}
TEMPORAL_TASK_QUEUE: ${TEMPORAL_TASK_QUEUE:-julep-task-queue}

services:
agents-api:
image: julepai/agents-api:dev
env_file: "../.env"

image: julepai/agents-api:${TAG:-dev}
container_name: agents-api
depends_on:
memory-store:
condition: service_started
worker:
condition: service_started
environment:
<<: *shared-environment
build:
context: .
dockerfile: Dockerfile
Expand All @@ -31,15 +63,13 @@ services:
path: Dockerfile

worker:
image: julepai/worker:dev
env_file: "../.env"

image: julepai/worker:${TAG:-dev}
environment:
<<: *shared-environment
build:
context: .
dockerfile: Dockerfile.worker
depends_on:
text-embeddings-inference:
condition: service_started
temporal:
condition: service_started

Expand All @@ -55,49 +85,32 @@ services:
- action: rebuild
path: Dockerfile.worker

text-embeddings-inference:
container_name: text-embeddings-inference
text-embeddings-inference-cpu:
<<: *text-embeddings-inference
profiles:
- '' # Acts as a default profile. See: https://stackoverflow.com/questions/75758174/how-to-make-profile-default-for-docker-compose

text-embeddings-inference-gpu:
<<: *text-embeddings-inference
profiles:
- gpu
image: ghcr.io/huggingface/text-embeddings-inference:1.5
environment:
- DTYPE=float16
- MODEL_ID=Alibaba-NLP/gte-large-en-v1.5
- MODEL_ID=${EMBEDDING_MODEL_ID:-Alibaba-NLP/gte-large-en-v1.5}
- NVIDIA_VISIBLE_DEVICES=all

image: ghcr.io/huggingface/text-embeddings-inference:1.5
ports:
- "8082:80"
volumes:
- ~/.cache/huggingface/hub:/data
shm_size: "2gb"
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
count: ${NUM_GPUS:-1}
capabilities: [gpu]


temporal:
image: julepai/temporal:dev
container_name: temporal
env_file: "../.env"

build:
context: .
dockerfile: Dockerfile.temporal
ports:
- 7233:7233
volumes:
- temporal_data:/home/temporal

develop:
watch:
- action: rebuild
path: Dockerfile.temporal

cozo-migrate:
image: julepai/cozo-migrate:dev
env_file: "../.env"

image: julepai/cozo-migrate:${TAG:-dev}
container_name: cozo-migrate
depends_on:
memory-store:
Expand All @@ -106,6 +119,9 @@ services:
context: .
dockerfile: Dockerfile.migration
restart: "no" # Make sure to double quote this
# Connection settings for the migration job. The COZO_HOST fallback targets
# the `memory-store` service this job depends on, matching the default used
# in the shared environment block (previously `http://cozo:9070`, a host
# that no service in this file declares).
# NOTE(review): COZO_AUTH_TOKEN falls back to the placeholder `myauthkey`;
# set a real token in .env for anything beyond local development.
environment:
- COZO_HOST=${COZO_HOST:-http://memory-store:9070}
- COZO_AUTH_TOKEN=${COZO_AUTH_TOKEN:-myauthkey}

develop:
watch:
Expand Down
2 changes: 1 addition & 1 deletion agents-api/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion agents-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ openai = "^1.41.0"
httpx = "^0.27.0"
sentry-sdk = {extras = ["fastapi"], version = "^2.13.0"}
temporalio = "^1.6.0"
pydantic = "^2.8.2"
pydantic = {extras = ["email"], version = "^2.8.2"}
arrow = "^1.3.0"
jinja2 = "^3.1.4"
jinja2schema = "^0.1.4"
Expand Down
13 changes: 0 additions & 13 deletions deploy/.env.example

This file was deleted.

Loading

0 comments on commit 9f5f0cb

Please sign in to comment.