From c943fd287cad1be68d06565b87774bf573311875 Mon Sep 17 00:00:00 2001 From: sonalibud Date: Mon, 23 Sep 2024 07:21:20 +0000 Subject: [PATCH 1/6] feat: custom auth dependency added --- litellm/custom_auth.py | 158 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 litellm/custom_auth.py diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py new file mode 100644 index 000000000000..807d092da8bc --- /dev/null +++ b/litellm/custom_auth.py @@ -0,0 +1,158 @@ +from datetime import datetime, timedelta +from pydantic import BaseModel + +import litellm +from litellm.proxy._types import * +from litellm.proxy.auth.auth_utils import ( + get_request_route, + pre_db_read_auth_checks, +) +from litellm._logging import verbose_proxy_logger +from litellm.proxy.common_utils.http_parsing_utils import _read_request_body + + +async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: + """ + Custom Auth dependency for User API Key Authentication + We receive budserve ap key and check if it is valid + + Steps: + + 1. Check api-key in cache + 2. Get api-key details from db + 3. Check expiry + 4. Check budget + 5. 
Check model budget + """ + try: + from litellm.proxy.proxy_server import user_api_key_cache, master_key + + route: str = get_request_route(request=request) + # get the request body + request_data = await _read_request_body(request=request) + await pre_db_read_auth_checks( + request_data=request_data, + request=request, + route=route, + ) + + # look for info is user_api_key_auth cache + valid_token: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache( + key=hash_token(api_key) + ) + # OR + # valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore + # key=api_key + # ) + if valid_token is None: + # getting token details from authentication service + _valid_token = BaseModel( + api_key=api_key, + expires=(datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S"), + budget=100, + model_max_budget={}, + model_spend={}, + spend=50, + ) + valid_token = UserAPIKeyAuth( + **_valid_token.model_dump(exclude_none=True) + ) + if valid_token is not None: + if valid_token.expires is not None: + current_time = datetime.now(timezone.utc) + expiry_time = datetime.fromisoformat(valid_token.expires) + if ( + expiry_time.tzinfo is None + or expiry_time.tzinfo.utcoffset(expiry_time) is None + ): + expiry_time = expiry_time.replace(tzinfo=timezone.utc) + verbose_proxy_logger.debug( + f"Checking if token expired, expiry time {expiry_time} and current time {current_time}" + ) + if expiry_time < current_time: + # Token exists but is expired. + raise ProxyException( + message=f"Authentication Error - Expired Key. 
Key Expiry time {expiry_time} and current time {current_time}", + type=ProxyErrorTypes.expired_key, + code=400, + param=api_key, + ) + if valid_token.spend is not None and valid_token.max_budget is not None: + if valid_token.spend >= valid_token.max_budget: + raise litellm.BudgetExceededError( + current_cost=valid_token.spend, + max_budget=valid_token.max_budget, + ) + max_budget_per_model = valid_token.model_max_budget + current_model = request_data.get("model", None) + if ( + max_budget_per_model is not None + and isinstance(max_budget_per_model, dict) + and len(max_budget_per_model) > 0 + and prisma_client is not None + and current_model is not None + and valid_token.token is not None + ): + ## GET THE SPEND FOR THIS MODEL + twenty_eight_days_ago = datetime.now() - timedelta(days=28) + model_spend = await prisma_client.db.litellm_spendlogs.group_by( + by=["model"], + sum={"spend": True}, + where={ + "AND": [ + {"api_key": valid_token.token}, + {"startTime": {"gt": twenty_eight_days_ago}}, + {"model": current_model}, + ] + }, # type: ignore + ) + if ( + len(model_spend) > 0 + and max_budget_per_model.get(current_model, None) is not None + ): + if ( + "model" in model_spend[0] + and model_spend[0].get("model") == current_model + and "_sum" in model_spend[0] + and "spend" in model_spend[0]["_sum"] + and model_spend[0]["_sum"]["spend"] + >= max_budget_per_model[current_model] + ): + current_model_spend = model_spend[0]["_sum"]["spend"] + current_model_budget = max_budget_per_model[current_model] + raise litellm.BudgetExceededError( + current_cost=current_model_spend, + max_budget=current_model_budget, + ) + # Add hashed token to cache + await user_api_key_cache.async_set_cache( + key=api_key, + value=valid_token, + ) + else: + # No token was found when looking up in the DB + raise Exception("Invalid proxy server token passed") + + except Exception as e: + if isinstance(e, litellm.BudgetExceededError): + raise ProxyException( + message=e.message, + 
type=ProxyErrorTypes.budget_exceeded, + param=None, + code=400, + ) + if isinstance(e, HTTPException): + raise ProxyException( + message=getattr(e, "detail", f"Authentication Error({str(e)})"), + type=ProxyErrorTypes.auth_error, + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_401_UNAUTHORIZED), + ) + elif isinstance(e, ProxyException): + raise e + raise ProxyException( + message="Authentication Error, " + str(e), + type=ProxyErrorTypes.auth_error, + param=getattr(e, "param", "None"), + code=status.HTTP_401_UNAUTHORIZED, + ) \ No newline at end of file From 9d7d507cd70ac9312d1887b9e7c0c60ab6d37134 Mon Sep 17 00:00:00 2001 From: sonalibud Date: Tue, 24 Sep 2024 08:18:51 +0000 Subject: [PATCH 2/6] fix: resolved dependency issues and cache store issue --- litellm/custom_auth.py | 71 +++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py index 807d092da8bc..75eec4cac92f 100644 --- a/litellm/custom_auth.py +++ b/litellm/custom_auth.py @@ -1,17 +1,16 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone + +from fastapi import HTTPException, Request, status from pydantic import BaseModel import litellm -from litellm.proxy._types import * -from litellm.proxy.auth.auth_utils import ( - get_request_route, - pre_db_read_auth_checks, -) from litellm._logging import verbose_proxy_logger +from litellm.proxy._types import * +from litellm.proxy.auth.auth_utils import get_request_route, pre_db_read_auth_checks from litellm.proxy.common_utils.http_parsing_utils import _read_request_body -async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: +async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: """ Custom Auth dependency for User API Key Authentication We receive budserve ap key and check if it is valid @@ -25,8 +24,14 @@ async def user_api_key_auth(request: 
Request, api_key: str) -> UserAPIKeyAuth: 5. Check model budget """ try: - from litellm.proxy.proxy_server import user_api_key_cache, master_key - + from litellm.proxy.proxy_server import ( + master_key, + prisma_client, + user_api_key_cache, + ) + + api_key = f"sk-{api_key}" + route: str = get_request_route(request=request) # get the request body request_data = await _read_request_body(request=request) @@ -35,10 +40,16 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: request=request, route=route, ) - + # look for info is user_api_key_auth cache - valid_token: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache( - key=hash_token(api_key) + verbose_proxy_logger.debug(f"API key sent in request >>> {api_key}") + hashed_token = hash_token(api_key) + valid_token: Optional[UserAPIKeyAuth] = ( + await user_api_key_cache.async_get_cache(key=hashed_token) + ) + + verbose_proxy_logger.debug( + f"Valid token from cache for key : {hashed_token} >>> {valid_token}" ) # OR # valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore @@ -46,17 +57,22 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: # ) if valid_token is None: # getting token details from authentication service - _valid_token = BaseModel( - api_key=api_key, - expires=(datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S"), - budget=100, - model_max_budget={}, - model_spend={}, - spend=50, - ) + credential_dict = { + "key": api_key.removeprefix("sk-"), + "expiry": (datetime.now() + timedelta(days=1)).strftime( + "%Y-%m-%d %H:%M:%S" + ), + "max_budget": 0.005, + "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002}, + } valid_token = UserAPIKeyAuth( - **_valid_token.model_dump(exclude_none=True) + api_key=f"sk-{credential_dict['key']}", + expires=credential_dict["expiry"], + max_budget=credential_dict["max_budget"], + model_max_budget=credential_dict["model_budgets"], ) + 
verbose_proxy_logger.debug(f"Valid token from DB >>> {valid_token}") + verbose_proxy_logger.debug(f"Valid token spend >> {valid_token.spend}") if valid_token is not None: if valid_token.expires is not None: current_time = datetime.now(timezone.utc) @@ -106,6 +122,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: ] }, # type: ignore ) + verbose_proxy_logger.debug(f"model spends >> {model_spend}") if ( len(model_spend) > 0 and max_budget_per_model.get(current_model, None) is not None @@ -125,15 +142,19 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: max_budget=current_model_budget, ) # Add hashed token to cache + verbose_proxy_logger.debug( + f"Valid token storing in cache for key : {valid_token.token}" + ) await user_api_key_cache.async_set_cache( - key=api_key, + key=valid_token.token, value=valid_token, ) + return valid_token else: # No token was found when looking up in the DB raise Exception("Invalid proxy server token passed") - - except Exception as e: + + except Exception as e: if isinstance(e, litellm.BudgetExceededError): raise ProxyException( message=e.message, @@ -155,4 +176,4 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: type=ProxyErrorTypes.auth_error, param=getattr(e, "param", "None"), code=status.HTTP_401_UNAUTHORIZED, - ) \ No newline at end of file + ) From d5f60b805ebb77b0dda75381d5217aa7dc34336f Mon Sep 17 00:00:00 2001 From: sonalibud Date: Tue, 24 Sep 2024 08:44:31 +0000 Subject: [PATCH 3/6] fix: initial api key spend detail taken from db --- litellm/custom_auth.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py index 75eec4cac92f..91613ff9439c 100644 --- a/litellm/custom_auth.py +++ b/litellm/custom_auth.py @@ -30,6 +30,9 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: user_api_key_cache, ) + if prisma_client is None: + 
raise Exception("Prisma client not initialized") + api_key = f"sk-{api_key}" route: str = get_request_route(request=request) @@ -62,7 +65,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: "expiry": (datetime.now() + timedelta(days=1)).strftime( "%Y-%m-%d %H:%M:%S" ), - "max_budget": 0.005, + "max_budget": 1, "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002}, } valid_token = UserAPIKeyAuth( @@ -71,6 +74,22 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: max_budget=credential_dict["max_budget"], model_max_budget=credential_dict["model_budgets"], ) + api_key_spend = await prisma_client.db.litellm_spendlogs.group_by( + by=["api_key"], + sum={"spend": True}, + where={ + "AND": [ + {"api_key": valid_token.token}, + ] + }, # type: ignore + ) + if ( + len(api_key_spend) > 0 + and "_sum" in api_key_spend[0] + and "spend" in api_key_spend[0]["_sum"] + and api_key_spend[0]["_sum"]["spend"] + ): + valid_token.spend = api_key_spend[0]["_sum"]["spend"] verbose_proxy_logger.debug(f"Valid token from DB >>> {valid_token}") verbose_proxy_logger.debug(f"Valid token spend >> {valid_token.spend}") if valid_token is not None: From 2e676d49ae7d56ce8264b67dc621a298310fbe23 Mon Sep 17 00:00:00 2001 From: sonalibud Date: Wed, 25 Sep 2024 14:56:17 +0000 Subject: [PATCH 4/6] feat: budserve middleware to fetch endpoint and project settings and build user_config dictionary and pass it along llm request --- litellm/proxy/budserve_middleware.py | 99 ++++++++++++++++++++++++++++ litellm/proxy/proxy_server.py | 2 + litellm/router.py | 20 +++++- 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 litellm/proxy/budserve_middleware.py diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py new file mode 100644 index 000000000000..c1b229f194b4 --- /dev/null +++ b/litellm/proxy/budserve_middleware.py @@ -0,0 +1,99 @@ +import json + +from fastapi import Request +from 
starlette.middleware.base import BaseHTTPMiddleware + +from litellm._logging import verbose_proxy_logger +from litellm.proxy.auth.auth_utils import get_request_route +from litellm.proxy.common_utils.http_parsing_utils import _read_request_body + + +class BudServeMiddleware(BaseHTTPMiddleware): + llm_request_list = [ + "/chat/completions", + "/completions", + "/embeddings", + "/images/generation", + "/audio/speech", + "/audio/transcriptions", + ] + + async def get_api_key(self, request): + authorization_header = request.headers.get("Authorization") + api_key = authorization_header.split(" ")[1] + return api_key + + async def dispatch( + self, + request, + call_next, + ): + """ + Steps to prepare user_config + + 1. api_key and model (endpoint_name) fetch all endpoint details : model_list + 2. Using models involved in endpoint details, fetch proprietary credentials + 3. Create user_config using model_configuration (endpoint model) and router_config (project model) + 4. Add validations for fallbacks + """ + route: str = get_request_route(request=request) + verbose_proxy_logger.info(f"Request: {route}") + run_through_middleware = any( + each_route in route for each_route in self.llm_request_list + ) + verbose_proxy_logger.info(f"Run Through Middleware: {run_through_middleware}") + if not run_through_middleware: + return await call_next(request) + + # get the request body + request_data = await _read_request_body(request=request) + api_key = await self.get_api_key(request) + endpoint_name = request_data.get("model") + + # get endpoint details to fill cache_params + # redis connection params we will set as kubernetes env variables + # can be fetched using os.getenv + import os + + request_data["user_config"] = { + "cache_responses": False, + "redis_host": os.getenv("REDIS_HOST", "localhost"), + "redis_port": os.getenv("REDIS_PORT", 6379), + "redis_password": os.getenv("REDIS_PASSWORD", ""), + "endpoint_cache_settings": { + "cache": False, + "type": "redis-semantic", # 
gpt_cache_redis + "cache_params": { + "host": os.getenv("REDIS_HOST", "localhost"), + "port": os.getenv("REDIS_PORT", 6379), + "password": os.getenv("REDIS_PASSWORD", ""), + "similarity_threshold": 0.8, + "redis_semantic_cache_use_async": False, + "redis_semantic_cache_embedding_model": "sentence-transformers/all-mpnet-base-v2", + "eviction_policy": {"policy": "ttl", "max_size": 100, "ttl": 600}, + }, + }, + "model_list": [ + { + "model_name": "gpt4", + "litellm_params": { + "model": "openai/gpt-3.5-turbo", + "api_key": os.getenv("OPENAI_API_KEY", "dummy"), + "rpm": 100, + "request_timeout": 120, + }, + "model_info": {"id": "model_id:123"}, + }, + { + "model_name": "gpt4", + "litellm_params": { + "model": "openai/gpt-4", + "api_key": os.getenv("OPENAI_API_KEY", "dummy"), + "tpm": 10000, + }, + "model_info": {"id": "model_id:456"}, + }, + ], + } + request._body = json.dumps(request_data).encode("utf-8") + return await call_next(request) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 03ba2e839ed3..4ee4f44b3bd6 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -135,6 +135,7 @@ def generate_feedback_box(): get_team_models, ) from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm.proxy.budserve_middleware import BudServeMiddleware ## Import All Misc routes here ## from litellm.proxy.caching_routes import router as caching_router @@ -450,6 +451,7 @@ async def redirect_ui_middleware(request: Request, call_next): allow_methods=["*"], allow_headers=["*"], ) +app.add_middleware(BudServeMiddleware) from typing import Dict diff --git a/litellm/router.py b/litellm/router.py index b3a07ad4e56f..3ef14b134440 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -206,6 +206,7 @@ def __init__( router_general_settings: Optional[ RouterGeneralSettings ] = RouterGeneralSettings(), + endpoint_cache_settings: Optional[dict] = None, ) -> None: """ Initialize the Router class with the given 
parameters for caching, reliability, and routing strategy. @@ -307,7 +308,13 @@ def __init__( and redis_port is not None and redis_password is not None ): - cache_type = "redis" + cache_type = ( + "redis" + if endpoint_cache_settings is None + else endpoint_cache_settings.get("cache_params", {}).get( + "type", "redis" + ) + ) if redis_url is not None: cache_config["url"] = redis_url @@ -326,9 +333,16 @@ def __init__( redis_cache = RedisCache(**cache_config) if cache_responses: - if litellm.cache is None: + if litellm.cache is None and endpoint_cache_settings is not None: # the cache can be initialized on the proxy server. We should not overwrite it - litellm.cache = litellm.Cache(type=cache_type, **cache_config) # type: ignore + # user_config : enabled cache + enable_cache = endpoint_cache_settings.get("cache", False) + if enable_cache: + endpoint_cache_config = endpoint_cache_settings.get( + "cache_params", {} + ) + if endpoint_cache_config: + litellm.cache = litellm.Cache(type=cache_type, **endpoint_cache_config) # type: ignore self.cache_responses = cache_responses self.cache = DualCache( redis_cache=redis_cache, in_memory_cache=InMemoryCache() From 792dc8b5a460f3f600cab0bb3e59d3277c4705c2 Mon Sep 17 00:00:00 2001 From: sonalibud Date: Thu, 26 Sep 2024 05:59:11 +0000 Subject: [PATCH 5/6] feat: added retrieve credential details API in custom auth --- litellm/custom_auth.py | 64 +++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py index 91613ff9439c..fd21bbe223f4 100644 --- a/litellm/custom_auth.py +++ b/litellm/custom_auth.py @@ -1,5 +1,8 @@ +import asyncio +import os from datetime import datetime, timedelta, timezone +import httpx from fastapi import HTTPException, Request, status from pydantic import BaseModel @@ -10,6 +13,15 @@ from litellm.proxy.common_utils.http_parsing_utils import _read_request_body +async def fetch_data(url: str): + async with 
httpx.AsyncClient() as client: + response = await client.get(url) + return response + + +budserve_app_baseurl = os.getenv("BUDSERVE_APP_BASEURL", "http://localhost:9000") + + async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: """ Custom Auth dependency for User API Key Authentication @@ -51,28 +63,30 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: await user_api_key_cache.async_get_cache(key=hashed_token) ) - verbose_proxy_logger.debug( + verbose_proxy_logger.info( f"Valid token from cache for key : {hashed_token} >>> {valid_token}" ) - # OR - # valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore - # key=api_key - # ) if valid_token is None: # getting token details from authentication service - credential_dict = { - "key": api_key.removeprefix("sk-"), - "expiry": (datetime.now() + timedelta(days=1)).strftime( - "%Y-%m-%d %H:%M:%S" - ), - "max_budget": 1, - "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002}, - } + url = f"{budserve_app_baseurl}/credentials/details/{api_key.removeprefix('sk-')}" + credential_details_response = await fetch_data(url) + if credential_details_response.status_code != 200: + # No token was found when looking up in the DB + raise Exception("Invalid api key passed") + credential_dict = credential_details_response.json()["result"] + # credential_dict = { + # "key": api_key.removeprefix("sk-"), + # "expiry": (datetime.now() + timedelta(days=1)).strftime( + # "%Y-%m-%d %H:%M:%S" + # ), + # "max_budget": 1, + # "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002}, + # } valid_token = UserAPIKeyAuth( api_key=f"sk-{credential_dict['key']}", expires=credential_dict["expiry"], max_budget=credential_dict["max_budget"], - model_max_budget=credential_dict["model_budgets"], + model_max_budget=credential_dict["model_budgets"] or {}, ) api_key_spend = await prisma_client.db.litellm_spendlogs.group_by( by=["api_key"], @@ -90,8 +104,16 @@ async def 
user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: and api_key_spend[0]["_sum"]["spend"] ): valid_token.spend = api_key_spend[0]["_sum"]["spend"] - verbose_proxy_logger.debug(f"Valid token from DB >>> {valid_token}") - verbose_proxy_logger.debug(f"Valid token spend >> {valid_token.spend}") + # Add hashed token to cache + verbose_proxy_logger.info( + f"Valid token storing in cache for key : {valid_token.token}" + ) + await user_api_key_cache.async_set_cache( + key=valid_token.token, + value=valid_token, + ) + verbose_proxy_logger.info(f"Valid token from DB >>> {valid_token}") + verbose_proxy_logger.info(f"Valid token spend >> {valid_token.spend}") if valid_token is not None: if valid_token.expires is not None: current_time = datetime.now(timezone.utc) @@ -160,18 +182,10 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: current_cost=current_model_spend, max_budget=current_model_budget, ) - # Add hashed token to cache - verbose_proxy_logger.debug( - f"Valid token storing in cache for key : {valid_token.token}" - ) - await user_api_key_cache.async_set_cache( - key=valid_token.token, - value=valid_token, - ) return valid_token else: # No token was found when looking up in the DB - raise Exception("Invalid proxy server token passed") + raise Exception("Invalid api key passed") except Exception as e: if isinstance(e, litellm.BudgetExceededError): From fa1a0f416d442f0b7f208ff338ba4d3754a50dd6 Mon Sep 17 00:00:00 2001 From: sonalibud Date: Thu, 26 Sep 2024 06:07:54 +0000 Subject: [PATCH 6/6] chore: added example env and config for global litellm proxy server --- .env.example.global | 8 ++++++++ litellm_config.yaml | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 .env.example.global create mode 100644 litellm_config.yaml diff --git a/.env.example.global b/.env.example.global new file mode 100644 index 000000000000..6ccaa58aa3ed --- /dev/null +++ b/.env.example.global @@ -0,0 +1,8 @@ +export 
LITELLM_MASTER_KEY= +export DATABASE_URL= +export STORE_MODEL_IN_DB='True' +export LITELLM_SALT_KEY= +export REDIS_HOST=localhost +export REDIS_PORT=6379 +export REDIS_PASSWORD= +export BUDSERVE_APP_BASEURL="http://localhost:8000" \ No newline at end of file diff --git a/litellm_config.yaml b/litellm_config.yaml new file mode 100644 index 000000000000..01422da9f8f7 --- /dev/null +++ b/litellm_config.yaml @@ -0,0 +1,8 @@ +general_settings: + store_model_in_db: True + custom_auth: litellm.custom_auth.user_api_key_auth +router_settings: + cache_responses: False + redis_host: "os.environ/REDIS_HOST" + redis_port: "os.environ/REDIS_PORT" + redis_password: "os.environ/REDIS_PASSWORD" \ No newline at end of file