From c943fd287cad1be68d06565b87774bf573311875 Mon Sep 17 00:00:00 2001
From: sonalibud <sonali@bud.studio>
Date: Mon, 23 Sep 2024 07:21:20 +0000
Subject: [PATCH 1/6] feat: custom auth dependency added

---
 litellm/custom_auth.py | 158 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 litellm/custom_auth.py

diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py
new file mode 100644
index 000000000000..807d092da8bc
--- /dev/null
+++ b/litellm/custom_auth.py
@@ -0,0 +1,158 @@
+from datetime import datetime, timedelta
+from pydantic import BaseModel
+
+import litellm
+from litellm.proxy._types import *
+from litellm.proxy.auth.auth_utils import (
+    get_request_route,
+    pre_db_read_auth_checks,
+)
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+
+
+async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: 
+    """
+    Custom Auth dependency for User API Key Authentication
+    We receive budserve ap key and check if it is valid
+
+    Steps:
+
+    1. Check api-key in cache
+    2. Get api-key details from db
+    3. Check expiry
+    4. Check budget
+    5. Check model budget
+    """
+    try:
+        from litellm.proxy.proxy_server import user_api_key_cache, master_key
+        
+        route: str = get_request_route(request=request)
+        # get the request body
+        request_data = await _read_request_body(request=request)
+        await pre_db_read_auth_checks(
+            request_data=request_data,
+            request=request,
+            route=route,
+        )
+        
+        # look for info is user_api_key_auth cache
+        valid_token: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache(
+            key=hash_token(api_key)
+        )
+        # OR
+        # valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache(  # type: ignore
+        #     key=api_key
+        # )
+        if valid_token is None:
+            # getting token details from authentication service
+            _valid_token = BaseModel(
+                api_key=api_key,
+                expires=(datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S"),
+                budget=100,
+                model_max_budget={},
+                model_spend={},
+                spend=50,
+            )
+            valid_token = UserAPIKeyAuth(
+                **_valid_token.model_dump(exclude_none=True)
+            )
+        if valid_token is not None:
+            if valid_token.expires is not None:
+                current_time = datetime.now(timezone.utc)
+                expiry_time = datetime.fromisoformat(valid_token.expires)
+                if (
+                    expiry_time.tzinfo is None
+                    or expiry_time.tzinfo.utcoffset(expiry_time) is None
+                ):
+                    expiry_time = expiry_time.replace(tzinfo=timezone.utc)
+                verbose_proxy_logger.debug(
+                    f"Checking if token expired, expiry time {expiry_time} and current time {current_time}"
+                )
+                if expiry_time < current_time:
+                    # Token exists but is expired.
+                    raise ProxyException(
+                        message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
+                        type=ProxyErrorTypes.expired_key,
+                        code=400,
+                        param=api_key,
+                    )
+            if valid_token.spend is not None and valid_token.max_budget is not None:
+                if valid_token.spend >= valid_token.max_budget:
+                    raise litellm.BudgetExceededError(
+                        current_cost=valid_token.spend,
+                        max_budget=valid_token.max_budget,
+                    )
+                max_budget_per_model = valid_token.model_max_budget
+                current_model = request_data.get("model", None)
+                if (
+                    max_budget_per_model is not None
+                    and isinstance(max_budget_per_model, dict)
+                    and len(max_budget_per_model) > 0
+                    and prisma_client is not None
+                    and current_model is not None
+                    and valid_token.token is not None
+                ):
+                    ## GET THE SPEND FOR THIS MODEL
+                    twenty_eight_days_ago = datetime.now() - timedelta(days=28)
+                    model_spend = await prisma_client.db.litellm_spendlogs.group_by(
+                        by=["model"],
+                        sum={"spend": True},
+                        where={
+                            "AND": [
+                                {"api_key": valid_token.token},
+                                {"startTime": {"gt": twenty_eight_days_ago}},
+                                {"model": current_model},
+                            ]
+                        },  # type: ignore
+                    )
+                    if (
+                        len(model_spend) > 0
+                        and max_budget_per_model.get(current_model, None) is not None
+                    ):
+                        if (
+                            "model" in model_spend[0]
+                            and model_spend[0].get("model") == current_model
+                            and "_sum" in model_spend[0]
+                            and "spend" in model_spend[0]["_sum"]
+                            and model_spend[0]["_sum"]["spend"]
+                            >= max_budget_per_model[current_model]
+                        ):
+                            current_model_spend = model_spend[0]["_sum"]["spend"]
+                            current_model_budget = max_budget_per_model[current_model]
+                            raise litellm.BudgetExceededError(
+                                current_cost=current_model_spend,
+                                max_budget=current_model_budget,
+                            )
+            # Add hashed token to cache
+            await user_api_key_cache.async_set_cache(
+                key=api_key,
+                value=valid_token,
+            )
+        else:
+            # No token was found when looking up in the DB
+            raise Exception("Invalid proxy server token passed")
+        
+    except Exception as e: 
+        if isinstance(e, litellm.BudgetExceededError):
+            raise ProxyException(
+                message=e.message,
+                type=ProxyErrorTypes.budget_exceeded,
+                param=None,
+                code=400,
+            )
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", f"Authentication Error({str(e)})"),
+                type=ProxyErrorTypes.auth_error,
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_401_UNAUTHORIZED),
+            )
+        elif isinstance(e, ProxyException):
+            raise e
+        raise ProxyException(
+            message="Authentication Error, " + str(e),
+            type=ProxyErrorTypes.auth_error,
+            param=getattr(e, "param", "None"),
+            code=status.HTTP_401_UNAUTHORIZED,
+        )
\ No newline at end of file

From 9d7d507cd70ac9312d1887b9e7c0c60ab6d37134 Mon Sep 17 00:00:00 2001
From: sonalibud <sonali@bud.studio>
Date: Tue, 24 Sep 2024 08:18:51 +0000
Subject: [PATCH 2/6] fix: resolved dependency issues and cache store issue

---
 litellm/custom_auth.py | 71 +++++++++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py
index 807d092da8bc..75eec4cac92f 100644
--- a/litellm/custom_auth.py
+++ b/litellm/custom_auth.py
@@ -1,17 +1,16 @@
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
+
+from fastapi import HTTPException, Request, status
 from pydantic import BaseModel
 
 import litellm
-from litellm.proxy._types import *
-from litellm.proxy.auth.auth_utils import (
-    get_request_route,
-    pre_db_read_auth_checks,
-)
 from litellm._logging import verbose_proxy_logger
+from litellm.proxy._types import *
+from litellm.proxy.auth.auth_utils import get_request_route, pre_db_read_auth_checks
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 
 
-async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: 
+async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     """
     Custom Auth dependency for User API Key Authentication
     We receive budserve ap key and check if it is valid
@@ -25,8 +24,14 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     5. Check model budget
     """
     try:
-        from litellm.proxy.proxy_server import user_api_key_cache, master_key
-        
+        from litellm.proxy.proxy_server import (
+            master_key,
+            prisma_client,
+            user_api_key_cache,
+        )
+
+        api_key = f"sk-{api_key}"
+
         route: str = get_request_route(request=request)
         # get the request body
         request_data = await _read_request_body(request=request)
@@ -35,10 +40,16 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
             request=request,
             route=route,
         )
-        
+
         # look for info is user_api_key_auth cache
-        valid_token: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache(
-            key=hash_token(api_key)
+        verbose_proxy_logger.debug(f"API key sent in request >>> {api_key}")
+        hashed_token = hash_token(api_key)
+        valid_token: Optional[UserAPIKeyAuth] = (
+            await user_api_key_cache.async_get_cache(key=hashed_token)
+        )
+
+        verbose_proxy_logger.debug(
+            f"Valid token from cache for key : {hashed_token} >>> {valid_token}"
         )
         # OR
         # valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache(  # type: ignore
@@ -46,17 +57,22 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
         # )
         if valid_token is None:
             # getting token details from authentication service
-            _valid_token = BaseModel(
-                api_key=api_key,
-                expires=(datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S"),
-                budget=100,
-                model_max_budget={},
-                model_spend={},
-                spend=50,
-            )
+            credential_dict = {
+                "key": api_key.removeprefix("sk-"),
+                "expiry": (datetime.now() + timedelta(days=1)).strftime(
+                    "%Y-%m-%d %H:%M:%S"
+                ),
+                "max_budget": 0.005,
+                "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002},
+            }
             valid_token = UserAPIKeyAuth(
-                **_valid_token.model_dump(exclude_none=True)
+                api_key=f"sk-{credential_dict['key']}",
+                expires=credential_dict["expiry"],
+                max_budget=credential_dict["max_budget"],
+                model_max_budget=credential_dict["model_budgets"],
             )
+            verbose_proxy_logger.debug(f"Valid token from DB >>> {valid_token}")
+        verbose_proxy_logger.debug(f"Valid token spend >> {valid_token.spend}")
         if valid_token is not None:
             if valid_token.expires is not None:
                 current_time = datetime.now(timezone.utc)
@@ -106,6 +122,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
                             ]
                         },  # type: ignore
                     )
+                    verbose_proxy_logger.debug(f"model spends >> {model_spend}")
                     if (
                         len(model_spend) > 0
                         and max_budget_per_model.get(current_model, None) is not None
@@ -125,15 +142,19 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
                                 max_budget=current_model_budget,
                             )
             # Add hashed token to cache
+            verbose_proxy_logger.debug(
+                f"Valid token storing in cache for key : {valid_token.token}"
+            )
             await user_api_key_cache.async_set_cache(
-                key=api_key,
+                key=valid_token.token,
                 value=valid_token,
             )
+            return valid_token
         else:
             # No token was found when looking up in the DB
             raise Exception("Invalid proxy server token passed")
-        
-    except Exception as e: 
+
+    except Exception as e:
         if isinstance(e, litellm.BudgetExceededError):
             raise ProxyException(
                 message=e.message,
@@ -155,4 +176,4 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
             type=ProxyErrorTypes.auth_error,
             param=getattr(e, "param", "None"),
             code=status.HTTP_401_UNAUTHORIZED,
-        )
\ No newline at end of file
+        )

From d5f60b805ebb77b0dda75381d5217aa7dc34336f Mon Sep 17 00:00:00 2001
From: sonalibud <sonali@bud.studio>
Date: Tue, 24 Sep 2024 08:44:31 +0000
Subject: [PATCH 3/6] fix: initial api key spend detail taken from db

---
 litellm/custom_auth.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py
index 75eec4cac92f..91613ff9439c 100644
--- a/litellm/custom_auth.py
+++ b/litellm/custom_auth.py
@@ -30,6 +30,9 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
             user_api_key_cache,
         )
 
+        if prisma_client is None:
+            raise Exception("Prisma client not initialized")
+
         api_key = f"sk-{api_key}"
 
         route: str = get_request_route(request=request)
@@ -62,7 +65,7 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
                 "expiry": (datetime.now() + timedelta(days=1)).strftime(
                     "%Y-%m-%d %H:%M:%S"
                 ),
-                "max_budget": 0.005,
+                "max_budget": 1,
                 "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002},
             }
             valid_token = UserAPIKeyAuth(
@@ -71,6 +74,22 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
                 max_budget=credential_dict["max_budget"],
                 model_max_budget=credential_dict["model_budgets"],
             )
+            api_key_spend = await prisma_client.db.litellm_spendlogs.group_by(
+                by=["api_key"],
+                sum={"spend": True},
+                where={
+                    "AND": [
+                        {"api_key": valid_token.token},
+                    ]
+                },  # type: ignore
+            )
+            if (
+                len(api_key_spend) > 0
+                and "_sum" in api_key_spend[0]
+                and "spend" in api_key_spend[0]["_sum"]
+                and api_key_spend[0]["_sum"]["spend"]
+            ):
+                valid_token.spend = api_key_spend[0]["_sum"]["spend"]
             verbose_proxy_logger.debug(f"Valid token from DB >>> {valid_token}")
         verbose_proxy_logger.debug(f"Valid token spend >> {valid_token.spend}")
         if valid_token is not None:

From 2e676d49ae7d56ce8264b67dc621a298310fbe23 Mon Sep 17 00:00:00 2001
From: sonalibud <sonali@bud.studio>
Date: Wed, 25 Sep 2024 14:56:17 +0000
Subject: [PATCH 4/6] feat: budserve middleware to fetch endpoint and project
 settings, build user_config dictionary and pass it along with the llm request

---
 litellm/proxy/budserve_middleware.py | 99 ++++++++++++++++++++++++++++
 litellm/proxy/proxy_server.py        |  2 +
 litellm/router.py                    | 20 +++++-
 3 files changed, 118 insertions(+), 3 deletions(-)
 create mode 100644 litellm/proxy/budserve_middleware.py

diff --git a/litellm/proxy/budserve_middleware.py b/litellm/proxy/budserve_middleware.py
new file mode 100644
index 000000000000..c1b229f194b4
--- /dev/null
+++ b/litellm/proxy/budserve_middleware.py
@@ -0,0 +1,99 @@
+import json
+
+from fastapi import Request
+from starlette.middleware.base import BaseHTTPMiddleware
+
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy.auth.auth_utils import get_request_route
+from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+
+
+class BudServeMiddleware(BaseHTTPMiddleware):
+    """Attach a ``user_config`` dict to the JSON body of LLM-route requests."""
+
+    # Route fragments; a request is processed when any is a substring of its route.
+    llm_request_list = [
+        "/chat/completions",
+        "/completions",
+        "/embeddings",
+        "/images/generation",
+        "/audio/speech",
+        "/audio/transcriptions",
+    ]
+
+    async def get_api_key(self, request):
+        """Return the Authorization header token, or None if missing/malformed."""
+        authorization_header = request.headers.get("Authorization") or ""
+        parts = authorization_header.split(" ")
+        return parts[1] if len(parts) > 1 else None
+
+    async def dispatch(self, request, call_next):
+        """
+        Inject user_config into LLM-route request bodies; pass other routes through.
+
+        Intended steps (only partly implemented: user_config below is hard-coded):
+        1. Using api_key and model (endpoint_name), fetch all endpoint details: model_list
+        2. Using models involved in endpoint details, fetch proprietary credentials
+        3. Create user_config using model_configuration (endpoint model) and router_config (project model)
+        4. Add validations for fallbacks
+        """
+        route: str = get_request_route(request=request)
+        verbose_proxy_logger.info(f"Request: {route}")
+        run_through_middleware = any(r in route for r in self.llm_request_list)
+        verbose_proxy_logger.info(f"Run Through Middleware: {run_through_middleware}")
+        if not run_through_middleware:
+            return await call_next(request)
+
+        # get the request body
+        request_data = await _read_request_body(request=request)
+        # api_key (None when the header is unusable) and endpoint_name are not
+        # consumed yet; they are the inputs for step 1 above.
+        api_key = await self.get_api_key(request)
+        endpoint_name = request_data.get("model")
+
+        # TODO: get endpoint details to fill cache_params. Redis connection params
+        # will be set as kubernetes env variables and are read here via os.getenv.
+        import os
+
+        request_data["user_config"] = {
+            "cache_responses": False,
+            "redis_host": os.getenv("REDIS_HOST", "localhost"),
+            "redis_port": os.getenv("REDIS_PORT", 6379),
+            "redis_password": os.getenv("REDIS_PASSWORD", ""),
+            "endpoint_cache_settings": {
+                "cache": False,
+                "type": "redis-semantic",  # gpt_cache_redis
+                "cache_params": {
+                    "host": os.getenv("REDIS_HOST", "localhost"),
+                    "port": os.getenv("REDIS_PORT", 6379),
+                    "password": os.getenv("REDIS_PASSWORD", ""),
+                    "similarity_threshold": 0.8,
+                    "redis_semantic_cache_use_async": False,
+                    "redis_semantic_cache_embedding_model": "sentence-transformers/all-mpnet-base-v2",
+                    "eviction_policy": {"policy": "ttl", "max_size": 100, "ttl": 600},
+                },
+            },
+            "model_list": [
+                {
+                    "model_name": "gpt4",
+                    "litellm_params": {
+                        "model": "openai/gpt-3.5-turbo",
+                        "api_key": os.getenv("OPENAI_API_KEY", "dummy"),
+                        "rpm": 100,
+                        "request_timeout": 120,
+                    },
+                    "model_info": {"id": "model_id:123"},
+                },
+                {
+                    "model_name": "gpt4",
+                    "litellm_params": {
+                        "model": "openai/gpt-4",
+                        "api_key": os.getenv("OPENAI_API_KEY", "dummy"),
+                        "tpm": 10000,
+                    },
+                    "model_info": {"id": "model_id:456"},
+                },
+            ],
+        }
+        request._body = json.dumps(request_data).encode("utf-8")
+        return await call_next(request)
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 03ba2e839ed3..4ee4f44b3bd6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -135,6 +135,7 @@ def generate_feedback_box():
     get_team_models,
 )
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.budserve_middleware import BudServeMiddleware
 
 ## Import All Misc routes here ##
 from litellm.proxy.caching_routes import router as caching_router
@@ -450,6 +451,7 @@ async def redirect_ui_middleware(request: Request, call_next):
     allow_methods=["*"],
     allow_headers=["*"],
 )
+app.add_middleware(BudServeMiddleware)
 
 
 from typing import Dict
diff --git a/litellm/router.py b/litellm/router.py
index b3a07ad4e56f..3ef14b134440 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -206,6 +206,7 @@ def __init__(
         router_general_settings: Optional[
             RouterGeneralSettings
         ] = RouterGeneralSettings(),
+        endpoint_cache_settings: Optional[dict] = None,
     ) -> None:
         """
         Initialize the Router class with the given parameters for caching, reliability, and routing strategy.
@@ -307,7 +308,13 @@ def __init__(
             and redis_port is not None
             and redis_password is not None
         ):
-            cache_type = "redis"
+            cache_type = (
+                "redis"
+                if endpoint_cache_settings is None
+                else endpoint_cache_settings.get("cache_params", {}).get(
+                    "type", "redis"
+                )
+            )
 
             if redis_url is not None:
                 cache_config["url"] = redis_url
@@ -326,9 +333,16 @@ def __init__(
             redis_cache = RedisCache(**cache_config)
 
         if cache_responses:
-            if litellm.cache is None:
+            if litellm.cache is None and endpoint_cache_settings is not None:
                 # the cache can be initialized on the proxy server. We should not overwrite it
-                litellm.cache = litellm.Cache(type=cache_type, **cache_config)  # type: ignore
+                # user_config : enabled cache
+                enable_cache = endpoint_cache_settings.get("cache", False)
+                if enable_cache:
+                    endpoint_cache_config = endpoint_cache_settings.get(
+                        "cache_params", {}
+                    )
+                    if endpoint_cache_config:
+                        litellm.cache = litellm.Cache(type=cache_type, **endpoint_cache_config)  # type: ignore
             self.cache_responses = cache_responses
         self.cache = DualCache(
             redis_cache=redis_cache, in_memory_cache=InMemoryCache()

From 792dc8b5a460f3f600cab0bb3e59d3277c4705c2 Mon Sep 17 00:00:00 2001
From: sonalibud <sonali@bud.studio>
Date: Thu, 26 Sep 2024 05:59:11 +0000
Subject: [PATCH 5/6] feat: added retrieve credential details API in custom auth

---
 litellm/custom_auth.py | 64 +++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/litellm/custom_auth.py b/litellm/custom_auth.py
index 91613ff9439c..fd21bbe223f4 100644
--- a/litellm/custom_auth.py
+++ b/litellm/custom_auth.py
@@ -1,5 +1,8 @@
+import asyncio
+import os
 from datetime import datetime, timedelta, timezone
 
+import httpx
 from fastapi import HTTPException, Request, status
 from pydantic import BaseModel
 
@@ -10,6 +13,15 @@
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 
 
+async def fetch_data(url: str):
+    """Send a GET to *url* with a one-off httpx.AsyncClient; return the response."""
+    async with httpx.AsyncClient() as http_client:
+        return await http_client.get(url)
+
+
+budserve_app_baseurl = os.getenv("BUDSERVE_APP_BASEURL", "http://localhost:9000")
+
+
 async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     """
     Custom Auth dependency for User API Key Authentication
@@ -51,28 +63,30 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
             await user_api_key_cache.async_get_cache(key=hashed_token)
         )
 
-        verbose_proxy_logger.debug(
+        verbose_proxy_logger.info(
             f"Valid token from cache for key : {hashed_token} >>> {valid_token}"
         )
-        # OR
-        # valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache(  # type: ignore
-        #     key=api_key
-        # )
         if valid_token is None:
             # getting token details from authentication service
-            credential_dict = {
-                "key": api_key.removeprefix("sk-"),
-                "expiry": (datetime.now() + timedelta(days=1)).strftime(
-                    "%Y-%m-%d %H:%M:%S"
-                ),
-                "max_budget": 1,
-                "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002},
-            }
+            url = f"{budserve_app_baseurl}/credentials/details/{api_key.removeprefix('sk-')}"
+            credential_details_response = await fetch_data(url)
+            if credential_details_response.status_code != 200:
+                # No token was found when looking up in the DB
+                raise Exception("Invalid api key passed")
+            credential_dict = credential_details_response.json()["result"]
+            # credential_dict = {
+            #     "key": api_key.removeprefix("sk-"),
+            #     "expiry": (datetime.now() + timedelta(days=1)).strftime(
+            #         "%Y-%m-%d %H:%M:%S"
+            #     ),
+            #     "max_budget": 1,
+            #     "model_budgets": {"gpt-4": 0.003, "gpt-3.5-turbo": 0.002},
+            # }
             valid_token = UserAPIKeyAuth(
                 api_key=f"sk-{credential_dict['key']}",
                 expires=credential_dict["expiry"],
                 max_budget=credential_dict["max_budget"],
-                model_max_budget=credential_dict["model_budgets"],
+                model_max_budget=credential_dict["model_budgets"] or {},
             )
             api_key_spend = await prisma_client.db.litellm_spendlogs.group_by(
                 by=["api_key"],
@@ -90,8 +104,16 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
                 and api_key_spend[0]["_sum"]["spend"]
             ):
                 valid_token.spend = api_key_spend[0]["_sum"]["spend"]
-            verbose_proxy_logger.debug(f"Valid token from DB >>> {valid_token}")
-        verbose_proxy_logger.debug(f"Valid token spend >> {valid_token.spend}")
+            # Add hashed token to cache
+            verbose_proxy_logger.info(
+                f"Valid token storing in cache for key : {valid_token.token}"
+            )
+            await user_api_key_cache.async_set_cache(
+                key=valid_token.token,
+                value=valid_token,
+            )
+            verbose_proxy_logger.info(f"Valid token from DB >>> {valid_token}")
+        verbose_proxy_logger.info(f"Valid token spend >> {valid_token.spend}")
         if valid_token is not None:
             if valid_token.expires is not None:
                 current_time = datetime.now(timezone.utc)
@@ -160,18 +182,10 @@ async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
                                 current_cost=current_model_spend,
                                 max_budget=current_model_budget,
                             )
-            # Add hashed token to cache
-            verbose_proxy_logger.debug(
-                f"Valid token storing in cache for key : {valid_token.token}"
-            )
-            await user_api_key_cache.async_set_cache(
-                key=valid_token.token,
-                value=valid_token,
-            )
             return valid_token
         else:
             # No token was found when looking up in the DB
-            raise Exception("Invalid proxy server token passed")
+            raise Exception("Invalid api key passed")
 
     except Exception as e:
         if isinstance(e, litellm.BudgetExceededError):

From fa1a0f416d442f0b7f208ff338ba4d3754a50dd6 Mon Sep 17 00:00:00 2001
From: sonalibud <sonali@bud.studio>
Date: Thu, 26 Sep 2024 06:07:54 +0000
Subject: [PATCH 6/6] chore: added example env and config for global litellm
 proxy server

---
 .env.example.global | 8 ++++++++
 litellm_config.yaml | 8 ++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 .env.example.global
 create mode 100644 litellm_config.yaml

diff --git a/.env.example.global b/.env.example.global
new file mode 100644
index 000000000000..6ccaa58aa3ed
--- /dev/null
+++ b/.env.example.global
@@ -0,0 +1,8 @@
+export LITELLM_MASTER_KEY=<master-key starting with sk->
+export DATABASE_URL=<db url>
+export STORE_MODEL_IN_DB='True'
+export LITELLM_SALT_KEY=<salt-key>
+export REDIS_HOST=localhost
+export REDIS_PORT=6379
+export REDIS_PASSWORD=<password>
+export BUDSERVE_APP_BASEURL="http://localhost:8000"
\ No newline at end of file
diff --git a/litellm_config.yaml b/litellm_config.yaml
new file mode 100644
index 000000000000..01422da9f8f7
--- /dev/null
+++ b/litellm_config.yaml
@@ -0,0 +1,8 @@
+general_settings:
+  store_model_in_db: True
+  custom_auth: litellm.custom_auth.user_api_key_auth
+router_settings:
+  cache_responses: False
+  redis_host: "os.environ/REDIS_HOST"
+  redis_port: "os.environ/REDIS_PORT"
+  redis_password: "os.environ/REDIS_PASSWORD"
\ No newline at end of file