diff --git a/bolna/agent_types/llama_index_rag_agent.py b/bolna/agent_types/llama_index_rag_agent.py index c8ffc1fb..1ae40578 100644 --- a/bolna/agent_types/llama_index_rag_agent.py +++ b/bolna/agent_types/llama_index_rag_agent.py @@ -1,8 +1,10 @@ import os import time import asyncio +import logging from typing import List, Tuple, Generator, AsyncGenerator -import dotenv + +from dotenv import load_dotenv, find_dotenv from llama_index.core import VectorStoreIndex, StorageContext from llama_index.core.llms import ChatMessage @@ -19,7 +21,8 @@ from bolna.rag.mongodb_rag import MongoDBConfig, MongoDBConnector, RAGEngine as MongoDBRAGEngine -dotenv.load_dotenv() + +load_dotenv(find_dotenv(), override=True) logger = configure_logger(__name__) class LlamaIndexRag(BaseAgent): @@ -62,6 +65,8 @@ def __init__(self, vector_id: str, temperature: float, model: str, buffer: int = self.max_tokens = max_tokens self.provider_config = provider_config self.OPENAI_KEY = os.getenv('OPENAI_API_KEY') + # LANCEDB_DIR env var must point at the LanceDB database directory + self.LANCE_DB = os.getenv('LANCEDB_DIR') self.provider = None self.query_engine = None @@ -81,29 +86,28 @@ def _setup_llm(self): def _setup_provider(self): """Based on the relevant provider config, set up the provider.""" - if self.provider_config: - provider_name = self.provider_config.get('provider') - if provider_name == 'mongodb': - config = MongoDBConfig(**self.provider_config['provider_config']) - connector = MongoDBConnector(config) - connector.connect() - connector.verify_data() - self.provider = MongoDBRAGEngine(connector) - self.provider.setup() - logger.info(f"{provider_name.capitalize()} RAG engine initialized") - # Add more providers here as elif statements - else: - logger.warning(f"Unsupported provider: {provider_name}. 
Falling back to LanceDB.") - self._setup_lancedb() + provider_name = self.provider_config.get('provider') + logger.info(f"Provider Name : {provider_name}") + + if provider_name == 'mongodb': + logger.info(f"Setting up {provider_name} RAG") + config = MongoDBConfig(**self.provider_config['provider_config']) + connector = MongoDBConnector(config) + connector.connect() + connector.verify_data() + self.provider = MongoDBRAGEngine(connector) + self.provider.setup() + logger.info(f"{provider_name.capitalize()} RAG engine initialized") + # Add more providers here as elif statements else: - self._setup_lancedb() - - def _setup_lancedb(self): - self.vector_store = LanceDBVectorStore(uri=lance_db, table_name=self.vector_id) - storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - self.vector_index = VectorStoreIndex([], storage_context=storage_context) - self.query_engine = self.vector_index.as_query_engine() - logger.info("LanceDB vector store initialized") + logger.info("LanceDB RAG") + logger.info(f"URI : LanceDB {self.LANCE_DB}") + self.vector_store = LanceDBVectorStore(uri=self.LANCE_DB, table_name=self.provider_config['provider_config'].get('vector_id')) + logger.info(f"Table params : {self.provider_config['provider_config'].get('vector_id')}") + storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + self.vector_index = VectorStoreIndex([], storage_context=storage_context) + self.query_engine = self.vector_index.as_query_engine() + logger.info("LanceDB vector store initialized") # def _setup_vector_store(self): # """Set up the vector store and index.""" diff --git a/bolna/models.py b/bolna/models.py index a81caebc..513c743b 100644 --- a/bolna/models.py +++ b/bolna/models.py @@ -69,7 +69,10 @@ class StylettsConfig(BaseModel): diffusion_steps: int = 5 embedding_scale: float = 1 - +class AzureConfig(BaseModel): voice: str model: str language: str class Transcriber(BaseModel): model: Optional[str] = "nova-2" 
@@ -136,24 +139,25 @@ class OpenaiAssistants(BaseModel): buffer_size: Optional[int] = 100 class MongoDBProviderConfig(BaseModel): - connection_string: str - db_name: str - collection_name: str - index_name: str - llm_model: str - embedding_model: str - embedding_dimensions: int + connection_string: Optional[str] = None + db_name: Optional[str] = None + collection_name: Optional[str] = None + index_name: Optional[str] = None + llm_model: Optional[str] = None + embedding_model: Optional[str] = None + embedding_dimensions: Optional[int] = None + +class LanceDBProviderConfig(BaseModel): + vector_id: str class VectorStore(BaseModel): provider: str - provider_config: MongoDBProviderConfig - vector_id: str + provider_config: Union[LanceDBProviderConfig, MongoDBProviderConfig] class ExtraConfig(BaseModel): vector_store : VectorStore class LLM(BaseModel): - vector_id: Optional[str] = "none" model: Optional[str] = "gpt-3.5-turbo" max_tokens: Optional[int] = 100 agent_flow_type: str = "streaming" #It can be llamaindex_rag, simple_llm_agent, router_agent, dag_agent, openai_assistant, custom diff --git a/bolna/rag/main.py b/bolna/rag/main.py index 33d82591..ecf230d2 100644 --- a/bolna/rag/main.py +++ b/bolna/rag/main.py @@ -150,6 +150,6 @@ async def status(): "rag_system_ready": db_connector is not None and search_engine is not None } -if __name__ == "__main__": - import uvicorn - uvicorn.run(app, host="0.0.0.0", port=8000) +# if __name__ == "__main__": +# import uvicorn +# uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/bolna/rag/mongodb_rag.py b/bolna/rag/mongodb_rag.py index 46eef6bd..c41f1578 100644 --- a/bolna/rag/mongodb_rag.py +++ b/bolna/rag/mongodb_rag.py @@ -93,33 +93,33 @@ def query(self, query_text: str, similarity_top_k: int = 5): return response -def main(): - OPENAI_API_KEY = "***" - os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY - - config = MongoDBConfig( - connection_string="mongodb+srv://vipul:qqgr4bwAYl5pZSU9@testing-rag.nqaknom.mongodb.net/", - 
db_name="movies", - collection_name="movies_records", - index_name="vector_index", - ) - - try: - db_connector = MongoDBConnector(config) - db_connector.connect() - db_connector.verify_data() - - rag_engine = RAGEngine(db_connector) - rag_engine.setup() - - query = "Any romantic movie for me? you can give anything you want?" - rag_engine.query(query) - - except Exception as e: - logger.error(f"An error occurred: {e}") - finally: - if 'db_connector' in locals(): - db_connector.disconnect() - -if __name__ == "__main__": - main() +# def main(): +# OPENAI_API_KEY = "***" +# os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY + +# config = MongoDBConfig( +# connection_string="**", +# db_name="**", +# collection_name="**", +# index_name="**", +# ) + +# try: +# db_connector = MongoDBConnector(config) +# db_connector.connect() +# db_connector.verify_data() + +# rag_engine = RAGEngine(db_connector) +# rag_engine.setup() + +# query = "Any romantic movie for me? you can give anything you want?" +# rag_engine.query(query) + +# except Exception as e: +# logger.error(f"An error occurred: {e}") +# finally: +# if 'db_connector' in locals(): +# db_connector.disconnect() + +# if __name__ == "__main__": +# main() diff --git a/local_setup/ngrok-config.yml b/local_setup/ngrok-config.yml index 5b6594ee..c868f12b 100644 --- a/local_setup/ngrok-config.yml +++ b/local_setup/ngrok-config.yml @@ -3,11 +3,11 @@ version: '2' authtoken: tunnels: twilio-app: - addr: 8001 + addr: twilio-app:8001 proto: http plivo-app: - addr: 8002 + addr: plivo-app:8002 proto: http bolna-app: - addr: 5001 + addr: bolna-app:5001 proto: http diff --git a/requirements.txt b/requirements.txt index 31c4039b..28d5cc71 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,104 @@ +# Web Framework and API +fastapi==0.108.0 +uvicorn==0.22.0 +starlette==0.32.0.post1 + +# Data Validation and Settings Management +pydantic==2.8.2 +python-dotenv==1.0.0 + +# Async Libraries +aiohttp==3.9.1 aiobotocore==2.9.0 
aiofiles==23.2.1 -aiohttp==3.9.1 -azure-cognitiveservices-speech==1.38.0 -daily-python==0.9.1 -fastapi==0.108.0 -fastembed==0.2.7 -litellm==1.40.20 +aiocsv==1.3.1 +aiodynamo==23.10.1 +aiohttp-retry==2.8.3 +aioitertools==0.11.0 +aiormq==6.8.0 +aiosignal==1.3.1 + +# Database and Storage +redis==5.0.1 +pymongo==4.8.0 +lancedb==0.10.2 +SQLAlchemy==2.0.31 + +# Machine Learning and NLP numpy==1.26.1 +scipy==1.11.4 +scikit-learn==1.5.1 +torch==2.1.2 +torchaudio==2.1.2 +transformers==4.39.3 +sentence-transformers==3.0.1 +fastembed==0.2.7 +onnxruntime==1.18.1 +nltk==3.8.1 + +# LLM and Related Libraries openai>=1.10.0 -pydantic==2.8.2 +litellm==1.40.20 +llama-index==0.10.57 +llama-index-vector-stores-lancedb==0.1.7 +llama-index-vector-stores-mongodb==0.1.8 +cohere==5.3.2 + +# Text Processing +tiktoken>=0.6.0 +tokenizers==0.15.2 + +# Audio Processing +azure-cognitiveservices-speech==1.38.0 pydub==0.25.1 +soundfile==0.12.1 + +# Date and Time Handling python-dateutil==2.8.2 -python-dotenv==1.0.0 -redis==5.0.1 +pytz==2024.1 + +# HTTP and Networking requests==2.31.0 -tiktoken>=0.6.0 -twilio==8.9.0 -uvicorn==0.22.0 +httpx==0.25.2 websockets==10.4 -onnxruntime>=1.16.3 -scipy==1.11.4 + +# File Handling and Parsing +python-multipart==0.0.6 +Pillow==10.4.0 +pypdf==4.3.1 + +# API Clients +twilio==8.9.0 +daily-python==0.9.1 +slack_sdk==3.31.0 +stripe==8.0.0 + +# Task Queue and Job Processing +celery==5.3.6 +taskiq==0.11.0 + +# Logging and Monitoring +loguru==0.7.2 +sentry-sdk==2.9.0 + +# Development and Testing uvloop==0.19.0 -tokenizers -huggingface-hub -semantic-router -sentence_transformers -optimum[onnxruntime] -llama-index -llama-index-vector-stores-lancedb -lancedb -torchaudio -python-multipart +pytest==7.4.0 + +# Data Processing and Analysis +pandas==2.2.2 +pyarrow==15.0.0 + +# Utilities +python-dateutil==2.8.2 +tenacity==8.5.0 +ratelimiter==1.2.0.post0 + +# Optional Dependencies (uncomment if needed) +huggingface-hub==0.23.2 +semantic-router==0.0.46 +supabase==2.3.3 + +# Version 
Control (keep at the end) +# pip-compile is the CLI provided by the pip-tools package below +pip-tools==6.13.0