Skip to content

Commit

Permalink
requirements, server, lancedb and mongodb addition, some refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
vipul-maheshwari committed Jul 24, 2024
1 parent f696c38 commit db7f468
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 95 deletions.
52 changes: 28 additions & 24 deletions bolna/agent_types/llama_index_rag_agent.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import time
import asyncio
import logging
from typing import List, Tuple, Generator, AsyncGenerator
import dotenv

from dotenv import load_dotenv, find_dotenv

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.llms import ChatMessage
Expand All @@ -19,7 +21,8 @@

from bolna.rag.mongodb_rag import MongoDBConfig, MongoDBConnector, RAGEngine as MongoDBRAGEngine

dotenv.load_dotenv()

load_dotenv(find_dotenv(), override=True)
logger = configure_logger(__name__)

class LlamaIndexRag(BaseAgent):
Expand Down Expand Up @@ -62,6 +65,8 @@ def __init__(self, vector_id: str, temperature: float, model: str, buffer: int =
self.max_tokens = max_tokens
self.provider_config = provider_config
self.OPENAI_KEY = os.getenv('OPENAI_API_KEY')
# The LanceDB location is read from the LANCEDB_DIR environment variable.
# os.getenv() takes the variable *name* as a string; passing the bare
# identifier LANCEDB_DIR would raise NameError unless such a constant exists.
self.LANCE_DB = os.getenv('LANCEDB_DIR')
self.provider = None
self.query_engine = None

Expand All @@ -81,29 +86,28 @@ def _setup_llm(self):
def _setup_provider(self):
    """Based on the relevant provider config, set up the provider.

    Reads the ``'provider'`` entry of ``self.provider_config``. When it is
    ``'mongodb'``, a ``MongoDBConfig`` is built from
    ``self.provider_config['provider_config']``, the connector is connected
    and verified, and the resulting ``MongoDBRAGEngine`` is stored on
    ``self.provider``. Any other provider value is delegated to
    ``_setup_lancedb``.
    """
    provider_name = self.provider_config.get('provider')
    # Log through the module-level logger (not the root ``logging`` module)
    # so these messages follow the same configuration as the rest of the file.
    logger.info(f"Provider Name : {provider_name}")

    if provider_name == 'mongodb':
        logger.info(f"Setting up {provider_name} RAG")
        config = MongoDBConfig(**self.provider_config['provider_config'])
        connector = MongoDBConnector(config)
        connector.connect()
        connector.verify_data()
        self.provider = MongoDBRAGEngine(connector)
        self.provider.setup()
        logger.info(f"{provider_name.capitalize()} RAG engine initialized")
    # Add more providers here as elif statements
    else:
        self._setup_lancedb()

def _setup_lancedb(self):
    """Set up a LanceDB vector store, an index over it and a query engine.

    The store is opened at ``self.LANCE_DB`` and the table name is the
    ``'vector_id'`` entry of ``self.provider_config['provider_config']``.
    Populates ``self.vector_store``, ``self.vector_index`` and
    ``self.query_engine``.
    """
    # Resolve the table name once; it feeds both the store and the log line.
    table_name = self.provider_config['provider_config'].get('vector_id')

    logger.info("LanceDB RAG")
    logger.info(f"URI : LanceDB {self.LANCE_DB}")
    self.vector_store = LanceDBVectorStore(uri=self.LANCE_DB, table_name=table_name)
    logger.info(f"Table params : {table_name}")
    storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
    # The index starts with no documents; it is backed by the vector store above.
    self.vector_index = VectorStoreIndex([], storage_context=storage_context)
    self.query_engine = self.vector_index.as_query_engine()
    logger.info("LanceDB vector store initialized")

# def _setup_vector_store(self):
# """Set up the vector store and index."""
Expand Down
26 changes: 15 additions & 11 deletions bolna/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ class StylettsConfig(BaseModel):
diffusion_steps: int = 5
embedding_scale: float = 1


class AzureConfig(BaseModel):
    """Configuration model with three required string fields."""

    voice: str
    model: str
    language: str

class Transcriber(BaseModel):
model: Optional[str] = "nova-2"
Expand Down Expand Up @@ -136,24 +139,25 @@ class OpenaiAssistants(BaseModel):
buffer_size: Optional[int] = 100

class MongoDBProviderConfig(BaseModel):
    """Provider settings for a MongoDB-backed vector store.

    Every field is optional and defaults to ``None``.
    """

    connection_string: Optional[str] = None
    db_name: Optional[str] = None
    collection_name: Optional[str] = None
    index_name: Optional[str] = None
    llm_model: Optional[str] = None
    embedding_model: Optional[str] = None
    # A dimension count is numeric (this field was previously declared ``int``);
    # pydantic v2 does not coerce an integer input into a ``str`` field.
    embedding_dimensions: Optional[int] = None

class LanceDBProviderConfig(BaseModel):
    """Provider settings for LanceDB: one required ``vector_id`` string."""

    vector_id: str

class VectorStore(BaseModel):
    """Vector store selection: a provider name plus that provider's settings."""

    provider: str
    # Either provider's settings model is accepted here.
    provider_config: Union[LanceDBProviderConfig, MongoDBProviderConfig]

class ExtraConfig(BaseModel):
    """Wrapper model holding a single ``vector_store`` entry."""

    vector_store: VectorStore

class LLM(BaseModel):
vector_id: Optional[str] = "none"
model: Optional[str] = "gpt-3.5-turbo"
max_tokens: Optional[int] = 100
agent_flow_type: str = "streaming" #It can be llamaindex_rag, simple_llm_agent, router_agent, dag_agent, openai_assistant, custom
Expand Down
6 changes: 3 additions & 3 deletions bolna/rag/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,6 @@ async def status():
"rag_system_ready": db_connector is not None and search_engine is not None
}

if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
# if __name__ == "__main__":
# import uvicorn
# uvicorn.run(app, host="0.0.0.0", port=8000)
60 changes: 30 additions & 30 deletions bolna/rag/mongodb_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,33 +93,33 @@ def query(self, query_text: str, similarity_top_k: int = 5):

return response

def main():
    """Run a one-off sample query against a MongoDB-backed RAG engine.

    Credentials are read from the environment instead of being written into
    the source: ``OPENAI_API_KEY`` and ``MONGODB_CONNECTION_STRING`` are
    required; ``MONGODB_DB_NAME``, ``MONGODB_COLLECTION_NAME`` and
    ``MONGODB_INDEX_NAME`` are optional and fall back to the sample values.
    Errors are logged rather than raised, and the connector is always
    disconnected once it has been created.
    """
    required = ("OPENAI_API_KEY", "MONGODB_CONNECTION_STRING")
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        logger.error(f"Missing required environment variables: {', '.join(missing)}")
        return

    config = MongoDBConfig(
        connection_string=os.environ["MONGODB_CONNECTION_STRING"],
        db_name=os.getenv("MONGODB_DB_NAME", "movies"),
        collection_name=os.getenv("MONGODB_COLLECTION_NAME", "movies_records"),
        index_name=os.getenv("MONGODB_INDEX_NAME", "vector_index"),
    )

    # Track the connector explicitly so cleanup does not depend on locals().
    db_connector = None
    try:
        db_connector = MongoDBConnector(config)
        db_connector.connect()
        db_connector.verify_data()

        rag_engine = RAGEngine(db_connector)
        rag_engine.setup()

        query = "Any romantic movie for me? you can give anything you want?"
        rag_engine.query(query)

    except Exception as e:
        logger.error(f"An error occurred: {e}")
    finally:
        if db_connector is not None:
            db_connector.disconnect()


if __name__ == "__main__":
    main()
6 changes: 3 additions & 3 deletions local_setup/ngrok-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ version: '2'
authtoken: <your-auth-token>
tunnels:
twilio-app:
addr: 8001
addr: twilio-app:8001
proto: http
plivo-app:
addr: 8002
addr: plivo-app:8002
proto: http
bolna-app:
addr: 5001
addr: bolna-app:5001
proto: http
119 changes: 95 additions & 24 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,33 +1,104 @@
# Web Framework and API
fastapi==0.108.0
uvicorn==0.22.0
starlette==0.32.0.post1

# Data Validation and Settings Management
pydantic==2.8.2
python-dotenv==1.0.0

# Async Libraries
aiohttp==3.9.1
aiobotocore==2.9.0
aiofiles==23.2.1
aiohttp==3.9.1
azure-cognitiveservices-speech==1.38.0
daily-python==0.9.1
fastapi==0.108.0
fastembed==0.2.7
litellm==1.40.20
aiocsv==1.3.1
aiodynamo==23.10.1
aiohttp-retry==2.8.3
aioitertools==0.11.0
aiormq==6.8.0
aiosignal==1.3.1

# Database and Storage
redis==5.0.1
pymongo==4.8.0
lancedb==0.10.2
SQLAlchemy==2.0.31

# Machine Learning and NLP
numpy==1.26.1
scipy==1.11.4
scikit-learn==1.5.1
torch==2.1.2
torchaudio==2.1.2
transformers==4.39.3
sentence-transformers==3.0.1
fastembed==0.2.7
onnxruntime==1.18.1
nltk==3.8.1

# LLM and Related Libraries
openai>=1.10.0
pydantic==2.8.2
litellm==1.40.20
llama-index==0.10.57
llama-index-vector-stores-lancedb==0.1.7
llama-index-vector-stores-mongodb==0.1.8
cohere==5.3.2

# Text Processing
tiktoken>=0.6.0
tokenizers==0.15.2

# Audio Processing
azure-cognitiveservices-speech==1.38.0
pydub==0.25.1
soundfile==0.12.1

# Date and Time Handling
python-dateutil==2.8.2
python-dotenv==1.0.0
redis==5.0.1
pytz==2024.1

# HTTP and Networking
requests==2.31.0
tiktoken>=0.6.0
twilio==8.9.0
uvicorn==0.22.0
httpx==0.25.2
websockets==10.4
onnxruntime>=1.16.3
scipy==1.11.4

# File Handling and Parsing
python-multipart==0.0.6
Pillow==10.4.0
pypdf==4.3.1

# API Clients
twilio==8.9.0
daily-python==0.9.1
slack_sdk==3.31.0
stripe==8.0.0

# Task Queue and Job Processing
celery==5.3.6
taskiq==0.11.0

# Logging and Monitoring
loguru==0.7.2
sentry-sdk==2.9.0

# Development and Testing
uvloop==0.19.0
tokenizers
huggingface-hub
semantic-router
sentence_transformers
optimum[onnxruntime]
llama-index
llama-index-vector-stores-lancedb
lancedb
torchaudio
python-multipart
pytest==7.4.0

# Data Processing and Analysis
pandas==2.2.2
pyarrow==15.0.0

# Utilities
python-dateutil==2.8.2
tenacity==8.5.0
ratelimiter==1.2.0.post0

# Optional Dependencies (comment out if not needed)
huggingface-hub==0.23.2
semantic-router==0.0.46
supabase==2.3.3

# Dependency Pinning Tools (keep at the end)
pip-compile==6.0.0
pip-tools==6.13.0

0 comments on commit db7f468

Please sign in to comment.