Skip to content

Commit

Permalink
add melo synthesizer
Browse files Browse the repository at this point in the history
  • Loading branch information
marmikcfc committed Jun 10, 2024
1 parent e5da076 commit 84ea46e
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 5 deletions.
10 changes: 9 additions & 1 deletion bolna/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,12 @@
"that's enough", "shush", "listen", "excuse me", "hold up", "not now", "stop there", "stop speaking"
]

PRE_FUNCTIONAL_CALL_MESSAGE = "Just give me a moment, I'll do that and will be back with you"
PRE_FUNCTIONAL_CALL_MESSAGE = "Just give me a moment, I'll do that and will be back with you"

# Short stock phrases, kept in their original order (one entry per line so
# additions and removals show up as single-line diffs).
FILLER_PHRASES = [
    "No worries.",
    "It's fine.",
    "I'm here.",
    "No rush.",
    "Take your time.",
    "Great!",
    "Awesome!",
    "Fantastic!",
    "Wonderful!",
    "Perfect!",
    "Excellent!",
    "I get it.",
    "Noted.",
    "Alright.",
    "I understand.",
    "Understood.",
    "Got it.",
    "Sure.",
    "Okay.",
    "Right.",
    "Absolutely.",
    "Sure thing.",
    "I see.",
    "Gotcha.",
    "Makes sense.",
]
14 changes: 12 additions & 2 deletions bolna/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ class FourieConfig(BaseModel):
class DeepgramConfig(BaseModel):
    """Provider configuration for the Deepgram synthesizer."""

    # Required voice identifier; no default is defined.
    voice: str

class MeloConfig(BaseModel):
    """Provider configuration for the Melo synthesizer.

    ``voice`` and ``sample_rate`` are required; the remaining tuning values
    are optional and fall back to the defaults declared below.
    """

    # Required settings.
    voice: str
    sample_rate: int

    # Optional synthesis tuning values.
    # NOTE(review): presumably passed through as keyword arguments to
    # MeloSynthesizer and forwarded to the TTS service — confirm at the caller.
    sdp_ratio: float = 0.2
    noise_scale: float = 0.6
    noise_scale_w: float = 0.8
    speed: float = 1.0

class Transcriber(BaseModel):
model: str
Expand All @@ -70,15 +77,15 @@ def validate_language(cls, value):

class Synthesizer(BaseModel):
    """Synthesizer selection and its provider-specific configuration.

    ``provider`` names the text-to-speech backend and ``provider_config``
    carries the matching provider settings model.
    """

    provider: str
    # Single declaration including MeloConfig; the earlier declaration without
    # it was redundant because a later annotation of the same name replaces it.
    provider_config: Union[PollyConfig, XTTSConfig, ElevenLabsConfig, OpenAIConfig, FourieConfig, MeloConfig, DeepgramConfig]
    stream: bool = False
    buffer_size: Optional[int] = 40  # 40 characters in a buffer
    audio_format: Optional[str] = "pcm"
    caching: Optional[bool] = True

    @validator("provider")
    def validate_model(cls, value):
        """Validate ``provider`` against the list of accepted provider names.

        Delegates to ``validate_attribute`` and returns whatever it returns.
        """
        # Only one return: a stale return with the shorter list placed before
        # this one would make "meloTTS" unreachable and therefore rejected.
        # NOTE(review): FourieConfig is accepted in provider_config but
        # "fourie" is not in this list — confirm whether that is intentional.
        return validate_attribute(value, ["polly", "xtts", "elevenlabs", "openai", "deepgram", "meloTTS"])


class IOModel(BaseModel):
Expand Down Expand Up @@ -181,6 +188,9 @@ class Task(BaseModel):
task_type: Optional[str] = "conversation" # extraction, summarization, notification
task_config: ConversationConfig = dict()

# Validator registered for a field named 'voice'; it delegates to
# validate_attribute() with the list of voice names below and returns its result.
# NOTE(review): no 'voice' field is visible on the surrounding model in this
# hunk (only task_type and task_config are shown) — confirm this validator is
# attached to the class that actually declares 'voice'.
@validator('voice')
def check_voice(cls, value):
return validate_attribute(value, ['Jason','Oscar','Travis','Joseph','Ram'])

class AgentModel(BaseModel):
agent_name: str
Expand Down
5 changes: 3 additions & 2 deletions bolna/providers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .synthesizer import PollySynthesizer, XTTSSynthesizer, ElevenlabsSynthesizer, OPENAISynthesizer, FourieSynthesizer, DeepgramSynthesizer
from .synthesizer import PollySynthesizer, XTTSSynthesizer, ElevenlabsSynthesizer, OPENAISynthesizer, FourieSynthesizer, DeepgramSynthesizer, MeloSynthesizer
from .transcriber import DeepgramTranscriber, WhisperTranscriber
from .input_handlers import DefaultInputHandler, TwilioInputHandler, ExotelInputHandler, PlivoInputHandler
from .output_handlers import DefaultOutputHandler, TwilioOutputHandler, ExotelOutputHandler, PlivoOutputHandler
Expand All @@ -10,7 +10,8 @@
'elevenlabs': ElevenlabsSynthesizer,
'openai': OPENAISynthesizer,
'fourie': FourieSynthesizer,
'deepgram': DeepgramSynthesizer
'deepgram': DeepgramSynthesizer,
'meloTTS': MeloSynthesizer
}
SUPPORTED_TRANSCRIBER_MODELS = {
'deepgram': DeepgramTranscriber,
Expand Down
1 change: 1 addition & 0 deletions bolna/synthesizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .openai_synthesizer import OPENAISynthesizer
from .fourie_synthesizer import FourieSynthesizer
from .deepgram_synthesizer import DeepgramSynthesizer
from .melo_synthesizer import MeloSynthesizer
83 changes: 83 additions & 0 deletions bolna/synthesizer/melo_synthesizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

import aiohttp
import os
from dotenv import load_dotenv
from bolna.helpers.logger_config import configure_logger
from bolna.helpers.utils import create_ws_data_packet
from .base_synthesizer import BaseSynthesizer
import json
import base64

# Run dotenv loading at import time, before MeloSynthesizer reads MELO_TTS
# through os.getenv (presumably so values from a .env file are visible).
load_dotenv()
# Module-level logger obtained from the project's configure_logger helper,
# keyed by this module's name.
logger = configure_logger(__name__)



class MeloSynthesizer(BaseSynthesizer):
    """Synthesizer that obtains audio from a Melo TTS HTTP endpoint.

    The endpoint URL is read from the ``MELO_TTS`` environment variable. Each
    queued text message is POSTed as JSON; the ``audio`` field of the JSON
    response is base64-decoded and emitted as a single chunk.
    """

    def __init__(self, audio_format="pcm", sampling_rate="8000", stream=False, buffer_size=400,
                 **kwargs):
        """Store the endpoint URL and the synthesis parameters.

        Args:
            audio_format: Output format label; ``"pcm"`` is reported as
                ``"linear16"``, any other value is used unchanged.
            sampling_rate: Fallback sample rate, used only when ``sample_rate``
                is not supplied (or is ``None``) in ``kwargs``.
            stream: Forwarded to ``BaseSynthesizer.__init__``.
            buffer_size: Forwarded to ``BaseSynthesizer.__init__``.
            **kwargs: Optional ``voice``, ``sample_rate``, ``sdp_ratio``,
                ``noise_scale``, ``noise_scale_w`` and ``speed`` values.
        """
        super().__init__(stream, buffer_size)
        self.format = "linear16" if audio_format == "pcm" else audio_format
        self.first_chunk_generated = False

        self.url = os.getenv('MELO_TTS')
        if not self.url:
            # Surface the misconfiguration early; requests will fail without a URL.
            logger.warning("MELO_TTS environment variable is not set; Melo TTS requests will fail")

        self.voice = kwargs.get('voice')

        # Previously the value derived from `sampling_rate` was always
        # overwritten by kwargs.get('sample_rate'), which is None when the key
        # is absent. Keep the explicit value when given, else fall back.
        fallback_rate = int(sampling_rate)
        configured_rate = kwargs.get('sample_rate')
        self.sample_rate = fallback_rate if configured_rate is None else configured_rate

        # Defaults mirror the ones declared on MeloConfig so a missing key does
        # not send a JSON null to the service.
        self.sdp_ratio = kwargs.get('sdp_ratio', 0.2)
        self.noise_scale = kwargs.get('noise_scale', 0.6)
        self.noise_scale_w = kwargs.get('noise_scale_w', 0.8)
        self.speed = kwargs.get('speed', 1.0)

    async def __generate_http(self, text):
        """POST ``text`` to the TTS endpoint and yield the decoded audio bytes.

        Yields one chunk when the response status is 200; yields nothing (and
        logs an error) for any other status.
        """
        payload = {
            "voice_id": self.voice,
            "text": text,
            "sr": self.sample_rate,
            "sdp_ratio": self.sdp_ratio,
            "noise_scale": self.noise_scale,
            "noise_scale_w": self.noise_scale_w,
            "speed": self.speed
        }

        headers = {
            'Content-Type': 'application/json'
        }

        # The payload dict is always built above, so no None check is needed.
        async with aiohttp.ClientSession() as session:
            async with session.post(self.url, headers=headers, json=payload) as response:
                if response.status != 200:
                    # Previously dropped silently; log so failures are visible.
                    logger.error(f"Melo TTS request failed with status {response.status}")
                    return
                res_json: dict = json.loads(await response.text())
                yield base64.b64decode(res_json["audio"])

    async def open_connection(self):
        """No-op: this synthesizer opens a new HTTP session per request."""
        pass

    async def generate(self):
        """Consume queued messages forever and yield one data packet per audio chunk.

        For every chunk the shared ``meta_info`` dict is updated with
        ``is_first_chunk``, ``text`` and ``format``; when the message carries a
        truthy ``end_of_llm_stream`` flag, ``end_of_synthesizer_stream`` is set
        and the first-chunk tracking is reset.
        """
        while True:
            message = await self.internal_queue.get()
            logger.info(f"Generating TTS response for message: {message}")

            meta_info, text = message.get("meta_info"), message.get("data")
            # Distinct loop variable: do not shadow the queue `message`.
            async for audio_chunk in self.__generate_http(text):
                if not self.first_chunk_generated:
                    meta_info["is_first_chunk"] = True
                    self.first_chunk_generated = True
                else:
                    meta_info["is_first_chunk"] = False
                if "end_of_llm_stream" in meta_info and meta_info["end_of_llm_stream"]:
                    meta_info["end_of_synthesizer_stream"] = True
                    self.first_chunk_generated = False

                meta_info['text'] = text
                meta_info['format'] = self.format
                yield create_ws_data_packet(audio_chunk, meta_info)

    async def push(self, message):
        """Enqueue ``message`` for synthesis without waiting."""
        logger.info("Pushed message to internal queue")
        self.internal_queue.put_nowait(message)

0 comments on commit 84ea46e

Please sign in to comment.