-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbrain.py
211 lines (169 loc) · 8.79 KB
/
brain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import requests
import numpy as np
import os
import openai
import speech_recognition as sr
from urllib.error import URLError
from dotenv import load_dotenv
import time
import tiktoken
# setup
# Module-level configuration: environment loading, audio timing constants,
# token budget for the chat model, Azure OpenAI client settings and the URL
# of the body server.
load_dotenv() # load environment variables from .env file (must run before the os.getenv calls below)
sleep_time = 0.1 # in seconds; delay before retrying a transcription after a network error
sampling_frequency = 16000 # 16 kHz
number_of_samples_per_chunk = 1365
time_between_audio_chunks = number_of_samples_per_chunk / sampling_frequency # in seconds (1365 / 16000, about 0.085 s)
# NOTE(review): NaoAudioSource.audio_generator assigns its own local variable with
# this same name, so this module-level value is not read anywhere in the visible code.
corrected_time_between_audio_chunks = time_between_audio_chunks*0.8 # considering other delays
max_response_tokens = 250 # tokens reserved for the model's reply when trimming the context
token_limit = 4096 # trimming keeps (context tokens + max_response_tokens) below this value
openai.api_key = os.getenv("AZURE_OPENAI_KEY") # None if the variable is not set
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") # None if the variable is not set
openai.api_type = 'azure'
openai.api_version = '2023-05-15'
BODY_URL = "http://localhost:5004" # assuming it runs locally
class NaoStream:
    """Minimal file-like adapter that serves audio chunks from an iterator."""

    def __init__(self, audio_generator):
        # Iterator that produces one chunk of bytes per read() call.
        self.audio_generator = audio_generator

    def read(self, size=-1):
        """Return the next chunk, or ``b''`` once the iterator is exhausted.

        ``size`` is accepted so callers may pass a requested length, but it is
        ignored: each call hands back exactly one chunk from the iterator.
        """
        return next(self.audio_generator, b'')
class NaoAudioSource(sr.AudioSource):
    """Audio source that pulls audio chunks from the body server over HTTP.

    Intended to be used as a context manager: entering posts to
    ``/start_listening`` and exposes the chunk stream as ``self.stream``;
    leaving stops the generator and posts to ``/stop_listening``.
    """

    def __init__(self, server_url=BODY_URL):
        # Audio format attributes (presumably the ones speech_recognition reads
        # from an AudioSource -- confirm against the library).
        self.CHUNK = 1365  # number of samples per audio chunk
        self.SAMPLE_RATE = 16000  # 16 kHz
        self.SAMPLE_WIDTH = 2  # each audio sample is 2 bytes
        self.server_url = server_url
        self.stream = None  # populated by __enter__
        self.is_listening = False

    def __enter__(self):
        """Tell the server to start listening and wrap the chunk generator in a NaoStream."""
        start_url = f"{self.server_url}/start_listening"
        requests.post(start_url)
        self.is_listening = True
        chunk_source = self.audio_generator()
        self.stream = NaoStream(chunk_source)
        return self  # the object bound by "with ... as source"

    def audio_generator(self):
        """Yield the body of each ``/get_audio_chunk`` response while listening is active.

        After every chunk the server's buffer length is queried and the pause
        before the next request is scaled by a logistic factor of that length.
        """
        while self.is_listening:
            chunk_url = f"{self.server_url}/get_audio_chunk"
            yield requests.get(chunk_url).content
            length_url = f"{self.server_url}/get_server_buffer_length"
            backlog = requests.get(length_url).json()["length"]
            # Logistic factor: close to 1 for a small backlog and approaching 0
            # as the backlog grows, so a longer server buffer means a shorter pause.
            speedup = 1.0 / (1.0 + np.exp(backlog - np.pi))
            pause = time_between_audio_chunks * speedup
            time.sleep(pause)  # wait for the next audio chunk to be available

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the generator loop and tell the server to stop listening."""
        self.is_listening = False
        stop_url = f"{self.server_url}/stop_listening"
        requests.post(stop_url)
def get_user_text():
    """Record an utterance through NaoAudioSource and return its transcription.

    The loop keeps going until ``recognize_google`` returns text:
      * network-type errors keep the current recording and retry the
        transcription after ``sleep_time`` seconds;
      * unintelligible audio or a ``TimeoutError`` discards the recording so a
        new one is captured on the next pass.

    Each recording is also written to ``input.wav``.
    """
    wav_path = "input.wav"
    recognizer = sr.Recognizer()
    recognizer.pause_threshold = 1  # seconds of non-speaking audio before a phrase is considered complete
    recognizer.operation_timeout = 4  # timeout for recognizer operations (presumably seconds -- confirm)
    recording = None  # None means "nothing usable recorded yet"

    while True:
        # Capture fresh audio only when there is no recording waiting to be transcribed.
        if recording is None:
            with NaoAudioSource() as source:
                print("Recording...")
                record_started = time.time()
                recording = recognizer.listen(source, phrase_time_limit=10, timeout=None)
                with open(wav_path, "wb") as wav_file:
                    wav_file.write(recording.get_wav_data())
                print(f"Recording took {time.time() - record_started} seconds")

        # Turn the recording into text.
        try:
            print("Transcribing...")
            transcribe_started = time.time()
            text = recognizer.recognize_google(recording)
        except (sr.RequestError, URLError, ConnectionResetError) as e:
            print(f"Network error: {e}, retrying after a short delay...")
            time.sleep(sleep_time)  # brief pause, then retry with the same recording
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio, retrying...")
            recording = None  # force a new recording
        except TimeoutError as e:
            print(f"Operation timed out: {e}, retrying after a short delay...")
            recording = None  # force a new recording
        else:
            print(f"Transcribing took {time.time() - transcribe_started} seconds")
            print("You said: " + text)
            return text
def get_gpt_text(conversation_context):
    """Send the conversation to the chat model and return the reply text.

    The context is first passed through ``trim_context``; the time the request
    took and the reply itself are printed.

    Args:
        conversation_context: list of chat message dicts passed as ``messages``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Shrink the history so it fits the token limit before calling the model.
    messages = trim_context(conversation_context)

    # Query the model and time the round trip.
    request_started = time.time()
    completion = openai.ChatCompletion.create(engine="NAO35", messages=messages)
    elapsed = time.time() - request_started
    print(f"{completion.engine} took {elapsed} seconds to respond")

    # Extract the reply text from the first choice.
    first_choice = completion['choices'][0]
    gpt_message = first_choice['message']['content']
    print(f"Nao: {gpt_message}")
    return gpt_message
def send_gpt_text_to_body(gpt_message):
    """POST the reply text as JSON to the body server's ``/talk`` endpoint.

    Args:
        gpt_message: text sent under the ``"message"`` key (presumably spoken
            by the robot -- the server side is not visible here).
    """
    talk_url = f"{BODY_URL}/talk"
    payload = {"message": gpt_message}
    requests.post(talk_url, json=payload)
def save_conversation(context, filename):
    """Write a conversation to a text file in a human-readable layout.

    Each message is written as the capitalized role followed by a colon, the
    content on the next line, and a blank line as separator.

    Args:
        context: iterable of message dicts with ``'role'`` and ``'content'`` keys.
        filename: path of the file to (over)write.
    """
    # Use the arguments rather than a hard-coded path and the module-level
    # conversation list, so the function saves what the caller asked for.
    # UTF-8 is explicit so non-ASCII replies do not depend on the locale encoding.
    with open(filename, "w", encoding="utf-8") as f:
        for entry in context:
            role = entry['role'].capitalize()  # capitalize the role for formatting
            content = entry['content']
            f.write(f"{role}:\n{content}\n\n")
def trim_context(context):
    """Remove the oldest messages after the first until the context fits the token budget.

    The context fits when its token count plus ``max_response_tokens`` is below
    ``token_limit``. The message at index 0 (the system prompt in this file's
    conversation loop) is never removed; messages are dropped from index 1
    onwards, oldest first.

    see https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chatgpt?tabs=python-new&pivots=programming-language-chat-completions for more details.

    Args:
        context: list of chat message dicts. It is modified IN PLACE.

    Returns:
        The same list object that was passed in, after trimming.
    """

    def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
        """Return the number of tokens used by a list of messages."""
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            print("Warning: model not found. Using cl100k_base encoding.")
            encoding = tiktoken.get_encoding("cl100k_base")
        if model in {
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k-0613",
            "gpt-4-0314",
            "gpt-4-32k-0314",
            "gpt-4-0613",
            "gpt-4-32k-0613",
        }:
            tokens_per_message = 3
            tokens_per_name = 1
        elif model == "gpt-3.5-turbo-0301":
            tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
            tokens_per_name = -1  # if there's a name, the role is omitted
        elif "gpt-3.5-turbo" in model:
            print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
            return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
        elif "gpt-4" in model:
            print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
            return num_tokens_from_messages(messages, model="gpt-4-0613")
        else:
            raise NotImplementedError(
                f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
            )
        num_tokens = 0
        for message in messages:
            num_tokens += tokens_per_message
            # every value of the message dict (role, content, name, ...) is tokenized
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += tokens_per_name
        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens

    conv_history_tokens = num_tokens_from_messages(context)
    # Only trim while there is a message at index 1 to drop; without the length
    # check, a first message that is too long on its own ends in an IndexError.
    while len(context) > 1 and conv_history_tokens + max_response_tokens >= token_limit:
        del context[1]
        conv_history_tokens = num_tokens_from_messages(context)
    if conv_history_tokens + max_response_tokens >= token_limit:
        print("Warning: context still exceeds the token limit and cannot be trimmed further.")
    return context
# conversation loop ====================================================================================================
# Runs at import time: loads the system prompt, then repeatedly listens to the
# user, queries the model, forwards the reply to the body server and saves the
# transcript.
with open("system_prompt.txt", "r") as f:
    system_prompt = f.read() # read system prompt from file
conversation_context = [{"role": "system", "content": system_prompt}] # initialize conversation context with system prompt
running = True # never set to False in the visible code, so the loop ends only on an exception or interrupt
while running:
    user_message = get_user_text() # get the user's message (blocks until a transcription succeeds)
    conversation_context.append({"role": "user", "content": user_message}) # add the user's message to the conversation context
    gpt_message = get_gpt_text(conversation_context) # trim_context may delete older entries from this list in place
    send_gpt_text_to_body(gpt_message) # forward the reply to the body server
    conversation_context.append({"role": "assistant", "content": gpt_message}) # add the assistant's reply to the conversation context
    save_conversation(context=conversation_context, filename="conversation_context.txt") # write conversation context to file for easier debugging etc.