import logging
import shutil
import threading
import time
from datetime import datetime
from pathlib import Path

import requests
from openai import OpenAI
from PIL import Image

import helpers
import prompts
import settings
client = ""
assistant = ""
assistant_thread = ""
image_thread = None


def setup():
    global client, assistant, assistant_thread
    client = OpenAI(api_key=settings.openai_api_key)
    assistant = client.beta.assistants.retrieve(settings.openai_assistant_id)
    logging.info(assistant)
    assistant_thread = client.beta.threads.create()
    # Save the assistant thread to a text file, so we can use it in our
    # scheduled image cronjob
    with open("assistant_thread.txt", "w") as assistant_thread_file:
        assistant_thread_file.write(assistant_thread.id)
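
# The scheduled image cronjob (a separate script) can then reattach to the
# same conversation; a minimal sketch, assuming the same client setup as above:
#
#     with open("assistant_thread.txt") as f:
#         thread = client.beta.threads.retrieve(f.read().strip())
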
def whisper_text_to_speech(text_to_say, insert_audio_path=None):
    """
    Text to speech using OpenAI's text-to-speech API (the tts-1 model).

    insert_audio_path: Path of an audio file to play before playing the
    generated speech. For example, we might want to request the text to
    speech, then play the family bell audio, and then play the generated
    speech. This reduces the delay between the family bell audio and the
    text to speech.
    """
    speech_file_path = Path(__file__).parent / "speech.mp3"
    response = client.audio.speech.create(
        model="tts-1", voice="nova", input=text_to_say
    )
    response.stream_to_file(speech_file_path)
    if insert_audio_path:
        helpers.play_audio(insert_audio_path)
    helpers.play_audio(speech_file_path)
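
# Example call (hypothetical file name; helpers.play_audio does the playback):
#
#     whisper_text_to_speech("Time for dinner!", insert_audio_path="bell.mp3")
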
def _generate_chatgpt_image(user_text, assistant_output_text):
    """
    Generates a DALL·E image based on the given text (usually the output of
    the GPT assistant).
    """
    logging.info("Generating image")
    image_prompt = (
        f"{prompts.assistant_image_prompt}\n{user_text}\n{assistant_output_text}"
    )
    response = client.images.generate(
        model="dall-e-3",
        prompt=image_prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )
    image_url = response.data[0].url
    logging.info(image_url)
    # Download the image
    response = requests.get(image_url, stream=True)
    if response.ok:
        with open("dalle_image.png", "wb") as image_file:
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, image_file)
        # Resize the image to display on the smaller, 800x480 display. This
        # doesn't maintain the aspect ratio.
        image = Image.open("dalle_image.png")
        resized_image = image.resize((800, 480))
        resized_image.save("resized.png")
        helpers.display_image("resized.png")
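
# If keeping the aspect ratio mattered, PIL's ImageOps.fit could centre-crop
# and scale instead; a sketch, not used here:
#
#     from PIL import ImageOps
#     resized_image = ImageOps.fit(image, (800, 480))
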
def start_image_thread(input_text, assistant_output):
    global image_thread
    image_thread = threading.Thread(
        target=_generate_chatgpt_image,
        args=(input_text, assistant_output),
    )
    # Run as a daemon so a pending image generation doesn't block shutdown.
    # (The original set a custom should_abort_immediately attribute, which
    # threading.Thread ignores; daemon=True is the standard equivalent.)
    image_thread.daemon = True
    image_thread.start()


def send_to_assistant(input_text, text_to_speech=True):
    """
    Sends text to an OpenAI Assistant and gets the response to pass to the
    text to speech and DALL·E calls.
    """
    # Encourage the GPT response to be brief. This is usually set on
    # the assistant prompt, however I've found responses can still be
    # rather long.
    current_datetime = datetime.now().strftime("%c")
    brief_prompt = "Remember to keep responses brief."
    amended_input_text = (
        f"The date and time is {current_datetime}.\n{input_text}\n{brief_prompt}"
    )
    logging.info(f"Input text: {amended_input_text}")
    client.beta.threads.messages.create(
        thread_id=assistant_thread.id, role="user", content=amended_input_text
    )
    run = client.beta.threads.runs.create(
        thread_id=assistant_thread.id,
        assistant_id=assistant.id,
    )
    # Poll the run until it completes, or give up after timeout_limit seconds.
    run_completed = False
    timeout_limit = 300
    timeout_counter = 0
    while not run_completed:
        if timeout_counter >= timeout_limit:
            logging.info("Timeout exceeded")
            break
        run = client.beta.threads.runs.retrieve(
            thread_id=assistant_thread.id,
            run_id=run.id,
        )
        if run.status == "completed":
            run_completed = True
        time.sleep(1)
        timeout_counter += 1
    if not run_completed:
        assistant_output = (
            "Sorry, it looks like something went wrong. Try again in a moment or two."
        )
    else:
        thread_messages = client.beta.threads.messages.list(assistant_thread.id)
        # The most recent assistant response will be the first item in the list
        assistant_output = thread_messages.data[0].content[0].text.value
    logging.info(f"Assistant output: {assistant_output}")
    start_image_thread(input_text, assistant_output)
    if text_to_speech:
        whisper_text_to_speech(assistant_output)
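
# Example call (hypothetical input; setup() must have run first):
#
#     send_to_assistant("What's on the calendar today?")
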
def send_image_to_chatgpt(base64_image, prompt):
    """
    Sends an image to the ChatGPT vision API for analysis. The Assistant API
    doesn't support images yet, so this is a workaround until then.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {settings.openai_api_key}",
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        },
                    },
                ],
            }
        ],
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
    )
    return response.json()["choices"][0]["message"]["content"]
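

if __name__ == "__main__":
    # Minimal manual test; a sketch, assuming settings.py holds a valid
    # OpenAI API key and assistant id. Sends one message through the
    # assistant pipeline without audio output (the image thread will still
    # run and call helpers.display_image).
    logging.basicConfig(level=logging.INFO)
    setup()
    send_to_assistant("Say hello in one short sentence.", text_to_speech=False)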