-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
87 lines (72 loc) · 2.25 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from IPython.display import display, Image, Audio
import cv2 # We're using OpenCV to read video, to install !pip install opencv-python
import base64
import time
from openai import (
OpenAI,
) # We're using OpenAI to create translation, to install !pip install openai
import os
import requests # install !pip install requests
# Shared OpenAI client for the chat-completion requests below. The key is read
# from the OPENAI_API_KEY environment variable (None is passed when it is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Using GPT's visual capabilities to get a description of a video.
#
# Decode the clip frame by frame and keep every frame as a base64-encoded JPEG
# string in `base64Frames`; later prompt sections sample from this list.
VIDEO_PATH = "data/bison.mp4"

video = cv2.VideoCapture(VIDEO_PATH)
if not video.isOpened():
    # Fail fast: continuing with zero frames would send image-less prompts to
    # the API and produce a description of nothing.
    raise RuntimeError(f"Could not open video file {VIDEO_PATH!r}")

base64Frames = []
try:
    while video.isOpened():
        success, frame = video.read()
        if not success:
            # read() reports failure once no further frame can be grabbed.
            break
        _, buffer = cv2.imencode(".jpg", frame)
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Release the capture handle even if decoding or encoding raised.
    video.release()
print(len(base64Frames), "frames read.")
# Request 1: ask the model for an upload description of the video.
# Every 50th captured frame is attached after the text instruction.
# NOTE(review): the {"image", "resize"} part shape is passed through unchanged;
# confirm against the current API docs that it is still accepted for this model.
PROMPT_MESSAGES = [
    dict(
        role="user",
        content=[
            "These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video.",
        ]
        + [{"image": frame_b64, "resize": 768} for frame_b64 in base64Frames[::50]],
    ),
]
params = dict(
    model="gpt-4o",
    messages=PROMPT_MESSAGES,
    max_tokens=200,
)
result = client.chat.completions.create(**params)
# Print the text of the first returned choice.
print(result.choices[0].message.content)
# Request 2: ask the model for a narration script for the video.
# Every 60th captured frame is attached after the text instruction; `result`
# is rebound here and its text is what the speech request further down reads.
# NOTE(review): the {"image", "resize"} part shape is passed through unchanged;
# confirm against the current API docs that it is still accepted for this model.
PROMPT_MESSAGES = [
    dict(
        role="user",
        content=[
            "These are frames of a video. Create a short voiceover script in the style of David Attenborough. Only include the narration.",
        ]
        + [{"image": frame_b64, "resize": 768} for frame_b64 in base64Frames[::60]],
    ),
]
params = dict(
    model="gpt-4o",
    messages=PROMPT_MESSAGES,
    max_tokens=500,
)
result = client.chat.completions.create(**params)
# Print the text of the first returned choice.
print(result.choices[0].message.content)
# Convert the narration text held in `result` to speech via the OpenAI
# audio/speech HTTP endpoint, then play it and save it to disk.
response = requests.post(
    "https://api.openai.com/v1/audio/speech",
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    },
    json={
        "model": "tts-1-1106",
        "input": result.choices[0].message.content,
        "voice": "onyx",
    },
    # Without a timeout a stalled connection would block the script forever.
    timeout=120,
)
# Raise on a 4xx/5xx reply; otherwise the error body would be written to the
# .mp3 file below as if it were audio.
response.raise_for_status()

# Join the chunks in one pass instead of growing a bytes object with +=.
audio = b"".join(response.iter_content(chunk_size=1024 * 1024))

# A bare `Audio(audio)` expression is discarded when run as a script, so hand
# the player object to display() explicitly.
display(Audio(audio))

# Save the audio to a file
with open("generated_audio.mp3", "wb") as f:
    f.write(audio)
print("Audio saved as 'generated_audio.mp3'")