-
Notifications
You must be signed in to change notification settings - Fork 919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Azure] Support Realtime API - Standalone client #1283
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: I think these examples should mention realtime in the path somewhere, e.g.
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,61 @@ | ||||||
import { AzureOpenAIRealtimeWebSocket } from 'openai/beta/realtime/websocket'; | ||||||
import { AzureOpenAI } from 'openai'; | ||||||
import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; | ||||||
import 'dotenv/config'; | ||||||
|
||||||
/**
 * Example: stream a text response from an Azure OpenAI Realtime deployment
 * using `AzureOpenAIRealtimeWebSocket` (the `addEventListener`-style socket).
 *
 * Authentication uses an Azure AD bearer-token provider built from
 * `DefaultAzureCredential`. The function resolves once the socket has been
 * created and every handler is registered; the conversation itself is driven
 * by the event handlers below.
 */
async function main(): Promise<void> {
  const cred = new DefaultAzureCredential();
  const scope = 'https://cognitiveservices.azure.com/.default';
  const deploymentName = 'gpt-4o-realtime-preview-1001';
  const azureADTokenProvider = getBearerTokenProvider(cred, scope);
  const client = new AzureOpenAI({
    azureADTokenProvider,
    apiVersion: '2024-10-01-preview',
    deployment: deploymentName,
  });
  const rt = new AzureOpenAIRealtimeWebSocket(client);
  await rt.open();

  // access the underlying `WebSocket` instance
  rt.socket.addEventListener('open', () => {
    console.log('Connection opened!');

    // Restrict the session to text output before starting the conversation.
    rt.send({
      type: 'session.update',
      session: {
        modalities: ['text'],
        model: 'gpt-4o-realtime-preview',
      },
    });

    // Queue the user's message...
    rt.send({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
      },
    });

    // ...and ask the model to respond to it.
    rt.send({ type: 'response.create' });
  });

  rt.on('error', (err) => {
    // in a real world scenario this should be logged somewhere as you
    // likely want to continue processing events regardless of any errors
    throw err;
  });

  rt.on('session.created', (event) => {
    console.log('session created!', event.session);
    console.log();
  });

  // Print the response text as it streams in, then finish the line.
  rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
  rt.on('response.text.done', () => console.log());

  // One response is all this example needs; close the connection afterwards.
  rt.on('response.done', () => rt.close());

  rt.socket.addEventListener('close', () => console.log('\nConnection closed!'));
}

// Report setup failures (credential lookup, opening the socket) explicitly
// instead of leaving the promise returned by `main()` floating.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import { DefaultAzureCredential, getBearerTokenProvider } from '@azure/identity'; | ||
import { AzureOpenAIRealtimeWS } from 'openai/beta/realtime/ws'; | ||
import { AzureOpenAI } from 'openai'; | ||
import 'dotenv/config'; | ||
|
||
/**
 * Example: stream a text response from an Azure OpenAI Realtime deployment
 * using `AzureOpenAIRealtimeWS` (the `ws`-backed socket, hence `.on(...)` on
 * `rt.socket`).
 *
 * Authentication uses an Azure AD bearer-token provider built from
 * `DefaultAzureCredential`. The function resolves once `rt.open()` has
 * completed and every handler is registered; the conversation itself is
 * driven by the event handlers below.
 */
async function main(): Promise<void> {
  const cred = new DefaultAzureCredential();
  const scope = 'https://cognitiveservices.azure.com/.default';
  const deploymentName = 'gpt-4o-realtime-preview-1001';
  const azureADTokenProvider = getBearerTokenProvider(cred, scope);
  const client = new AzureOpenAI({
    azureADTokenProvider,
    apiVersion: '2024-10-01-preview',
    deployment: deploymentName,
  });
  const rt = new AzureOpenAIRealtimeWS(client);
  await rt.open();

  // access the underlying `ws.WebSocket` instance
  rt.socket.on('open', () => {
    console.log('Connection opened!');

    // Restrict the session to text output. This is sent exactly once; an
    // identical second `session.update` would be redundant.
    rt.send({
      type: 'session.update',
      session: {
        modalities: ['text'],
        model: 'gpt-4o-realtime-preview',
      },
    });

    // Queue the user's message...
    rt.send({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [{ type: 'input_text', text: 'Say a couple paragraphs!' }],
      },
    });

    // ...and ask the model to respond to it.
    rt.send({ type: 'response.create' });
  });

  rt.on('error', (err) => {
    // in a real world scenario this should be logged somewhere as you
    // likely want to continue processing events regardless of any errors
    throw err;
  });

  rt.on('session.created', (event) => {
    console.log('session created!', event.session);
    console.log();
  });

  // Print the response text as it streams in, then finish the line.
  rt.on('response.text.delta', (event) => process.stdout.write(event.delta));
  rt.on('response.text.done', () => console.log());

  // One response is all this example needs; close the connection afterwards.
  rt.on('response.done', () => rt.close());

  rt.socket.on('close', () => console.log('\nConnection closed!'));
}

// Report setup failures (credential lookup, opening the socket) explicitly
// instead of leaving the promise returned by `main()` floating.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @kwhinnery-openai curious if you have any opinions on the location of the azure classes, is it fine / good for them to be in the same file as the OpenAI ones? It does feel a bit verbose to have to do
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
import { OpenAI } from '../../index'; | ||
import { AzureOpenAI, OpenAI } from '../../index'; | ||
import { OpenAIError } from '../../error'; | ||
import * as Core from '../../core'; | ||
import type { RealtimeClientEvent, RealtimeServerEvent } from '../../resources/beta/realtime/realtime'; | ||
|
@@ -95,3 +95,106 @@ export class OpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter { | |
} | ||
} | ||
} | ||
|
||
/**
 * Builds the `wss:` URL used to connect to an Azure OpenAI Realtime deployment.
 *
 * The path `/realtime` is appended to `baseURL`, and `api-version` and
 * `deployment` are set as query parameters. Credentials also travel in the
 * query string: the API key as `api-key` when one is configured, otherwise the
 * bearer token as `Authorization`.
 *
 * @throws Error when neither an API key nor a token is available.
 */
function buildAzureRealtimeURL({
  apiVersion,
  baseURL,
  deploymentName,
  apiKey,
  token,
}: {
  baseURL: string;
  deploymentName: string;
  apiVersion: string;
  apiKey: string;
  token: string | undefined;
}): URL {
  const path = '/realtime';
  // Avoid a double slash when the base URL already ends with one.
  const url = new URL(baseURL + (baseURL.endsWith('/') ? path.slice(1) : path));
  url.protocol = 'wss';
  url.searchParams.set('api-version', apiVersion);
  url.searchParams.set('deployment', deploymentName);

  // NOTE(review): '<Missing Key>' is presumably the placeholder the Azure
  // client stores when it was constructed without an API key — confirm
  // against the AzureOpenAI client.
  if (apiKey !== '<Missing Key>') {
    url.searchParams.set('api-key', apiKey);
  } else if (token) {
    url.searchParams.set('Authorization', `Bearer ${token}`);
  } else {
    throw new Error('AzureOpenAI is not instantiated correctly. No API key or token provided.');
  }
  return url;
}

/**
 * Realtime API client for Azure OpenAI on top of the global `WebSocket`.
 *
 * Call `open()` before `send()`. Parsed server events are re-emitted through
 * the inherited emitter, both under the generic `'event'` name and under
 * their own `type`.
 */
export class AzureOpenAIRealtimeWebSocket extends OpenAIRealtimeEmitter {
  // NOTE(review): open question from code review — does the Azure API support
  // ephemeral session tokens? The reviewer raised this in relation to a
  // requirement of the OpenAI API client; still to be confirmed.
  /** The underlying socket; only assigned once `open()` has run. */
  socket: _WebSocket;

  /**
   * @param client  Azure client supplying the base URL, API version and credentials.
   * @param options `deploymentName` selects the deployment for this connection.
   */
  constructor(
    private client: AzureOpenAI,
    private options: {
      deploymentName?: string;
    } = {},
  ) {
    super();
  }

  /**
   * Resolves credentials, creates the socket and wires its `message` and
   * `error` events into the emitter.
   *
   * The returned promise settles once the socket object has been created; it
   * does not wait for the connection's `open` event.
   *
   * @throws Error when no deployment name is available, or when the client
   *   has neither an API key nor an Azure AD token.
   */
  async open(): Promise<void> {
    // An explicitly passed per-connection option takes precedence over the
    // client-wide default deployment.
    const deploymentName = this.options.deploymentName ?? this.client.deploymentName;
    if (!deploymentName) {
      throw new Error('No deployment name provided');
    }

    const url = buildAzureRealtimeURL({
      apiVersion: this.client.apiVersion,
      baseURL: this.client.baseURL,
      deploymentName,
      apiKey: this.client.apiKey,
      token: await this.client.getAzureADToken(),
    });
    // @ts-ignore
    this.socket = new WebSocket(url, ['realtime', 'openai-beta.realtime-v1']);

    this.socket.addEventListener('message', (websocketEvent: MessageEvent) => {
      let event: RealtimeServerEvent;
      try {
        event = JSON.parse(websocketEvent.data.toString()) as RealtimeServerEvent;
      } catch (err) {
        this._onError(null, 'could not parse websocket event', err);
        return;
      }

      // Ignore payloads that parse to a falsy value (e.g. `null`).
      if (!event) {
        return;
      }

      this._emit('event', event);

      if (event.type === 'error') {
        this._onError(event);
      } else {
        // @ts-expect-error TS isn't smart enough to get the relationship right here
        this._emit(event.type, event);
      }
    });

    this.socket.addEventListener('error', (event: any) => {
      this._onError(null, event.message, null);
    });
  }

  /**
   * Serialises `event` as JSON and sends it over the socket.
   *
   * Failures while sending are routed to `_onError` instead of being thrown.
   *
   * @throws Error when called before `open()` has created the socket.
   */
  send(event: RealtimeClientEvent) {
    if (!this.socket) {
      throw new Error('Socket is not open, call open() first');
    }
    try {
      this.socket.send(JSON.stringify(event));
    } catch (err) {
      this._onError(null, 'could not send data', err);
    }
  }

  /**
   * Closes the socket, defaulting to code `1000` and reason `'OK'`.
   *
   * A no-op when the socket was never created; failures while closing are
   * routed to `_onError` instead of being thrown.
   */
  close(props?: { code: number; reason: string }) {
    try {
      this.socket?.close(props?.code ?? 1000, props?.reason ?? 'OK');
    } catch (err) {
      this._onError(null, 'could not close the connection', err);
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I think it'd be better to reduce the amount of deps a user would need to add if they copy-paste this example file.
I don't feel super strongly here though, and `dotenv` is very standard... maybe @kwhinnery-openai has thoughts?