Commit

audio apis (#238)
* do not merge - audio api init

* Get audio stuff working. (#245)

* Initially getting things working.

* More closely match spec

* Formatting fixes.

* Adjust handling of different types to make linter happy.

* Add type definition

* Decode bytes in ternary

* bump to version 1.3.14

---------

Co-authored-by: jdreamerz <[email protected]>
Co-authored-by: Justin Driemeyer <[email protected]>
3 people authored Jan 27, 2025
1 parent c185015 commit 82dba38
Showing 9 changed files with 325 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "together"
version = "1.3.13"
version = "1.3.14"
authors = [
"Together AI <[email protected]>"
]
22 changes: 15 additions & 7 deletions src/together/abstract/api_requestor.py
@@ -78,7 +78,7 @@ def parse_stream_helper(line: bytes) -> str | None:
line = line[len(b"data: ") :]
else:
line = line[len(b"data:") :]
-if line.strip() == b"[DONE]":
+if line.strip().upper() == b"[DONE]":
# return here will cause GeneratorExit exception in urllib3
# and it will close http connection with TCP Reset
return None
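
For illustration, a tiny standalone sketch of the behavior this hunk changes: the SSE terminator check now compares case-insensitively, so a lowercase "[done]" sentinel also ends the stream. This is not the SDK's own helper, just a minimal rephrasing of the new condition.

def is_stream_done(line: bytes) -> bool:
    # Mirrors the updated check: strip surrounding whitespace, then compare case-insensitively.
    return line.strip().upper() == b"[DONE]"

assert is_stream_done(b"[DONE]\n")
assert is_stream_done(b"[done]")  # accepted only after this change
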
@@ -620,17 +620,22 @@ def _interpret_response(
self, result: requests.Response, stream: bool
) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
"""Returns the response(s) and a bool indicating whether it is a stream."""
-if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
+content_type = result.headers.get("Content-Type", "")
+if stream and "text/event-stream" in content_type:
return (
self._interpret_response_line(
line, result.status_code, result.headers, stream=True
)
for line in parse_stream(result.iter_lines())
), True
else:
+if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
+content = result.content
+else:
+content = result.content.decode("utf-8")
return (
self._interpret_response_line(
-result.content.decode("utf-8"),
+content,
result.status_code,
result.headers,
stream=False,
@@ -670,7 +675,7 @@ async def _interpret_async_response(
)

def _interpret_response_line(
-self, rbody: str, rcode: int, rheaders: Any, stream: bool
+self, rbody: str | bytes, rcode: int, rheaders: Any, stream: bool
) -> TogetherResponse:
# HTTP 204 response code does not have any content in the body.
if rcode == 204:
@@ -684,13 +689,16 @@ def _interpret_response_line(
)

try:
if "text/plain" in rheaders.get("Content-Type", ""):
data: Dict[str, Any] = {"message": rbody}
content_type = rheaders.get("Content-Type", "")
if isinstance(rbody, bytes):
data: Dict[str, Any] | bytes = rbody
elif "text/plain" in content_type:
data = {"message": rbody}
else:
data = json.loads(rbody)
except (JSONDecodeError, UnicodeDecodeError) as e:
raise error.APIError(
f"Error code: {rcode} -{rbody}",
f"Error code: {rcode} -{rbody if isinstance(rbody, str) else rbody.decode()}",
http_status=rcode,
headers=rheaders,
) from e
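
Taken together, the hunks above make the requestor hand binary audio payloads back as raw bytes while still JSON-decoding ordinary API responses. A minimal standalone sketch of that branching, using the same content types as the diff (not the SDK's actual method):

import json
from typing import Any, Dict, Union

_BINARY_CONTENT_TYPES = ("application/octet-stream", "audio/wav", "audio/mpeg")


def interpret_body(raw: bytes, content_type: str) -> Union[Dict[str, Any], bytes]:
    # Audio payloads pass through untouched instead of being decoded as UTF-8.
    if content_type in _BINARY_CONTENT_TYPES:
        return raw
    text = raw.decode("utf-8")
    # Plain-text bodies are wrapped in a message dict; everything else is parsed as JSON.
    if "text/plain" in content_type:
        return {"message": text}
    return json.loads(text)
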
2 changes: 2 additions & 0 deletions src/together/client.py
@@ -19,6 +19,7 @@ class Together:
models: resources.Models
fine_tuning: resources.FineTuning
rerank: resources.Rerank
+audio: resources.Audio

# client options
client: TogetherClient
@@ -79,6 +80,7 @@ def __init__(
self.models = resources.Models(self.client)
self.fine_tuning = resources.FineTuning(self.client)
self.rerank = resources.Rerank(self.client)
+self.audio = resources.Audio(self.client)


class AsyncTogether:
3 changes: 3 additions & 0 deletions src/together/resources/__init__.py
@@ -6,6 +6,7 @@
from together.resources.images import AsyncImages, Images
from together.resources.models import AsyncModels, Models
from together.resources.rerank import AsyncRerank, Rerank
+from together.resources.audio import AsyncAudio, Audio


__all__ = [
@@ -25,4 +26,6 @@
"Models",
"AsyncRerank",
"Rerank",
"AsyncAudio",
"Audio",
]
24 changes: 24 additions & 0 deletions src/together/resources/audio/__init__.py
@@ -0,0 +1,24 @@
from functools import cached_property

from together.resources.audio.speech import AsyncSpeech, Speech
from together.types import (
    TogetherClient,
)


class Audio:
    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    @cached_property
    def speech(self) -> Speech:
        return Speech(self._client)


class AsyncAudio:
    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    @cached_property
    def speech(self) -> AsyncSpeech:
        return AsyncSpeech(self._client)
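
Both accessors use functools.cached_property, so the Speech or AsyncSpeech sub-resource is constructed lazily on first access and the same instance is returned afterwards. A minimal, SDK-independent illustration of that pattern:

from functools import cached_property


class LazyHolder:
    @cached_property
    def child(self) -> list:
        # Runs only on the first attribute access; the result is then cached on the instance.
        return []


holder = LazyHolder()
assert holder.child is holder.child  # second access reuses the cached object
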
153 changes: 153 additions & 0 deletions src/together/resources/audio/speech.py
@@ -0,0 +1,153 @@
from __future__ import annotations

from typing import Any, AsyncGenerator, Dict, Iterator, List, Union

from together.abstract import api_requestor
from together.together_response import TogetherResponse
from together.types import (
    AudioSpeechRequest,
    AudioResponseFormat,
    AudioLanguage,
    AudioResponseEncoding,
    AudioSpeechStreamChunk,
    AudioSpeechStreamEvent,
    AudioSpeechStreamResponse,
    TogetherClient,
    TogetherRequest,
)


class Speech:
    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    def create(
        self,
        *,
        model: str,
        input: str,
        voice: str | None = None,
        response_format: str = "wav",
        language: str = "en",
        response_encoding: str = "pcm_f32le",
        sample_rate: int = 44100,
        stream: bool = False,
        **kwargs: Any,
    ) -> AudioSpeechStreamResponse:
        """
        Method to generate audio from input text using a specified model.

        Args:
            model (str): The name of the model to query.
            input (str): Input text to generate the audio for.
            voice (str, optional): The voice to use for generating the audio.
                Defaults to None.
            response_format (str, optional): The format of audio output.
                Defaults to "wav".
            language (str, optional): Language of input text.
                Defaults to "en".
            response_encoding (str, optional): Audio encoding of response.
                Defaults to "pcm_f32le".
            sample_rate (int, optional): Sampling rate to use for the output audio.
                Defaults to 44100.
            stream (bool, optional): If true, output is streamed for several characters at a time.
                Defaults to False.

        Returns:
            Union[bytes, Iterator[AudioSpeechStreamChunk]]: The generated audio as bytes or an iterator over audio stream chunks.
        """

        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        parameter_payload = AudioSpeechRequest(
            model=model,
            input=input,
            voice=voice,
            response_format=AudioResponseFormat(response_format),
            language=AudioLanguage(language),
            response_encoding=AudioResponseEncoding(response_encoding),
            sample_rate=sample_rate,
            stream=stream,
            **kwargs,
        ).model_dump(exclude_none=True)

        response, streamed, _ = requestor.request(
            options=TogetherRequest(
                method="POST",
                url="audio/speech",
                params=parameter_payload,
            ),
            stream=stream,
        )

        return AudioSpeechStreamResponse(response=response)


class AsyncSpeech:
    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    async def create(
        self,
        *,
        model: str,
        input: str,
        voice: str | None = None,
        response_format: str = "wav",
        language: str = "en",
        response_encoding: str = "pcm_f32le",
        sample_rate: int = 44100,
        stream: bool = False,
        **kwargs: Any,
    ) -> AudioSpeechStreamResponse:
        """
        Async method to generate audio from input text using a specified model.

        Args:
            model (str): The name of the model to query.
            input (str): Input text to generate the audio for.
            voice (str, optional): The voice to use for generating the audio.
                Defaults to None.
            response_format (str, optional): The format of audio output.
                Defaults to "wav".
            language (str, optional): Language of input text.
                Defaults to "en".
            response_encoding (str, optional): Audio encoding of response.
                Defaults to "pcm_f32le".
            sample_rate (int, optional): Sampling rate to use for the output audio.
                Defaults to 44100.
            stream (bool, optional): If true, output is streamed for several characters at a time.
                Defaults to False.

        Returns:
            Union[bytes, AsyncGenerator[AudioSpeechStreamChunk, None]]: The generated audio as bytes or an async generator over audio stream chunks.
        """

        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        parameter_payload = AudioSpeechRequest(
            model=model,
            input=input,
            voice=voice,
            response_format=AudioResponseFormat(response_format),
            language=AudioLanguage(language),
            response_encoding=AudioResponseEncoding(response_encoding),
            sample_rate=sample_rate,
            stream=stream,
            **kwargs,
        ).model_dump(exclude_none=True)

        response, _, _ = await requestor.arequest(
            options=TogetherRequest(
                method="POST",
                url="audio/speech",
                params=parameter_payload,
            ),
            stream=stream,
        )

        return AudioSpeechStreamResponse(response=response)
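
For orientation, a hedged usage sketch of the endpoint this file adds. The model name, the voice value, and the stream_to_file helper on AudioSpeechStreamResponse are assumptions here; the response type is defined in together/types/audio_speech.py, which is not shown in this diff.

from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

speech = client.audio.speech.create(
    model="cartesia/sonic",  # assumption: any text-to-speech model served by Together
    input="Hello from the new audio API!",
    voice="default voice",   # assumption: an available voice name for that model
    response_format="wav",
    sample_rate=44100,
)

# Assumption: the stream response exposes a helper for writing the audio to disk;
# if it does not, consume the wrapped TogetherResponse data directly.
speech.stream_to_file("speech.wav")
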
2 changes: 1 addition & 1 deletion src/together/together_response.py
@@ -8,7 +8,7 @@ class TogetherResponse:
API Response class. Stores headers and response data.
"""

-def __init__(self, data: Dict[str, Any], headers: Dict[str, Any]):
+def __init__(self, data: Any, headers: Dict[str, Any]):
self._headers = headers
self.data = data

16 changes: 16 additions & 0 deletions src/together/types/__init__.py
@@ -42,6 +42,15 @@
RerankRequest,
RerankResponse,
)
+from together.types.audio_speech import (
+AudioSpeechRequest,
+AudioResponseFormat,
+AudioLanguage,
+AudioResponseEncoding,
+AudioSpeechStreamChunk,
+AudioSpeechStreamEvent,
+AudioSpeechStreamResponse,
+)

__all__ = [
"TogetherClient",
@@ -77,4 +86,11 @@
"RerankRequest",
"RerankResponse",
"FinetuneTrainingLimits",
"AudioSpeechRequest",
"AudioResponseFormat",
"AudioLanguage",
"AudioResponseEncoding",
"AudioSpeechStreamChunk",
"AudioSpeechStreamEvent",
"AudioSpeechStreamResponse",
]
(1 additional changed file in this commit did not load in this view)
