From 3fca11c4f72c3d6e33a2061156396fdf03b0beb2 Mon Sep 17 00:00:00 2001 From: Stephen Hodgson Date: Mon, 25 Nov 2024 01:01:53 -0500 Subject: [PATCH] com.rest.elevenlabs 3.4.0 (#100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - com.utilities.rest -> 3.3.0 - com.utilities.encoder.ogg -> 4.0.2 - Added additional request properties for TextToSpeechRequest - `previous_text`, `next_text`, `previous_request_ids`, `next_request_ids`, `languageCode`, `withTimestamps` - `cacheFormat` which can be `None`, `Wav`, or `Ogg` - Added support for transcription timestamps by @tomkail - Added support for language code in TextToSpeechRequest @Mylan719 - Refactored `VoiceClip` - clip samples and data are now prioritized over the `AudioClip` - audioClip will not be created until you access the `VoiceClip.AudioClip` property - if an audio clip is not loaded, you can load it with `LoadCachedAudioClipAsync` - Refactored demo scene to use `OnAudioFilterRead` to better quality stream playback --------- Co-authored-by: Milan Mikuš Co-authored-by: Milan Mikuš Co-authored-by: Tom Kail Co-authored-by: Tom Kail --- .github/workflows/unity.yml | 16 +- ElevenLabs/.editorconfig | 3 + .../Documentation~/README.md | 41 +- .../Editor/ElevenLabsDashboard.cs | 6 +- .../Runtime/Common/CacheFormat.cs | 11 + ...Extensions.cs.meta => CacheFormat.cs.meta} | 2 +- .../Runtime/Common/GeneratedClip.cs | 81 ++- .../Runtime/Common/OutputFormatExtensions.cs | 24 - .../Common/TimestampedTranscriptCharacter.cs | 44 ++ .../TimestampedTranscriptCharacter.cs.meta | 11 + .../Runtime/Common/VoiceClip.cs | 12 +- .../Runtime/Dubbing/DubbingEndpoint.cs | 2 - .../Runtime/Extensions/Extensions.cs | 16 + .../Runtime/Extensions/Extensions.cs.meta | 11 + .../Runtime/History/HistoryEndpoint.cs | 17 +- .../Runtime/TextToSpeech/Alignment.cs | 52 ++ .../Runtime/TextToSpeech/Alignment.cs.meta | 11 + .../TextToSpeech/TextToSpeechEndpoint.cs | 599 ++++++++++-------- 
.../TextToSpeech/TextToSpeechRequest.cs | 107 +++- .../TextToSpeech/TranscriptionResponse.cs | 30 + .../TranscriptionResponse.cs.meta | 11 + .../Runtime/Voices/Voice.cs | 2 +- .../Runtime/Voices/VoicesEndpoint.cs | 13 +- .../TextToSpeech/ElevenLabs.Demo.asmdef | 3 +- .../Samples~/TextToSpeech/TextToSpeechDemo.cs | 77 ++- .../TextToSpeech/TextToSpeechDemo.unity | 30 +- .../Test_Fixture_04_TextToSpeechEndpoint.cs | 92 ++- .../Packages/com.rest.elevenlabs/package.json | 8 +- ElevenLabs/Packages/manifest.json | 4 +- ElevenLabs/ProjectSettings/ProjectVersion.txt | 4 +- README.md | 41 +- 31 files changed, 926 insertions(+), 455 deletions(-) create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs rename ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/{OutputFormatExtensions.cs.meta => CacheFormat.cs.meta} (86%) delete mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs.meta create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs.meta create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs.meta create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs create mode 100644 ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs.meta diff --git a/.github/workflows/unity.yml b/.github/workflows/unity.yml index c8a4036..1872865 100644 --- a/.github/workflows/unity.yml +++ b/.github/workflows/unity.yml @@ -11,26 +11,24 @@ on: workflow_dispatch: concurrency: 
group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ ( github.event_name == 'pull_request' || github.event.action == 'synchronize' ) }} -permissions: - checks: write - pull-requests: write + cancel-in-progress: ${{ (github.event_name == 'pull_request' || github.event.action == 'synchronize') }} jobs: build: - env: - UNITY_PROJECT_PATH: '' + permissions: + checks: write + pull-requests: write runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-13] + os: [ubuntu-latest, windows-latest, macos-15] unity-versions: [2021.x, 2022.x, 6000.x] include: - os: ubuntu-latest build-target: StandaloneLinux64 - os: windows-latest build-target: StandaloneWindows64 - - os: macos-13 + - os: macos-15 build-target: StandaloneOSX steps: - uses: actions/checkout@v4 @@ -46,11 +44,13 @@ jobs: - uses: RageAgainstThePixel/unity-action@v1 name: '${{ matrix.build-target }}-Validate' with: + build-target: ${{ matrix.build-target }} log-name: '${{ matrix.build-target }}-Validate' args: '-quit -nographics -batchmode -executeMethod Utilities.Editor.BuildPipeline.UnityPlayerBuildTools.ValidateProject -importTMProEssentialsAsset' - uses: RageAgainstThePixel/unity-action@v1 name: '${{ matrix.build-target }}-Build' with: + build-target: ${{ matrix.build-target }} log-name: '${{ matrix.build-target }}-Build' args: '-quit -nographics -batchmode -executeMethod Utilities.Editor.BuildPipeline.UnityPlayerBuildTools.StartCommandLineBuild' - uses: actions/upload-artifact@v4 diff --git a/ElevenLabs/.editorconfig b/ElevenLabs/.editorconfig index ac4472f..caa477f 100644 --- a/ElevenLabs/.editorconfig +++ b/ElevenLabs/.editorconfig @@ -34,6 +34,9 @@ dotnet_style_predefined_type_for_locals_parameters_members = true # Code Style csharp_style_var_when_type_is_apparent = true +dotnet_diagnostic.IDE0051.severity = none +dotnet_diagnostic.CS0649.severity = none + #### Resharper/Rider Rules #### # 
https://www.jetbrains.com/help/resharper/EditorConfig_Properties.html diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Documentation~/README.md b/ElevenLabs/Packages/com.rest.elevenlabs/Documentation~/README.md index 75937d6..57ccf1c 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Documentation~/README.md +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Documentation~/README.md @@ -40,6 +40,7 @@ The recommended installation method is though the unity package manager and [Ope - [com.utilities.extensions](https://github.com/RageAgainstThePixel/com.utilities.extensions) - [com.utilities.audio](https://github.com/RageAgainstThePixel/com.utilities.audio) - [com.utilities.encoder.ogg](https://github.com/RageAgainstThePixel/com.utilities.encoder.ogg) + - [com.utilities.encoder.wav](https://github.com/RageAgainstThePixel/com.utilities.encoder.wav) - [com.utilities.rest](https://github.com/RageAgainstThePixel/com.utilities.rest) --- @@ -59,7 +60,7 @@ The recommended installation method is though the unity package manager and [Ope - [Text to Speech](#text-to-speech) - [Stream Text To Speech](#stream-text-to-speech) - [Voices](#voices) - - [Get Shared Voices](#get-shared-voices) :new: + - [Get Shared Voices](#get-shared-voices) - [Get All Voices](#get-all-voices) - [Get Default Voice Settings](#get-default-voice-settings) - [Get Voice](#get-voice) @@ -70,13 +71,13 @@ The recommended installation method is though the unity package manager and [Ope - [Samples](#samples) - [Download Voice Sample](#download-voice-sample) - [Delete Voice Sample](#delete-voice-sample) -- [Dubbing](#dubbing) :new: - - [Dub](#dub) :new: - - [Get Dubbing Metadata](#get-dubbing-metadata) :new: - - [Get Transcript for Dub](#get-transcript-for-dub) :new: - - [Get dubbed file](#get-dubbed-file) :new: - - [Delete Dubbing Project](#delete-dubbing-project) :new: -- [SFX Generation](#sfx-generation) :new: +- [Dubbing](#dubbing) + - [Dub](#dub) + - [Get Dubbing Metadata](#get-dubbing-metadata) + - [Get 
Transcript for Dub](#get-transcript-for-dub) + - [Get dubbed file](#get-dubbed-file) + - [Delete Dubbing Project](#delete-dubbing-project) +- [SFX Generation](#sfx-generation) - [History](#history) - [Get History](#get-history) - [Get History Item](#get-history-item) @@ -265,8 +266,8 @@ Convert text to speech. var api = new ElevenLabsClient(); var text = "The quick brown fox jumps over the lazy dog."; var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); -var defaultVoiceSettings = await api.VoicesEndpoint.GetDefaultVoiceSettingsAsync(); -var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(text, voice, defaultVoiceSettings); +var request = new TextToSpeechRequest(voice, text); +var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request); audioSource.PlayOneShot(voiceClip.AudioClip); ``` @@ -284,18 +285,14 @@ Stream text to speech. var api = new ElevenLabsClient(); var text = "The quick brown fox jumps over the lazy dog."; var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); -var partialClips = new Queue(); -var voiceClip = await api.TextToSpeechEndpoint.StreamTextToSpeechAsync( - text, - voice, - partialClip => - { - // Note: Best to queue them and play them in update loop! 
- // See TextToSpeech sample demo for details - partialClips.Enqueue(partialClip); - }); -// The full completed clip: -audioSource.clip = voiceClip.AudioClip; +var partialClips = new Queue(); +var request = new TextToSpeechRequest(voice, message, model: Model.EnglishTurboV2, outputFormat: OutputFormat.PCM_44100); +var voiceClip = await api.TextToSpeechEndpoint.StreamTextToSpeechAsync(request, partialClip => +{ + // Note: check demo scene for best practices + // on how to handle playback with OnAudioFilterRead + partialClips.Enqueue(partialClip); +}); ``` ### [Voices](https://docs.elevenlabs.io/api-reference/voices) diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Editor/ElevenLabsDashboard.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Editor/ElevenLabsDashboard.cs index 0797c00..61fc154 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Editor/ElevenLabsDashboard.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Editor/ElevenLabsDashboard.cs @@ -1106,7 +1106,7 @@ private async void GenerateSynthesizedText() Directory.CreateDirectory(downloadDir); } - voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(speechSynthesisTextInput, currentVoiceOption, currentVoiceSettings, currentModelOption); + voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(new(currentVoiceOption, speechSynthesisTextInput, voiceSettings: currentVoiceSettings, model: currentModelOption)); voiceClip.CopyIntoProject(editorDownloadDirectory); } catch (Exception e) @@ -1225,7 +1225,7 @@ private void RenderVoiceLab() EditorGUILayout.Space(EndWidth); EditorGUILayout.EndHorizontal(); EditorGUI.indentLevel++; - + EditorGUILayout.BeginHorizontal(); { EditorGUILayout.LabelField(voice.Id, EditorStyles.boldLabel); @@ -1242,7 +1242,7 @@ private void RenderVoiceLab() EditorGUILayout.Space(EndWidth); EditorGUILayout.EndHorizontal(); EditorGUI.indentLevel++; - + if (!voiceLabels.TryGetValue(voice.Id, out var cachedLabels)) { cachedLabels = new Dictionary(); diff --git 
a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs new file mode 100644 index 0000000..7e09111 --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs @@ -0,0 +1,11 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +namespace ElevenLabs +{ + public enum CacheFormat + { + None, + Ogg, + Wav + } +} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs.meta b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs.meta similarity index 86% rename from ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs.meta rename to ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs.meta index f21b387..820d7d1 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs.meta +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/CacheFormat.cs.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 91fcef5ec7d363740b7a354d26952229 +guid: 477e464b1c1991e439b1a2a3c46ed761 MonoImporter: externalObjects: {} serializedVersion: 2 diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/GeneratedClip.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/GeneratedClip.cs index 1965774..8676262 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/GeneratedClip.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/GeneratedClip.cs @@ -2,8 +2,12 @@ using ElevenLabs.Extensions; using System; +using System.Threading; +using System.Threading.Tasks; using UnityEngine; using UnityEngine.Scripting; +using Utilities.Audio; +using Utilities.WebRequestRest; namespace ElevenLabs { @@ -12,16 +16,30 @@ namespace ElevenLabs public class GeneratedClip : ISerializationCallbackReceiver { [Preserve] - internal GeneratedClip(string id, string text, AudioClip 
audioClip, string cachedPath) + internal GeneratedClip(string id, string text, AudioClip audioClip, string cachedPath = null) { this.id = id; this.text = text; TextHash = $"{id}{text}".GenerateGuid(); textHash = TextHash.ToString(); - this.audioClip = audioClip; this.cachedPath = cachedPath; + SampleRate = audioClip.frequency; } + [Preserve] + internal GeneratedClip(string id, string text, ReadOnlyMemory clipData, int sampleRate, string cachedPath = null) + { + this.id = id; + this.text = text; + TextHash = $"{id}{text}".GenerateGuid(); + textHash = TextHash.ToString(); + this.cachedPath = cachedPath; + ClipData = clipData; + SampleRate = sampleRate; + } + + private readonly ReadOnlyMemory audioData; + [SerializeField] private string id; @@ -44,7 +62,25 @@ internal GeneratedClip(string id, string text, AudioClip audioClip, string cache private AudioClip audioClip; [Preserve] - public AudioClip AudioClip => audioClip; + public AudioClip AudioClip + { + get + { + if (audioClip == null && !audioData.IsEmpty) + { + var pcmData = PCMEncoder.Decode(audioData.ToArray()); + audioClip = AudioClip.Create(Id, pcmData.Length, 1, SampleRate, false); + audioClip.SetData(pcmData, 0); + } + + if (audioClip == null) + { + Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync"); + } + + return audioClip; + } + } [SerializeField] private string cachedPath; @@ -52,8 +88,47 @@ internal GeneratedClip(string id, string text, AudioClip audioClip, string cache [Preserve] public string CachedPath => cachedPath; + public ReadOnlyMemory ClipData { get; } + + private float[] clipSamples; + + public float[] ClipSamples + { + get + { + if (!ClipData.IsEmpty) + { + clipSamples ??= PCMEncoder.Decode(ClipData.ToArray()); + } + else if (audioClip != null) + { + clipSamples = new float[audioClip.samples]; + audioClip.GetData(clipSamples, 0); + } + + return clipSamples; + } + } + + public int SampleRate { get; } + public void OnBeforeSerialize() => textHash = 
TextHash.ToString(); public void OnAfterDeserialize() => TextHash = Guid.Parse(textHash); + + public static implicit operator AudioClip(GeneratedClip clip) => clip?.AudioClip; + + public async Task LoadCachedAudioClipAsync(CancellationToken cancellationToken = default) + { + var audioType = cachedPath switch + { + var path when path.EndsWith(".ogg") => AudioType.OGGVORBIS, + var path when path.EndsWith(".wav") => AudioType.WAV, + var path when path.EndsWith(".mp3") => AudioType.MPEG, + _ => AudioType.UNKNOWN + }; + + return await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, cancellationToken: cancellationToken); + } } } diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs deleted file mode 100644 index e8b4729..0000000 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/OutputFormatExtensions.cs +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. 
- -using System; -using UnityEngine; - -namespace ElevenLabs -{ - public static class OutputFormatExtensions - { - public static AudioType GetAudioType(this OutputFormat format) - => format switch - { - OutputFormat.MP3_44100_64 => AudioType.MPEG, - OutputFormat.MP3_44100_96 => AudioType.MPEG, - OutputFormat.MP3_44100_128 => AudioType.MPEG, - OutputFormat.MP3_44100_192 => AudioType.MPEG, - OutputFormat.PCM_16000 => AudioType.OGGVORBIS, - OutputFormat.PCM_22050 => AudioType.OGGVORBIS, - OutputFormat.PCM_24000 => AudioType.OGGVORBIS, - OutputFormat.PCM_44100 => AudioType.OGGVORBIS, - _ => throw new ArgumentOutOfRangeException(nameof(format), format, null) - }; - } -} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs new file mode 100644 index 0000000..543208a --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs @@ -0,0 +1,44 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using Newtonsoft.Json; +using UnityEngine.Scripting; + +namespace ElevenLabs +{ + /// + /// Represents timing information for a single character in the transcript + /// + [Preserve] + public class TimestampedTranscriptCharacter + { + [Preserve] + [JsonConstructor] + internal TimestampedTranscriptCharacter(string character, double startTime, double endTime) + { + Character = character; + StartTime = startTime; + EndTime = endTime; + } + + /// + /// The character being spoken + /// + [Preserve] + [JsonProperty("character")] + public string Character { get; } + + /// + /// The time in seconds when this character starts being spoken + /// + [Preserve] + [JsonProperty("character_start_times_seconds")] + public double StartTime { get; } + + /// + /// The time in seconds when this character finishes being spoken + /// + [Preserve] + [JsonProperty("character_end_times_seconds")] + public double EndTime { get; } + } +} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs.meta b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs.meta new file mode 100644 index 0000000..dc6646f --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/TimestampedTranscriptCharacter.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 569bb5fe3607d4d18b6bcb3e200f1107 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/VoiceClip.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/VoiceClip.cs index 24803d8..71d3bea 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/VoiceClip.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Common/VoiceClip.cs @@ -12,16 +12,26 @@ namespace ElevenLabs public sealed class VoiceClip 
: GeneratedClip { [Preserve] - internal VoiceClip(string id, string text, Voice voice, AudioClip audioClip, string cachedPath) + internal VoiceClip(string id, string text, Voice voice, AudioClip audioClip, string cachedPath = null) : base(id, text, audioClip, cachedPath) { this.voice = voice; } + [Preserve] + internal VoiceClip(string id, string text, Voice voice, ReadOnlyMemory clipData, int sampleRate, string cachedPath = null) + : base(id, text, clipData, sampleRate, cachedPath) + { + this.voice = voice; + } + [SerializeField] private Voice voice; [Preserve] public Voice Voice => voice; + + [Preserve] + public TimestampedTranscriptCharacter[] TimestampedTranscriptCharacters { get; internal set; } } } diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Dubbing/DubbingEndpoint.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Dubbing/DubbingEndpoint.cs index 0228dc2..c22d3cc 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Dubbing/DubbingEndpoint.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Dubbing/DubbingEndpoint.cs @@ -7,11 +7,9 @@ using System.Diagnostics; using System.Globalization; using System.IO; -using System.Linq; using System.Threading; using System.Threading.Tasks; using UnityEngine; -using UnityEngine.Networking; using Utilities.WebRequestRest; using Debug = UnityEngine.Debug; diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs new file mode 100644 index 0000000..c6bf1a1 --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs @@ -0,0 +1,16 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +namespace ElevenLabs.Extensions +{ + public static class Extensions + { + public static int GetSampleRate(this OutputFormat format) => format switch + { + OutputFormat.PCM_16000 => 16000, + OutputFormat.PCM_22050 => 22050, + OutputFormat.PCM_24000 => 24000, + OutputFormat.PCM_44100 => 44100, + _ => 44100 + }; + } +} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs.meta b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs.meta new file mode 100644 index 0000000..4f7b194 --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Extensions/Extensions.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 9679edc3d5fee9e4a892f93a65389087 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/History/HistoryEndpoint.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/History/HistoryEndpoint.cs index 7841626..50d6ba0 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/History/HistoryEndpoint.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/History/HistoryEndpoint.cs @@ -11,8 +11,7 @@ using System.Threading.Tasks; using UnityEngine; using Utilities.Async; -using Utilities.Audio; -using Utilities.Encoding.OggVorbis; +using Utilities.Encoding.Wav; using Utilities.WebRequestRest; namespace ElevenLabs.History @@ -82,11 +81,11 @@ public async Task DownloadHistoryAudioAsync(HistoryItem historyItem, .CreateNewDirectory(nameof(History)) .CreateNewDirectory(historyItem.VoiceId); var voice = await client.VoicesEndpoint.GetVoiceAsync(historyItem.VoiceId, cancellationToken: cancellationToken); - var audioType = historyItem.ContentType.Contains("mpeg") ? AudioType.MPEG : AudioType.OGGVORBIS; + var audioType = historyItem.ContentType.Contains("mpeg") ? 
AudioType.MPEG : AudioType.WAV; var extension = audioType switch { AudioType.MPEG => "mp3", - AudioType.OGGVORBIS => "ogg", + AudioType.WAV => "wav", _ => throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}") }; var cachedPath = Path.Combine(voiceDirectory, $"{historyItem.Id}.{extension}"); @@ -101,11 +100,9 @@ public async Task DownloadHistoryAudioAsync(HistoryItem historyItem, case AudioType.MPEG: await File.WriteAllBytesAsync(cachedPath, response.Data, cancellationToken).ConfigureAwait(false); break; - case AudioType.OGGVORBIS: - var pcmData = PCMEncoder.Decode(response.Data, PCMFormatSize.SixteenBit); - var sampleRate = 44100; // TODO unknown sample rate. - var oggBytes = await OggEncoder.ConvertToBytesAsync(pcmData, sampleRate, 1, cancellationToken: cancellationToken).ConfigureAwait(false); - await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken: cancellationToken).ConfigureAwait(false); + case AudioType.WAV: + var sampleRate = 44100; // TODO unknown sample rate. how do we figure it out? 
+ await WavEncoder.WriteToFileAsync(cachedPath, response.Data, 1, sampleRate, cancellationToken: cancellationToken); break; default: throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}"); @@ -113,7 +110,7 @@ public async Task DownloadHistoryAudioAsync(HistoryItem historyItem, } await Awaiters.UnityMainThread; - var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.UNKNOWN, parameters: new RestParameters(debug: EnableDebug), cancellationToken: cancellationToken); + var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, parameters: new RestParameters(debug: EnableDebug), cancellationToken: cancellationToken); return new VoiceClip(historyItem.Id, historyItem.Text, voice, audioClip, cachedPath); } diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs new file mode 100644 index 0000000..9971f8c --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs @@ -0,0 +1,52 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using Newtonsoft.Json; +using UnityEngine.Scripting; + +namespace ElevenLabs.TextToSpeech +{ + [Preserve] + internal sealed class Alignment + { + [Preserve] + [JsonConstructor] + internal Alignment( + [JsonProperty("characters")] string[] characters, + [JsonProperty("character_start_times_seconds")] double[] startTimes, + [JsonProperty("character_end_times_seconds")] double[] endTimes) + { + Characters = characters; + StartTimes = startTimes; + EndTimes = endTimes; + } + + [Preserve] + [JsonProperty("characters")] + public string[] Characters { get; } + + [Preserve] + [JsonProperty("character_start_times_seconds")] + public double[] StartTimes { get; } + + [Preserve] + [JsonProperty("character_end_times_seconds")] + public double[] EndTimes { get; } + + [Preserve] + public static implicit operator TimestampedTranscriptCharacter[](Alignment alignment) + { + if (alignment == null) { return null; } + var characters = alignment.Characters; + var startTimes = alignment.StartTimes; + var endTimes = alignment.EndTimes; + var timestampedTranscriptCharacters = new TimestampedTranscriptCharacter[characters.Length]; + + for (var i = 0; i < characters.Length; i++) + { + timestampedTranscriptCharacters[i] = new TimestampedTranscriptCharacter(characters[i], startTimes[i], endTimes[i]); + } + + return timestampedTranscriptCharacters; + } + } +} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs.meta b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs.meta new file mode 100644 index 0000000..f45af4c --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/Alignment.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: fb7cd218df63a7746b866b1182146ab4 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechEndpoint.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechEndpoint.cs index 5032a63..ddb8669 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechEndpoint.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechEndpoint.cs @@ -1,268 +1,331 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. - -using ElevenLabs.Extensions; -using ElevenLabs.Models; -using ElevenLabs.Voices; -using Newtonsoft.Json; -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using System.Threading; -using System.Threading.Tasks; -using UnityEngine; -using Utilities.Async; -using Utilities.Audio; -using Utilities.Encoding.OggVorbis; -using Utilities.WebRequestRest; - -namespace ElevenLabs.TextToSpeech -{ - /// - /// Access to convert text to synthesized speech. - /// - public sealed class TextToSpeechEndpoint : ElevenLabsBaseEndPoint - { - private const string HistoryItemId = "history-item-id"; - private const string OutputFormatParameter = "output_format"; - private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency"; - - public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { } - - protected override string Root => "text-to-speech"; - - /// - /// Converts text into speech using a voice of your choice and returns audio. - /// - /// - /// Text input to synthesize speech for. Maximum 5000 characters. - /// - /// - /// to use. - /// - /// - /// Optional, that will override the default settings in . - /// - /// - /// Optional, to use. Defaults to . - /// - /// - /// Output format of the generated audio.
- /// Defaults to - /// - /// - /// Optional, You can turn on latency optimizations at some cost of quality. - /// The best possible final latency varies by model.
- /// Possible values:
- /// 0 - default mode (no latency optimizations)
- /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
- /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
- /// 3 - max latency optimizations
- /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings - /// (best latency, but can mispronounce eg numbers and dates). - /// - /// Optional, . - /// . - public async Task TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, CancellationToken cancellationToken = default) - => await TextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken), outputFormat, optimizeStreamingLatency, model), cancellationToken); - - /// - /// Converts text into speech using a voice of your choice and returns audio. - /// - /// . - /// Optional, . - /// . - public async Task TextToSpeechAsync(TextToSpeechRequest request, CancellationToken cancellationToken = default) - { - var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions); - var parameters = new Dictionary - { - { OutputFormatParameter, request.OutputFormat.ToString().ToLower() } - }; - - if (request.OptimizeStreamingLatency.HasValue) - { - parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString()); - } - - var response = await Rest.PostAsync(GetUrl($"/{request.Voice.Id}", parameters), payload, new RestParameters(client.DefaultRequestHeaders), cancellationToken); - response.Validate(EnableDebug); - - if (!response.Headers.TryGetValue(HistoryItemId, out var clipId)) - { - throw new ArgumentException("Failed to parse clip id!"); - } - - var audioType = request.OutputFormat.GetAudioType(); - var extension = audioType switch - { - AudioType.MPEG => "mp3", - AudioType.OGGVORBIS => "ogg", - _ => throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}") - }; - var downloadDirectory = await 
GetCacheDirectoryAsync(request.Voice); - var cachedPath = $"{downloadDirectory}/{clipId}.{extension}"; - - if (!File.Exists(cachedPath)) - { - switch (audioType) - { - case AudioType.MPEG: - await File.WriteAllBytesAsync(cachedPath, response.Data, cancellationToken).ConfigureAwait(false); - break; - case AudioType.OGGVORBIS: - var pcmData = PCMEncoder.Decode(response.Data, PCMFormatSize.SixteenBit); - var frequency = request.OutputFormat switch - { - OutputFormat.PCM_16000 => 16000, - OutputFormat.PCM_22050 => 22050, - OutputFormat.PCM_24000 => 24000, - OutputFormat.PCM_44100 => 44100, - _ => throw new ArgumentOutOfRangeException(nameof(request.OutputFormat), request.OutputFormat, null) - }; - var oggBytes = await OggEncoder.ConvertToBytesAsync(pcmData, frequency, 1, cancellationToken: cancellationToken).ConfigureAwait(false); - await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken).ConfigureAwait(false); - break; - default: - throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}"); - } - } - - await Awaiters.UnityMainThread; - var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, parameters: new RestParameters(debug: EnableDebug), cancellationToken: cancellationToken); - return new VoiceClip(clipId, request.Text, request.Voice, audioClip, cachedPath); - } - - /// - /// Converts text into speech using a voice of your choice and returns audio as an audio stream. - /// - /// - /// Text input to synthesize speech for. Maximum 5000 characters. - /// - /// - /// to use. - /// - /// - /// Optional, Callback to enable streaming audio as it comes in.
- /// Returns partial . - /// - /// - /// Optional, that will override the default settings in . - /// - /// - /// Optional, to use. Defaults to . - /// - /// - /// Output format of the generated audio.
- /// Note: Must be PCM format to stream audio in Unity!
- /// Defaults to . - /// - /// - /// Optional, You can turn on latency optimizations at some cost of quality. - /// The best possible final latency varies by model.
- /// Possible values:
- /// 0 - default mode (no latency optimizations)
- /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
- /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
- /// 3 - max latency optimizations
- /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings - /// (best latency, but can mispronounce eg numbers and dates). - /// - /// - /// Optional, . - /// - /// Downloaded clip path, and the loaded audio clip. - public async Task StreamTextToSpeechAsync(string text, Voice voice, Action partialClipCallback, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.PCM_24000, int? optimizeStreamingLatency = null, CancellationToken cancellationToken = default) - => await StreamTextToSpeechAsync(new TextToSpeechRequest(voice, text, Encoding.UTF8, voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken), outputFormat, optimizeStreamingLatency, model), partialClipCallback, cancellationToken); - - /// - /// Converts text into speech using a voice of your choice and returns audio as an audio stream. - /// - /// . - /// - /// Optional, Callback to enable streaming audio as it comes in.
- /// Returns partial . - /// - /// - /// Optional, . - /// - /// Downloaded clip path, and the loaded audio clip. - public async Task StreamTextToSpeechAsync(TextToSpeechRequest request, Action partialClipCallback, CancellationToken cancellationToken = default) - { - var frequency = request.OutputFormat switch - { - OutputFormat.MP3_44100_64 => throw new InvalidOperationException($"{nameof(request.OutputFormat)} must be a PCM format for streaming!"), - OutputFormat.MP3_44100_96 => throw new InvalidOperationException($"{nameof(request.OutputFormat)} must be a PCM format for streaming!"), - OutputFormat.MP3_44100_128 => throw new InvalidOperationException($"{nameof(request.OutputFormat)} must be a PCM format for streaming!"), - OutputFormat.MP3_44100_192 => throw new InvalidOperationException($"{nameof(request.OutputFormat)} must be a PCM format for streaming!"), - OutputFormat.PCM_16000 => 16000, - OutputFormat.PCM_22050 => 22050, - OutputFormat.PCM_24000 => 24000, - OutputFormat.PCM_44100 => 44100, - _ => throw new ArgumentOutOfRangeException(nameof(request.OutputFormat), request.OutputFormat, null) - }; - var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions); - var parameters = new Dictionary - { - { OutputFormatParameter, request.OutputFormat.ToString().ToLower() } - }; - - if (request.OptimizeStreamingLatency.HasValue) - { - parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString()); - } - - var part = 0; - var response = await Rest.PostAsync(GetUrl($"/{request.Voice.Id}/stream", parameters), payload, StreamCallback, eventChunkSize: 8192, new RestParameters(client.DefaultRequestHeaders), cancellationToken).ConfigureAwait(true); - response.Validate(EnableDebug); - - if (!response.Headers.TryGetValue(HistoryItemId, out var clipId)) - { - throw new ArgumentException("Failed to parse clip id!"); - } - - var pcmData = PCMEncoder.Decode(response.Data); - var downloadDirectory = await 
GetCacheDirectoryAsync(request.Voice); - var cachedPath = $"{downloadDirectory}/{clipId}.ogg"; - var oggBytes = await OggEncoder.ConvertToBytesAsync(pcmData, frequency, 1, cancellationToken: cancellationToken); - await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken: cancellationToken); - var fullClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.OGGVORBIS, parameters: new RestParameters(debug: EnableDebug), compressed: false, streamingAudio: true, cancellationToken: cancellationToken); - return new VoiceClip(clipId, request.Text, request.Voice, fullClip, cachedPath); - - void StreamCallback(Response partialResponse) - { - try - { - if (!partialResponse.Headers.TryGetValue(HistoryItemId, out clipId)) - { - throw new ArgumentException("Failed to parse clip id!"); - } - - var chunk = PCMEncoder.Decode(partialResponse.Data); - var audioClip = AudioClip.Create($"{clipId}_{++part}", chunk.Length, 1, frequency, false); - - if (!audioClip.SetData(chunk, 0)) - { - Debug.LogError("Failed to set pcm data to partial clip."); - return; - } - - partialClipCallback.Invoke(audioClip); - } - catch (Exception e) - { - Debug.LogError(e); - } - } - } - - private static async Task GetCacheDirectoryAsync(Voice voice) - { - await Rest.ValidateCacheDirectoryAsync(); - return Rest.DownloadCacheDirectory - .CreateNewDirectory(nameof(ElevenLabs)) - .CreateNewDirectory(nameof(TextToSpeech)) - .CreateNewDirectory(voice.Id); - } - } -} +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using ElevenLabs.Extensions; +using ElevenLabs.Models; +using ElevenLabs.Voices; +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using UnityEngine; +using Utilities.Audio; +using Utilities.Encoding.OggVorbis; +using Utilities.Encoding.Wav; +using Utilities.WebRequestRest; + +namespace ElevenLabs.TextToSpeech +{ + /// + /// Access to convert text to synthesized speech. + /// + public sealed class TextToSpeechEndpoint : ElevenLabsBaseEndPoint + { + private const string HistoryItemId = "history-item-id"; + private const string OutputFormatParameter = "output_format"; + private const string OptimizeStreamingLatencyParameter = "optimize_streaming_latency"; + + public TextToSpeechEndpoint(ElevenLabsClient client) : base(client) { } + + protected override string Root => "text-to-speech"; + + [Obsolete("use overload with TextToSpeechRequest")] + public async Task TextToSpeechAsync(string text, Voice voice, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, CancellationToken cancellationToken = default) + => await TextToSpeechAsync( + new TextToSpeechRequest( + voice, + text, + Encoding.UTF8, + voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken), + outputFormat, + optimizeStreamingLatency, + model), + cancellationToken); + + [Obsolete("use TextToSpeechAsync with VoiceClip partialClipCallback")] + public async Task StreamTextToSpeechAsync(string text, Voice voice, Action partialClipCallback, VoiceSettings voiceSettings = null, Model model = null, OutputFormat outputFormat = OutputFormat.PCM_24000, int? optimizeStreamingLatency = null, CancellationToken cancellationToken = default) + => await StreamTextToSpeechAsync( + new TextToSpeechRequest( + voice, + text, + Encoding.UTF8, + voiceSettings ?? 
voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken), + outputFormat, + optimizeStreamingLatency, + model), + partialClipCallback, cancellationToken); + + [Obsolete("use TextToSpeechAsync with VoiceClip partialClipCallback")] + public async Task StreamTextToSpeechAsync(TextToSpeechRequest request, Action partialClipCallback, CancellationToken cancellationToken = default) + => await TextToSpeechAsync(request, voiceClip => + { + partialClipCallback.Invoke(voiceClip.AudioClip); + }, cancellationToken); + + /// + /// Converts text to synthesized speech. + /// + /// . + /// Optional, . + /// . + public async Task TextToSpeechAsync(TextToSpeechRequest request, CancellationToken cancellationToken = default) + { + request.VoiceSettings ??= await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken); + var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions); + var parameters = CreateRequestParameters(request); + var endpoint = $"/{request.Voice}"; + + if (request.WithTimestamps) + { + endpoint += "/with-timestamps"; + } + + var response = await Rest.PostAsync(GetUrl(endpoint, parameters), payload, new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + + if (!response.Headers.TryGetValue(HistoryItemId, out var clipId)) + { + throw new ArgumentException("Failed to parse clip id!"); + } + + byte[] audioData; + TimestampedTranscriptCharacter[] transcriptionCharacters = null; + + if (request.WithTimestamps) + { + var transcriptResponse = JsonConvert.DeserializeObject(response.Body, ElevenLabsClient.JsonSerializationOptions); + audioData = transcriptResponse.AudioBytes; + transcriptionCharacters = transcriptResponse.Alignment; + } + else + { + audioData = response.Data; + } + + string cachedPath = null; + + if (request.CacheFormat != CacheFormat.None) + { + cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, 
request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true); + } + + return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory(audioData), request.OutputFormat.GetSampleRate(), cachedPath) + { + TimestampedTranscriptCharacters = transcriptionCharacters + }; + } + + /// + /// Converts text to synthesized speech. + /// + /// . + /// Partial callback with streaming data. + /// Optional, . + /// . + public async Task TextToSpeechAsync(TextToSpeechRequest request, Action partialClipCallback, CancellationToken cancellationToken = default) + { + if (request.OutputFormat is not OutputFormat.PCM_16000 and not OutputFormat.PCM_22050 and not OutputFormat.PCM_24000 and not OutputFormat.PCM_44100) + { + Debug.LogWarning($"{nameof(request.OutputFormat)} must be a PCM format! defaulting to 24000"); + request.OutputFormat = OutputFormat.PCM_24000; + } + + request.VoiceSettings ??= await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken); + + var frequency = request.OutputFormat.GetSampleRate(); + var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions); + var parameters = CreateRequestParameters(request); + var endpoint = $"/{request.Voice.Id}/stream"; + + var part = 0; + StringBuilder textBuffer; + List accumulatedPCMData = null; + List accumulatedTranscriptData = null; + Action streamCallback; + + if (request.WithTimestamps) + { + endpoint += "/with-timestamps"; + textBuffer = new StringBuilder(); + accumulatedPCMData = new List(); + accumulatedTranscriptData = new List(); + streamCallback = TranscriptionStreamCallback; + } + else + { + streamCallback = StreamCallback; + } + + var response = await Rest.PostAsync(GetUrl(endpoint, parameters), payload, streamCallback, 8192, new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + + if (!response.Headers.TryGetValue(HistoryItemId, out var clipId)) + { + throw new 
ArgumentException("Failed to parse clip id!"); + } + + var audioData = request.WithTimestamps ? accumulatedPCMData!.ToArray() : response.Data; + string cachedPath = null; + + if (request.CacheFormat != CacheFormat.None) + { + cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true); + } + + return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory(audioData), request.OutputFormat.GetSampleRate(), cachedPath) + { + TimestampedTranscriptCharacters = accumulatedTranscriptData?.ToArray() ?? Array.Empty() + }; + + void StreamCallback(Response partialResponse) + { + try + { + if (!partialResponse.Headers.TryGetValue(HistoryItemId, out clipId)) + { + throw new ArgumentException("Failed to parse clip id!"); + } + + partialClipCallback.Invoke(new VoiceClip($"{clipId}_{++part}", request.Text, request.Voice, new ReadOnlyMemory(partialResponse.Data), frequency)); + } + catch (Exception e) + { + Debug.LogError(e); + } + } + + void TranscriptionStreamCallback(Response partialResponse) + { + try + { + if (!partialResponse.Headers.TryGetValue(HistoryItemId, out clipId)) + { + throw new ArgumentException("Failed to parse clip id!"); + } + + var chunkText = Encoding.UTF8.GetString(partialResponse.Data); + textBuffer.Append(chunkText); + + // Process any complete lines + var text = textBuffer.ToString(); + var lines = text.Split('\n'); + + // Keep the last potentially incomplete line + textBuffer.Clear(); + textBuffer.Append(lines[^1]); + + // Process all complete lines + for (var i = 0; i < lines.Length - 1; i++) + { + ProcessTranscribedVoiceClip(lines[i].Trim()); + } + } + catch (Exception e) + { + Debug.LogError(e); + } + } + + void ProcessTranscribedVoiceClip(string line) + { + if (string.IsNullOrEmpty(line)) { return; } + + try + { + var partialTranscription = JsonConvert.DeserializeObject(line, ElevenLabsClient.JsonSerializationOptions); + var 
timestampedTranscriptCharacters = (TimestampedTranscriptCharacter[])partialTranscription.Alignment ?? Array.Empty(); + + try + { + partialClipCallback.Invoke(new VoiceClip($"{clipId}_{++part}", request.Text, request.Voice, new ReadOnlyMemory(partialTranscription.AudioBytes), frequency) + { + TimestampedTranscriptCharacters = timestampedTranscriptCharacters + }); + } + finally + { + accumulatedPCMData.AddRange(partialTranscription.AudioBytes); + accumulatedTranscriptData.AddRange(timestampedTranscriptCharacters); + } + } + catch (JsonReaderException e) + { + Debug.LogWarning($"Failed to parse line as JSON: {e.Message}"); + } + } + } + + private static Dictionary CreateRequestParameters(TextToSpeechRequest request) + { + var parameters = new Dictionary + { + { OutputFormatParameter, request.OutputFormat.ToString().ToLower() } + }; + + if (request.OptimizeStreamingLatency.HasValue) + { + parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString()); + } + + return parameters; + } + + private static async Task SaveAudioToCache(byte[] audioData, string clipId, Voice voice, OutputFormat outputFormat, CacheFormat cacheFormat, CancellationToken cancellationToken) + { + string extension; + AudioType audioType; + + if (outputFormat is OutputFormat.MP3_44100_64 or OutputFormat.MP3_44100_96 or OutputFormat.MP3_44100_128 or OutputFormat.MP3_44100_192) + { + extension = "mp3"; + audioType = AudioType.MPEG; + } + else + { + switch (cacheFormat) + { + case CacheFormat.Wav: + extension = "wav"; + audioType = AudioType.WAV; + break; + case CacheFormat.Ogg: + extension = "ogg"; + audioType = AudioType.OGGVORBIS; + break; + case CacheFormat.None: + default: + throw new ArgumentOutOfRangeException(nameof(cacheFormat), cacheFormat, null); + } + } + + await Rest.ValidateCacheDirectoryAsync(); + var downloadDirectory = Rest.DownloadCacheDirectory + .CreateNewDirectory(nameof(ElevenLabs)) + .CreateNewDirectory(nameof(TextToSpeech)) + 
.CreateNewDirectory(voice.Id); + var cachedPath = $"{downloadDirectory}/{clipId}.{extension}"; + + if (!File.Exists(cachedPath)) + { + switch (audioType) + { + case AudioType.MPEG: + await File.WriteAllBytesAsync(cachedPath, audioData, cancellationToken).ConfigureAwait(false); + break; + case AudioType.OGGVORBIS: + var oggBytes = await OggEncoder.ConvertToBytesAsync(PCMEncoder.Decode(audioData), sampleRate: outputFormat.GetSampleRate(), channels: 1, cancellationToken: cancellationToken).ConfigureAwait(false); + await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken).ConfigureAwait(false); + break; + case AudioType.WAV: + await WavEncoder.WriteToFileAsync(cachedPath, audioData, sampleRate: outputFormat.GetSampleRate(), channels: 1, cancellationToken: cancellationToken).ConfigureAwait(false); + break; + } + } + + return cachedPath; + } + + } +} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechRequest.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechRequest.cs index 0beb36f..5ab224a 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechRequest.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TextToSpeechRequest.cs @@ -12,6 +12,47 @@ namespace ElevenLabs.TextToSpeech [Preserve] public sealed class TextToSpeechRequest { + /// + /// Constructor. + /// + /// + /// to use. + /// + /// + /// Text input to synthesize speech for. Maximum 5000 characters. + /// + /// to use for . + /// + /// Optional, that will override the default settings in . + /// + /// + /// Optional, to use. Defaults to . + /// + /// + /// Output format of the generated audio.
+ /// Defaults to + /// + /// + /// Optional, You can turn on latency optimizations at some cost of quality. + /// The best possible final latency varies by model.
+ /// Possible values:
+ /// 0 - default mode (no latency optimizations)
+ /// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)
+ /// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)
+ /// 3 - max latency optimizations
+ /// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings + /// (best latency, but can mispronounce e.g. numbers and dates). + /// + /// + /// + /// + /// + /// + /// Optional, Language code (ISO 639-1) used to enforce a language for the model. Currently only supports language enforcement. + /// For other models, an error will be returned if language code is provided. + /// + /// + /// [Preserve] public TextToSpeechRequest( Voice voice, @@ -21,7 +62,13 @@ public TextToSpeechRequest( OutputFormat outputFormat = OutputFormat.MP3_44100_128, int? optimizeStreamingLatency = null, Model model = null, - string previousText = null) + string previousText = null, + string nextText = null, + string[] previousRequestIds = null, + string[] nextRequestIds = null, + string languageCode = null, + CacheFormat cacheFormat = CacheFormat.Wav, + bool withTimestamps = false) { if (string.IsNullOrWhiteSpace(text)) { @@ -45,12 +92,26 @@ public TextToSpeechRequest( } Text = text; - Model = model ?? Models.Model.MultiLingualV2; - Voice = voice; - VoiceSettings = voiceSettings ?? voice.Settings ?? throw new ArgumentNullException(nameof(voiceSettings)); - PreviousText = previousText; + Model = model ?? Models.Model.TurboV2_5; + Voice = string.IsNullOrWhiteSpace(voice) ? Voice.Adam : voice; + VoiceSettings = voiceSettings ?? 
voice.Settings; OutputFormat = outputFormat; OptimizeStreamingLatency = optimizeStreamingLatency; + PreviousText = previousText; + NextText = nextText; + if (previousRequestIds?.Length > 3) + { + previousRequestIds = previousRequestIds[..3]; + } + PreviousRequestIds = previousRequestIds; + if (nextRequestIds?.Length > 3) + { + nextRequestIds = nextRequestIds[..3]; + } + NextRequestIds = nextRequestIds; + LanguageCode = languageCode; + CacheFormat = cacheFormat; + WithTimestamps = withTimestamps; } [Preserve] @@ -70,15 +131,45 @@ public TextToSpeechRequest( public VoiceSettings VoiceSettings { get; internal set; } [Preserve] - [JsonProperty("previous_text")] - public string PreviousText { get; } + [JsonIgnore] + public OutputFormat OutputFormat { get; internal set; } [Preserve] [JsonIgnore] - public OutputFormat OutputFormat { get; } + public CacheFormat CacheFormat { get; internal set; } [Preserve] [JsonIgnore] public int? OptimizeStreamingLatency { get; } + + [Preserve] + [JsonProperty("previous_text")] + public string PreviousText { get; } + + [Preserve] + [JsonProperty("next_text")] + public string NextText { get; } + + /// + /// A maximum of three next or previous history item ids can be sent + /// + [Preserve] + [JsonProperty("previous_request_ids")] + public string[] PreviousRequestIds { get; } + + /// + /// A maximum of three next or previous history item ids can be sent + /// + [Preserve] + [JsonProperty("next_request_ids")] + public string[] NextRequestIds { get; } + + [Preserve] + [JsonProperty("language_code")] + public string LanguageCode { get; } + + [Preserve] + [JsonIgnore] + public bool WithTimestamps { get; } } } diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs new file mode 100644 index 0000000..52176a4 --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs @@ 
-0,0 +1,30 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using Newtonsoft.Json; +using UnityEngine.Scripting; + +namespace ElevenLabs.TextToSpeech +{ + [Preserve] + internal sealed class TranscriptionResponse + { + [Preserve] + [JsonConstructor] + public TranscriptionResponse( + [JsonProperty("audio_base64")] string audioBase64, + [JsonProperty("alignment")] Alignment alignment) + { + AudioBase64 = audioBase64; + Alignment = alignment; + } + + [JsonProperty("audio_base64")] + public string AudioBase64 { get; } + + [JsonIgnore] + public byte[] AudioBytes => System.Convert.FromBase64String(AudioBase64); + + [JsonProperty("alignment")] + public Alignment Alignment { get; } + } +} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs.meta b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs.meta new file mode 100644 index 0000000..4653e23 --- /dev/null +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/TextToSpeech/TranscriptionResponse.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 317e44226b6324a47bc7b2dde458e978 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/Voice.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/Voice.cs index 1bcc405..206e73e 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/Voice.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/Voice.cs @@ -94,7 +94,7 @@ public string Id public VoiceSettings Settings { get; internal set; } [Preserve] - public static implicit operator string(Voice voice) => voice.ToString(); + public static implicit operator string(Voice voice) => voice?.ToString(); [Preserve] public override string 
ToString() => Id; diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/VoicesEndpoint.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/VoicesEndpoint.cs index ad67c0a..0f8b142 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/VoicesEndpoint.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Voices/VoicesEndpoint.cs @@ -11,8 +11,7 @@ using UnityEngine; using UnityEngine.Scripting; using Utilities.Async; -using Utilities.Audio; -using Utilities.Encoding.OggVorbis; +using Utilities.Encoding.Wav; using Utilities.WebRequestRest; namespace ElevenLabs.Voices @@ -329,11 +328,11 @@ public async Task DownloadVoiceSampleAudioAsync(Voice voice, Sample s .CreateNewDirectory(voice.Id) .CreateNewDirectory("Samples"); // TODO possibly handle other types? - var audioType = sample.MimeType.Contains("mpeg") ? AudioType.MPEG : AudioType.OGGVORBIS; + var audioType = sample.MimeType.Contains("mpeg") ? AudioType.MPEG : AudioType.WAV; var extension = audioType switch { AudioType.MPEG => "mp3", - AudioType.OGGVORBIS => "ogg", + AudioType.WAV => "wav", _ => throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}") }; var cachedPath = Path.Combine(downloadDirectory, $"{sample.Id}.{extension}"); @@ -348,11 +347,9 @@ public async Task DownloadVoiceSampleAudioAsync(Voice voice, Sample s case AudioType.MPEG: await File.WriteAllBytesAsync(cachedPath, response.Data, cancellationToken).ConfigureAwait(false); break; - case AudioType.OGGVORBIS: - var pcmData = PCMEncoder.Decode(response.Data, PCMFormatSize.SixteenBit); + case AudioType.WAV: var sampleRate = 44100; // TODO unknown sample rate. 
- var oggBytes = await OggEncoder.ConvertToBytesAsync(pcmData, sampleRate, 1, cancellationToken: cancellationToken).ConfigureAwait(false); - await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken: cancellationToken).ConfigureAwait(false); + await WavEncoder.WriteToFileAsync(cachedPath, response.Data, 1, sampleRate, cancellationToken: cancellationToken).ConfigureAwait(false); break; default: throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}"); diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/ElevenLabs.Demo.asmdef b/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/ElevenLabs.Demo.asmdef index 9e57626..37adb79 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/ElevenLabs.Demo.asmdef +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/ElevenLabs.Demo.asmdef @@ -4,7 +4,8 @@ "references": [ "GUID:85797d0f100db6c43bf118660781167b", "GUID:a6609af893242c7438d701ddd4cce46a", - "GUID:7958db66189566541a6363568aee1575" + "GUID:7958db66189566541a6363568aee1575", + "GUID:d25c28436b1dcc9408d86f49a0f5210b" ], "includePlatforms": [], "excludePlatforms": [], diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.cs index 0e13968..c3fc85d 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.cs @@ -1,13 +1,16 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. 
using ElevenLabs.Models; +using ElevenLabs.TextToSpeech; using ElevenLabs.Voices; using System; using System.Collections.Generic; using System.Linq; using System.Threading; +using System.Threading.Tasks; using UnityEngine; using Utilities.Async; +using Utilities.Audio; namespace ElevenLabs.Demo { @@ -30,10 +33,11 @@ public class TextToSpeechDemo : MonoBehaviour [SerializeField] private AudioSource audioSource; - private readonly Queue streamClipQueue = new(); + private readonly Queue sampleQueue = new(); #if !UNITY_2022_3_OR_NEWER private readonly CancellationTokenSource lifetimeCts = new(); + // ReSharper disable once InconsistentNaming private CancellationToken destroyCancellationToken => lifetimeCts.Token; #endif @@ -61,16 +65,22 @@ private async void Start() voice = (await api.VoicesEndpoint.GetAllVoicesAsync(destroyCancellationToken)).FirstOrDefault(); } - streamClipQueue.Clear(); - var streamQueueCts = CancellationTokenSource.CreateLinkedTokenSource(destroyCancellationToken); - PlayStreamQueue(streamQueueCts.Token); - var voiceClip = await api.TextToSpeechEndpoint.StreamTextToSpeechAsync(message, voice, partialClip => + + sampleQueue.Clear(); + var request = new TextToSpeechRequest(voice, message, model: Model.EnglishTurboV2, outputFormat: OutputFormat.PCM_24000); + var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request, partialClip => { - streamClipQueue.Enqueue(partialClip); - }, model: Model.EnglishTurboV2, cancellationToken: destroyCancellationToken); + const int sampleRate = 44100; // default unity audio clip sample rate + // we have to resample the partial clip to the unity audio clip sample rate. 
Ideally use PCM_44100 + var resampled = PCMEncoder.Resample(partialClip.ClipSamples, partialClip.SampleRate, sampleRate); + foreach (var sample in resampled) + { + sampleQueue.Enqueue(sample); + } + }, cancellationToken: destroyCancellationToken); audioSource.clip = voiceClip.AudioClip; - await new WaitUntil(() => streamClipQueue.Count == 0 && !audioSource.isPlaying); - streamQueueCts.Cancel(); + await new WaitUntil(() => sampleQueue.Count == 0 || destroyCancellationToken.IsCancellationRequested); + destroyCancellationToken.ThrowIfCancellationRequested(); if (debug) { @@ -79,41 +89,38 @@ private async void Start() } catch (Exception e) { - Debug.LogError(e); + switch (e) + { + case TaskCanceledException: + case OperationCanceledException: + break; + default: + Debug.LogException(e); + break; + } } } -#if !UNITY_2022_3_OR_NEWER - private void OnDestroy() + private void OnAudioFilterRead(float[] data, int channels) { - lifetimeCts.Cancel(); - lifetimeCts.Dispose(); - } -#endif + if (sampleQueue.Count <= 0) { return; } - private async void PlayStreamQueue(CancellationToken cancellationToken) - { - try + for (var i = 0; i < data.Length; i += channels) { - await new WaitUntil(() => streamClipQueue.Count > 0); - var endOfFrame = new WaitForEndOfFrame(); + var sample = sampleQueue.Dequeue(); - do + for (var j = 0; j < channels; j++) { - if (!audioSource.isPlaying && - streamClipQueue.TryDequeue(out var clip)) - { - Debug.Log($"playing partial clip: {clip.name}"); - audioSource.PlayOneShot(clip); - } - - await endOfFrame; - } while (!cancellationToken.IsCancellationRequested); - } - catch (Exception e) - { - Debug.LogError(e); + data[i + j] = sample; + } } } + +#if !UNITY_2022_3_OR_NEWER + private void OnDestroy() + { + lifetimeCts.Cancel(); + } +#endif } } diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.unity b/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.unity index 8f63880..a46f34b 100644 --- 
a/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.unity +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Samples~/TextToSpeech/TextToSpeechDemo.unity @@ -38,7 +38,6 @@ RenderSettings: m_ReflectionIntensity: 1 m_CustomReflection: {fileID: 0} m_Sun: {fileID: 0} - m_IndirectSpecularColor: {r: 0.44657898, g: 0.4964133, b: 0.5748178, a: 1} m_UseRadianceAmbientProbe: 0 --- !u!157 &3 LightmapSettings: @@ -104,7 +103,7 @@ NavMeshSettings: serializedVersion: 2 m_ObjectHideFlags: 0 m_BuildSettings: - serializedVersion: 3 + serializedVersion: 2 agentTypeID: 0 agentRadius: 0.5 agentHeight: 2 @@ -117,7 +116,7 @@ NavMeshSettings: cellSize: 0.16666667 manualTileSize: 0 tileSize: 256 - buildHeightMesh: 0 + accuratePlacement: 0 maxJobWorkers: 0 preserveTilesOutsideBounds: 0 debug: @@ -209,13 +208,13 @@ Transform: m_PrefabInstance: {fileID: 0} m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 541418635} - serializedVersion: 2 m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261} m_LocalPosition: {x: 0, y: 3, z: 0} m_LocalScale: {x: 1, y: 1, z: 1} m_ConstrainProportionsScale: 1 m_Children: [] m_Father: {fileID: 0} + m_RootOrder: 1 m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0} --- !u!1 &1232673082 GameObject: @@ -242,13 +241,13 @@ Transform: m_PrefabInstance: {fileID: 0} m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 1232673082} - serializedVersion: 2 m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} m_LocalPosition: {x: 0, y: 0, z: 0} m_LocalScale: {x: 1, y: 1, z: 1} m_ConstrainProportionsScale: 1 m_Children: [] m_Father: {fileID: 0} + m_RootOrder: 0 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} --- !u!114 &1232673085 MonoBehaviour: @@ -262,6 +261,8 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: b1dff7556fcd4ac4d8514517487cdbb3, type: 3} m_Name: m_EditorClassIdentifier: + configuration: {fileID: 0} + debug: 1 voice: name: Bella id: EXAVITQu4vr4xnSDxMaL @@ -410,17 +411,9 @@ Camera: m_projectionMatrixMode: 1 m_GateFitMode: 2 
m_FOVAxisMode: 0 - m_Iso: 200 - m_ShutterSpeed: 0.005 - m_Aperture: 16 - m_FocusDistance: 10 - m_FocalLength: 50 - m_BladeCount: 5 - m_Curvature: {x: 2, y: 11} - m_BarrelClipping: 0.25 - m_Anamorphism: 0 m_SensorSize: {x: 36, y: 24} m_LensShift: {x: 0, y: 0} + m_FocalLength: 50 m_NormalizedViewPortRect: serializedVersion: 2 x: 0 @@ -454,18 +447,11 @@ Transform: m_PrefabInstance: {fileID: 0} m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 1655310072} - serializedVersion: 2 m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} m_LocalPosition: {x: 0, y: 1, z: -10} m_LocalScale: {x: 1, y: 1, z: 1} m_ConstrainProportionsScale: 1 m_Children: [] m_Father: {fileID: 0} + m_RootOrder: 2 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} ---- !u!1660057539 &9223372036854775807 -SceneRoots: - m_ObjectHideFlags: 0 - m_Roots: - - {fileID: 1655310075} - - {fileID: 541418637} - - {fileID: 1232673084} diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs b/ElevenLabs/Packages/com.rest.elevenlabs/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs index a772e66..8355144 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs +++ b/ElevenLabs/Packages/com.rest.elevenlabs/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs @@ -1,5 +1,6 @@ -// Licensed under the MIT License. See LICENSE in the project root for license information. +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+using ElevenLabs.TextToSpeech; using NUnit.Framework; using System.Collections.Generic; using System.Linq; @@ -14,10 +15,10 @@ internal class Test_Fixture_04_TextToSpeechEndpoint : AbstractTestFixture public async Task Test_01_TextToSpeech() { Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint); - var voice = Voices.Voice.Adam; + var voice = (await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); Assert.NotNull(voice); - var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync(); - var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync("The quick brown fox jumps over the lazy dog.", voice, defaultVoiceSettings); + var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog."); + var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request); Assert.NotNull(voiceClip); Assert.NotNull(voiceClip.AudioClip); Debug.Log(voiceClip.Id); @@ -29,16 +30,91 @@ public async Task Test_02_StreamTextToSpeech() Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint); var voice = (await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); Assert.NotNull(voice); - var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync(); var partialClips = new Queue(); - var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.StreamTextToSpeechAsync("The quick brown fox jumps over the lazy dog.", voice, - clip => partialClips.Enqueue(clip), - defaultVoiceSettings); + var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000); + var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, voiceClip => partialClips.Enqueue(voiceClip)); Assert.NotNull(partialClips); Assert.IsNotEmpty(partialClips); Assert.NotNull(voiceClip); Assert.IsNotNull(voiceClip.AudioClip); Debug.Log(voiceClip.Id); + 
Debug.Log(voiceClip.CachedPath); + } + + [Test] + public async Task Test_03_TextToSpeech_Transcription() + { + Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint); + var voice = (await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); + Assert.NotNull(voice); + var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", withTimestamps: true); + var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request); + Assert.NotNull(voiceClip); + Assert.NotNull(voiceClip.AudioClip); + Debug.Log(voiceClip.Id); + Debug.Log(voiceClip.CachedPath); + Assert.NotNull(voiceClip.TimestampedTranscriptCharacters); + Assert.IsNotEmpty(voiceClip.TimestampedTranscriptCharacters); + Debug.Log("| Character | Start Time | End Time |"); + Debug.Log("| --------- | ---------- | -------- |"); + foreach (var character in voiceClip.TimestampedTranscriptCharacters) + { + Debug.Log($"| {character.Character} | {character.StartTime} | {character.EndTime} |"); + } + } + + [Test] + public async Task Test_04_StreamTextToSpeech_Transcription() + { + Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint); + var voice = Voices.Voice.Adam; + Assert.NotNull(voice); + voice.Settings ??= await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync(); + var partialClips = new Queue(); + var characters = new Queue(); + Debug.Log("| Character | Start Time | End Time |"); + Debug.Log("| --------- | ---------- | -------- |"); + var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000, withTimestamps: true); + var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, voiceClip => + { + partialClips.Enqueue(voiceClip.AudioClip); + foreach (var character in voiceClip.TimestampedTranscriptCharacters) + { + characters.Enqueue(character); + Debug.Log($"| {character.Character} | {character.StartTime} | {character.EndTime} |"); + } + }); + 
Assert.NotNull(partialClips); + Assert.NotNull(partialClips); + Assert.IsNotEmpty(partialClips); + Assert.NotNull(voiceClip); + Assert.IsNotNull(voiceClip.AudioClip); + Debug.Log(voiceClip.Id); + Debug.Log(voiceClip.CachedPath); + Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters); + } + + [Test] + public async Task Test_05_LanguageEnforced_TextToSpeech() + { + Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint); + var voice = Voices.Voice.Adam; + Assert.NotNull(voice); + var defaultVoiceSettings = await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync(); + var request = new TextToSpeechRequest( + voice: voice, + text: "Příliš žluťoučký kůň úpěl ďábelské ódy", + voiceSettings: defaultVoiceSettings, + model: Models.Model.TurboV2_5, + outputFormat: OutputFormat.MP3_44100_192, + cacheFormat: CacheFormat.None, + languageCode: "cs"); + var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request); + Assert.NotNull(voiceClip); + Assert.NotNull(voiceClip.AudioClip); + Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath)); + Debug.Log(voiceClip.Id); + Debug.Log(voiceClip.CachedPath); } } } diff --git a/ElevenLabs/Packages/com.rest.elevenlabs/package.json b/ElevenLabs/Packages/com.rest.elevenlabs/package.json index d606fd4..84a4e69 100644 --- a/ElevenLabs/Packages/com.rest.elevenlabs/package.json +++ b/ElevenLabs/Packages/com.rest.elevenlabs/package.json @@ -3,7 +3,7 @@ "displayName": "ElevenLabs", "description": "A non-official Eleven Labs voice synthesis RESTful client.", "keywords": [], - "version": "3.3.1", + "version": "3.4.0", "unity": "2021.3", "documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation", "changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases", @@ -17,9 +17,9 @@ "url": "https://github.com/StephenHodgson" }, "dependencies": { - "com.utilities.rest": "2.5.7", - "com.utilities.encoder.ogg": "3.1.4", - 
"com.utilities.encoder.wav": "1.2.2" + "com.utilities.rest": "3.3.0", + "com.utilities.encoder.ogg": "4.0.1", + "com.utilities.encoder.wav": "2.0.2" }, "samples": [ { diff --git a/ElevenLabs/Packages/manifest.json b/ElevenLabs/Packages/manifest.json index 7f0c52d..e81262e 100644 --- a/ElevenLabs/Packages/manifest.json +++ b/ElevenLabs/Packages/manifest.json @@ -1,9 +1,9 @@ { "dependencies": { - "com.unity.ide.rider": "3.0.31", + "com.unity.ide.rider": "3.0.34", "com.unity.ide.visualstudio": "2.0.22", "com.unity.test-framework": "1.3.5", - "com.utilities.buildpipeline": "1.5.0" + "com.utilities.buildpipeline": "1.5.7" }, "scopedRegistries": [ { diff --git a/ElevenLabs/ProjectSettings/ProjectVersion.txt b/ElevenLabs/ProjectSettings/ProjectVersion.txt index 9ba13fd..8386a05 100644 --- a/ElevenLabs/ProjectSettings/ProjectVersion.txt +++ b/ElevenLabs/ProjectSettings/ProjectVersion.txt @@ -1,2 +1,2 @@ -m_EditorVersion: 2021.3.42f1 -m_EditorVersionWithRevision: 2021.3.42f1 (f1197811e8ce) +m_EditorVersion: 2021.3.45f1 +m_EditorVersionWithRevision: 2021.3.45f1 (0da89fac8e79) diff --git a/README.md b/README.md index 71adcd7..9b6b5ae 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ The recommended installation method is though the unity package manager and [Ope - [com.utilities.extensions](https://github.com/RageAgainstThePixel/com.utilities.extensions) - [com.utilities.audio](https://github.com/RageAgainstThePixel/com.utilities.audio) - [com.utilities.encoder.ogg](https://github.com/RageAgainstThePixel/com.utilities.encoder.ogg) + - [com.utilities.encoder.wav](https://github.com/RageAgainstThePixel/com.utilities.encoder.wav) - [com.utilities.rest](https://github.com/RageAgainstThePixel/com.utilities.rest) --- @@ -59,7 +60,7 @@ The recommended installation method is though the unity package manager and [Ope - [Text to Speech](#text-to-speech) - [Stream Text To Speech](#stream-text-to-speech) - [Voices](#voices) - - [Get Shared Voices](#get-shared-voices) :new: + - 
[Get Shared Voices](#get-shared-voices) - [Get All Voices](#get-all-voices) - [Get Default Voice Settings](#get-default-voice-settings) - [Get Voice](#get-voice) @@ -70,13 +71,13 @@ The recommended installation method is though the unity package manager and [Ope - [Samples](#samples) - [Download Voice Sample](#download-voice-sample) - [Delete Voice Sample](#delete-voice-sample) -- [Dubbing](#dubbing) :new: - - [Dub](#dub) :new: - - [Get Dubbing Metadata](#get-dubbing-metadata) :new: - - [Get Transcript for Dub](#get-transcript-for-dub) :new: - - [Get dubbed file](#get-dubbed-file) :new: - - [Delete Dubbing Project](#delete-dubbing-project) :new: -- [SFX Generation](#sfx-generation) :new: +- [Dubbing](#dubbing) + - [Dub](#dub) + - [Get Dubbing Metadata](#get-dubbing-metadata) + - [Get Transcript for Dub](#get-transcript-for-dub) + - [Get dubbed file](#get-dubbed-file) + - [Delete Dubbing Project](#delete-dubbing-project) +- [SFX Generation](#sfx-generation) - [History](#history) - [Get History](#get-history) - [Get History Item](#get-history-item) @@ -265,8 +266,8 @@ Convert text to speech. var api = new ElevenLabsClient(); var text = "The quick brown fox jumps over the lazy dog."; var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); -var defaultVoiceSettings = await api.VoicesEndpoint.GetDefaultVoiceSettingsAsync(); -var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(text, voice, defaultVoiceSettings); +var request = new TextToSpeechRequest(voice, text); +var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request); audioSource.PlayOneShot(voiceClip.AudioClip); ``` @@ -284,18 +285,14 @@ Stream text to speech. 
var api = new ElevenLabsClient(); var text = "The quick brown fox jumps over the lazy dog."; var voice = (await api.VoicesEndpoint.GetAllVoicesAsync()).FirstOrDefault(); -var partialClips = new Queue(); -var voiceClip = await api.TextToSpeechEndpoint.StreamTextToSpeechAsync( - text, - voice, - partialClip => - { - // Note: Best to queue them and play them in update loop! - // See TextToSpeech sample demo for details - partialClips.Enqueue(partialClip); - }); -// The full completed clip: -audioSource.clip = voiceClip.AudioClip; +var partialClips = new Queue(); +var request = new TextToSpeechRequest(voice, message, model: Model.EnglishTurboV2, outputFormat: OutputFormat.PCM_44100); +var voiceClip = await api.TextToSpeechEndpoint.StreamTextToSpeechAsync(request, partialClip => +{ + // Note: check demo scene for best practices + // on how to handle playback with OnAudioFilterRead + partialClips.Enqueue(partialClip); +}); ``` ### [Voices](https://docs.elevenlabs.io/api-reference/voices)