Skip to content

Commit

Permalink
ElevenLabs-DotNet 3.5.0
Browse files Browse the repository at this point in the history
- Added TextToSpeechRequest.ctor overload
  - Added seed property
  - Added applyTextNormalization property
  - Removed deprecated optimizeStreamingLatency property
- Updated Unit Tests
  • Loading branch information
StephenHodgson committed Mar 2, 2025
1 parent f216524 commit dfc4344
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 24 deletions.
35 changes: 33 additions & 2 deletions ElevenLabs-DotNet-Tests/TestFixture_04_TextToSpeechEndpoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,38 @@ public async Task Test_03_TextToSpeech_Transcription()
}

[Test]
public async Task Test_05_LanguageEnforced_TextToSpeech()
/// <summary>
/// Verifies streaming TTS with timestamps: each partial clip is delivered to the callback,
/// and the characters collected from the partial clips match the final clip's transcript.
/// </summary>
public async Task Test_04_StreamTextToSpeech_Transcription()
{
    Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
    var voice = Voices.Voice.Adam;
    Assert.NotNull(voice);
    voice.Settings ??= await ElevenLabsClient.VoicesEndpoint.GetDefaultVoiceSettingsAsync();
    var partialClips = new Queue<VoiceClip>();
    var characters = new Queue<TimestampedTranscriptCharacter>();
    Console.WriteLine("| Character | Start Time | End Time |");
    Console.WriteLine("| --------- | ---------- | -------- |");
    var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", outputFormat: OutputFormat.PCM_24000, withTimestamps: true);
    var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
    {
        Assert.IsNotNull(partialClip);
        partialClips.Enqueue(partialClip);
        foreach (var character in partialClip.TimestampedTranscriptCharacters)
        {
            characters.Enqueue(character);
            Console.WriteLine($"| {character.Character} | {character.StartTime} | {character.EndTime} |");
        }
        // Keeps the async lambda signature satisfied; the callback does no real async work.
        await Task.CompletedTask;
    });
    // Fixed: Assert.NotNull(partialClips) was duplicated here.
    Assert.NotNull(partialClips);
    Assert.IsNotEmpty(partialClips);
    Assert.NotNull(voiceClip);
    Console.WriteLine(voiceClip.Id);
    // The characters streamed in partial clips must equal the final clip's full transcript.
    Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters);
}

[Test]
public async Task Test_05_01_LanguageEnforced_TextToSpeech()
{
Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
var voice = Voices.Voice.Adam;
Expand Down Expand Up @@ -93,7 +124,7 @@ public async Task Test_05_LanguageEnforced_TextToSpeech()
}

[Test]
public async Task Test_TurboV2_5_LanguageEnforced_TextToSpeech()
public async Task Test_05_02_TurboV2_5_LanguageEnforced_TextToSpeech()
{
Assert.NotNull(ElevenLabsClient.TextToSpeechEndpoint);
var voice = Voices.Voice.Adam;
Expand Down
7 changes: 6 additions & 1 deletion ElevenLabs-DotNet/ElevenLabs-DotNet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ All copyrights, trademarks, logos, and assets are the property of their respecti
<SignAssembly>false</SignAssembly>
<IncludeSymbols>true</IncludeSymbols>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>3.4.2</Version>
<Version>3.5.0</Version>
<PackageReleaseNotes>
Version 3.5.0
- Added TextToSpeechRequest.ctor overload
- Added seed property
- Added applyTextNormalization property
- Removed deprecated optimizeStreamingLatency property
Version 3.4.2
- Added flash models
- Added stream input support to dubbing endpoint
Expand Down
2 changes: 2 additions & 0 deletions ElevenLabs-DotNet/TextToSpeech/TextToSpeechEndpoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func
{ OutputFormatParameter, request.OutputFormat.ToString().ToLower() }
};

#pragma warning disable CS0618 // Type or member is obsolete
if (request.OptimizeStreamingLatency.HasValue)
{
parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString());
}
#pragma warning restore CS0618 // Type or member is obsolete

var endpoint = $"/{request.Voice.Id}";

Expand Down
104 changes: 83 additions & 21 deletions ElevenLabs-DotNet/TextToSpeech/TextToSpeechRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,32 @@ namespace ElevenLabs.TextToSpeech
{
public sealed class TextToSpeechRequest
{
/// <summary>
/// Obsolete constructor kept for backward compatibility; delegates to the primary constructor.
/// </summary>
[Obsolete("use new .ctor overload")]
public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings)
    : this(null, text, voiceSettings: voiceSettings, model: model)
{
}

/// <summary>
/// Obsolete constructor kept for callers that still pass <paramref name="optimizeStreamingLatency"/>;
/// delegates to the primary constructor and then stores the deprecated latency value.
/// </summary>
[Obsolete("use new .ctor overload")]
public TextToSpeechRequest(
    Voice voice,
    string text,
    Encoding encoding,
    VoiceSettings voiceSettings,
    OutputFormat outputFormat,
    int? optimizeStreamingLatency,
    Model model = null,
    string previousText = null,
    string nextText = null,
    string[] previousRequestIds = null,
    string[] nextRequestIds = null,
    string languageCode = null,
    bool withTimestamps = false)
    : this(voice, text, encoding, voiceSettings, outputFormat, model, previousText, nextText, previousRequestIds, nextRequestIds, languageCode, withTimestamps)
{
    // Deprecated server-side parameter; retained only so existing callers keep compiling.
    OptimizeStreamingLatency = optimizeStreamingLatency;
}

/// <summary>
/// Constructor.
/// </summary>
Expand All @@ -36,40 +56,70 @@ public TextToSpeechRequest(string text, Model model, VoiceSettings voiceSettings
/// Output format of the generated audio.<br/>
/// Defaults to <see cref="OutputFormat.MP3_44100_128"/>
/// </param>
/// <param name="optimizeStreamingLatency">
/// Optional, You can turn on latency optimizations at some cost of quality.
/// The best possible final latency varies by model.<br/>
/// Possible values:<br/>
/// 0 - default mode (no latency optimizations)<br/>
/// 1 - normal latency optimizations (about 50% of possible latency improvement of option 3)<br/>
/// 2 - strong latency optimizations (about 75% of possible latency improvement of option 3)<br/>
/// 3 - max latency optimizations<br/>
/// 4 - max latency optimizations, but also with text normalizer turned off for even more latency savings
/// (best latency, but can mispronounce e.g. numbers and dates).
/// <param name="previousText">
/// The text that came before the text of the current request.
/// Can be used to improve the speech’s continuity when concatenating together multiple generations or
/// to influence the speech’s continuity in the current generation.
/// </param>
/// <param name="nextText">
/// The text that comes after the text of the current request.
/// Can be used to improve the speech’s continuity when concatenating together multiple generations or
/// to influence the speech’s continuity in the current generation.
/// </param>
/// <param name="previousRequestIds">
/// A list of request_id of the samples that were generated before this generation.
/// Can be used to improve the speech’s continuity when splitting up a large task into multiple requests.
/// The results will be best when the same model is used across the generations. In case both previous_text and previous_request_ids is send,
/// previous_text will be ignored. A maximum of 3 request_ids can be send.
/// </param>
/// <param name="nextRequestIds">
/// A list of request_id of the samples that come after this generation.
/// next_request_ids is especially useful for maintaining the speech’s continuity when regenerating a sample that has had some audio quality issues.
/// For example, if you have generated 3 speech clips, and you want to improve clip 2,
/// passing the request id of clip 3 as a next_request_id (and that of clip 1 as a previous_request_id)
/// will help maintain natural flow in the combined speech.
/// The results will be best when the same model is used across the generations.
/// In case both next_text and next_request_ids is send, next_text will be ignored.
/// A maximum of 3 request_ids can be send.
/// </param>
/// <param name="previousText"></param>
/// <param name="nextText"></param>
/// <param name="previousRequestIds"></param>
/// <param name="nextRequestIds"></param>
/// <param name="languageCode">
/// Optional, Language code (ISO 639-1) used to enforce a language for the model. Currently only <see cref="Model.TurboV2_5"/> supports language enforcement.
/// For other models, an error will be returned if language code is provided.
/// </param>
/// <param name="withTimestamps">
/// Generate speech from text with precise character-level timing information for audio-text synchronization.
/// </param>
/// <param name="seed">
/// If specified, our system will make a best effort to sample deterministically,
/// such that repeated requests with the same seed and parameters should return the same result.
/// Determinism is not guaranteed. Must be integer between 0 and 4294967295.
/// </param>
/// <param name="applyTextNormalization">
/// This parameter controls text normalization with three modes: ‘auto’ (null), ‘on’ (true), and ‘off’ (false).
/// When set to ‘null’, the system will automatically decide whether to apply text normalization (e.g., spelling out numbers).
/// With ‘true’, text normalization will always be applied,
/// while with ‘false’, it will be skipped.
/// Cannot be turned on for ‘eleven_turbo_v2_5’ model.
/// </param>
public TextToSpeechRequest(
Voice voice,
string text,
Encoding encoding = null,
VoiceSettings voiceSettings = null,
OutputFormat outputFormat = OutputFormat.MP3_44100_128,
int? optimizeStreamingLatency = null,
Model model = null,
string previousText = null,
string nextText = null,
string[] previousRequestIds = null,
string[] nextRequestIds = null,
string languageCode = null,
bool withTimestamps = false)
bool withTimestamps = false,
int? seed = null,
bool? applyTextNormalization = null)
{
if (string.IsNullOrWhiteSpace(text))
{
Expand All @@ -89,10 +139,9 @@ public TextToSpeechRequest(

Text = text;
Model = model ?? Models.Model.FlashV2;
Voice = voice;
Voice = string.IsNullOrWhiteSpace(voice) ? Voice.Adam : voice;
VoiceSettings = voiceSettings ?? voice.Settings;
OutputFormat = outputFormat;
OptimizeStreamingLatency = optimizeStreamingLatency;
PreviousText = previousText;
NextText = nextText;
if (previousRequestIds?.Length > 3)
Expand All @@ -107,6 +156,12 @@ public TextToSpeechRequest(
NextRequestIds = nextRequestIds;
LanguageCode = languageCode;
WithTimestamps = withTimestamps;
Seed = seed;

if (applyTextNormalization.HasValue)
{
ApplyTextNormalization = applyTextNormalization.Value ? "on" : "off";
}
}

[JsonPropertyName("text")]
Expand All @@ -129,6 +184,7 @@ public TextToSpeechRequest(
public OutputFormat OutputFormat { get; }

[JsonIgnore]
[Obsolete("Deprecated")]
public int? OptimizeStreamingLatency { get; }

[JsonPropertyName("next_text")]
Expand All @@ -146,11 +202,17 @@ public TextToSpeechRequest(
[JsonPropertyName("next_request_ids")]
public string[] NextRequestIds { get; }


[JsonPropertyName("language_code")]
public string LanguageCode { get; }

[JsonIgnore]
public bool WithTimestamps { get; }

[JsonPropertyName("seed")]
public int? Seed { get; }

[JsonPropertyName("apply_text_normalization")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public string ApplyTextNormalization { get; }
}
}

0 comments on commit dfc4344

Please sign in to comment.