Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C# API for Kokoro TTS models #1720

Merged
merged 3 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

cd dotnet-examples/

cd ./offline-tts
cd ./kokoro-tts
./run-kokoro-en.sh
ls -lh

cd ../offline-tts
./run-matcha-zh.sh
ls -lh *.wav
./run-matcha-en.sh
Expand All @@ -19,7 +23,8 @@ pushd ../..

mkdir tts

cp dotnet-examples/offline-tts/*.wav ./tts
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
cp -v dotnet-examples/offline-tts/*.wav ./tts
popd

cd ../offline-speaker-diarization
Expand Down
189 changes: 189 additions & 0 deletions dotnet-examples/kokoro-tts-play/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Copyright (c) 2025 Xiaomi Corporation
//
// This file shows how to use a non-streaming Kokoro TTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

/// <summary>
/// Example: synthesize speech with a non-streaming Kokoro TTS model and play
/// the samples through the default PortAudio output device as the generator
/// hands them to the progress callback. The complete audio is also saved to
/// a wave file.
/// </summary>
class OfflineTtsDemo
{
    /// <summary>
    /// Entry point. Configures the model, opens an output stream on the
    /// default device, generates audio for a fixed text, saves it to
    /// ./generated-kokoro-0.wav and waits until playback has drained.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    static void Main(string[] args)
    {
        var config = new OfflineTtsConfig();
        config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
        config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
        config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
        config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";

        config.Model.NumThreads = 2;
        config.Model.Debug = 1;
        config.Model.Provider = "cpu";

        var tts = new OfflineTts(config);
        var speed = 1.0f;
        var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
            "does not have two-thirds of his day for himself, is a slave, whatever " +
            "he may be: a statesman, a businessman, an official, or a scholar. " +
            "Friends fell out often because life was changing so fast. The easiest " +
            "thing in the world was to lose touch with someone.";

        // mapping of sid to voice name
        // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
        // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
        var sid = 0;

        Console.WriteLine(PortAudio.VersionInfo.versionText);
        PortAudio.Initialize();
        Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

        for (int i = 0; i != PortAudio.DeviceCount; ++i)
        {
            Console.WriteLine($" Device {i}");
            DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
            Console.WriteLine($" Name: {deviceInfo.name}");
            Console.WriteLine($" Max output channels: {deviceInfo.maxOutputChannels}");
            Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
        }

        int deviceIndex = PortAudio.DefaultOutputDevice;
        if (deviceIndex == PortAudio.NoDevice)
        {
            // ../kokoro-tts is the variant of this example that only writes a wave file.
            Console.WriteLine("No default output device found. Please use ../kokoro-tts instead");
            Environment.Exit(1);
        }

        var info = PortAudio.GetDeviceInfo(deviceIndex);
        Console.WriteLine();
        Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");

        var param = new StreamParameters();
        param.device = deviceIndex;
        param.channelCount = 1;
        param.sampleFormat = SampleFormat.Float32;
        param.suggestedLatency = info.defaultLowOutputLatency;
        param.hostApiSpecificStreamInfo = IntPtr.Zero;

        // Queue of generated chunks: filled by the TTS progress callback,
        // drained by the PortAudio playback callback.
        // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
        var dataItems = new BlockingCollection<float[]>();

        var MyCallback = (IntPtr samples, int n, float progress) =>
        {
            Console.WriteLine($"Progress {progress*100}%");

            // Copy the samples into a managed array before queueing them for playback.
            float[] data = new float[n];
            Marshal.Copy(samples, data, 0, n);
            dataItems.Add(data);

            // 1 means to keep generating
            // 0 means to stop generating
            return 1;
        };

        // Written by the playback callback and polled by Main, so every access
        // goes through Volatile to make the write visible across threads.
        var playFinished = false;

        // Chunk currently being played and the index of its first unplayed sample.
        float[]? lastSampleArray = null;
        int lastIndex = 0;

        PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
            UInt32 frameCount,
            ref StreamCallbackTimeInfo timeInfo,
            StreamCallbackFlags statusFlags,
            IntPtr userData
        ) =>
        {
            if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
            {
                Console.WriteLine($"Finished playing");
                Volatile.Write(ref playFinished, true);
                return StreamCallbackResult.Complete;
            }

            int expected = Convert.ToInt32(frameCount);
            int written = 0;

            while (written < expected)
            {
                if (lastSampleArray == null)
                {
                    // Non-blocking: the audio callback must never wait for the generator.
                    if (!dataItems.TryTake(out var next))
                    {
                        break;
                    }

                    lastSampleArray = next;
                    lastIndex = 0;
                }

                float[] current = lastSampleArray;
                int count = Math.Min(current.Length - lastIndex, expected - written);

                // Copy straight from the chunk at its current offset; no temporary arrays.
                Marshal.Copy(current, lastIndex, IntPtr.Add(output, written * sizeof(float)), count);
                lastIndex += count;
                written += count;

                if (lastIndex == current.Length)
                {
                    lastSampleArray = null;
                    lastIndex = 0;
                }
            }

            if (written < expected)
            {
                // Not enough samples available yet: pad the rest of the buffer with silence.
                int sizeInBytes = (expected - written) * sizeof(float);
                Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, written * sizeof(float)), sizeInBytes);
            }

            return StreamCallbackResult.Continue;
        };

        PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
            framesPerBuffer: 0,
            streamFlags: StreamFlags.ClipOff,
            callback: playCallback,
            userData: IntPtr.Zero
        );

        stream.Start();

        var callback = new OfflineTtsCallbackProgress(MyCallback);

        var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
        var outputFilename = "./generated-kokoro-0.wav";
        var ok = audio.SaveToWaveFile(outputFilename);

        if (ok)
        {
            Console.WriteLine($"Wrote to {outputFilename} succeeded!");
        }
        else
        {
            Console.WriteLine($"Failed to write {outputFilename}");
        }

        // No more chunks will arrive; lets the playback callback detect the end.
        dataItems.CompleteAdding();

        while (!Volatile.Read(ref playFinished))
        {
            Thread.Sleep(100); // 100ms
        }
    }
}
19 changes: 19 additions & 0 deletions dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!-- Project file for the kokoro-tts-play example (console executable targeting net8.0). -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>kokoro_tts_play</RootNamespace>
<!-- Program.cs relies on implicit usings (e.g. it uses Console and Thread without a using directive). -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Provides the PortAudioSharp namespace used for playback. -->
<!-- NOTE(review): Version="*" is a floating version, so the resolved package can change between restores; consider pinning. -->
<PackageReference Include="PortAudioSharp2" Version="*" />
</ItemGroup>

<ItemGroup>
<!-- Shared project of the dotnet examples; presumably supplies the SherpaOnnx bindings (confirm in ..\Common). -->
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>

</Project>
10 changes: 10 additions & 0 deletions dotnet-examples/kokoro-tts-play/run-kokoro-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Runs the kokoro-tts-play example; fetches the English Kokoro model on first use.
archive=kokoro-en-v0_19.tar.bz2
set -ex

# model.onnx serves as the marker that the archive was already downloaded and unpacked.
if ! [ -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf "$archive"
  rm "$archive"
fi

dotnet run
70 changes: 70 additions & 0 deletions dotnet-examples/kokoro-tts/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) 2025 Xiaomi Corporation
//
// This file shows how to use a non-streaming Kokoro TTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using SherpaOnnx;
using System.Runtime.InteropServices;

/// <summary>
/// Example: synthesize speech with a non-streaming Kokoro TTS model and save
/// the result to a wave file, printing progress while generating.
/// </summary>
class OfflineTtsDemo
{
    /// <summary>
    /// Entry point. Configures the model, generates audio for a fixed text
    /// and writes it to ./generated-kokoro-0.wav.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    static void Main(string[] args)
    {
        var ttsConfig = new OfflineTtsConfig();

        // Model files of the kokoro-en-v0_19 package.
        ttsConfig.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
        ttsConfig.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
        ttsConfig.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
        ttsConfig.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";

        // Runtime settings.
        ttsConfig.Model.NumThreads = 2;
        ttsConfig.Model.Debug = 1;
        ttsConfig.Model.Provider = "cpu";

        var tts = new OfflineTts(ttsConfig);

        var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
            "does not have two-thirds of his day for himself, is a slave, whatever " +
            "he may be: a statesman, a businessman, an official, or a scholar. " +
            "Friends fell out often because life was changing so fast. The easiest " +
            "thing in the world was to lose touch with someone.";
        var speed = 1.0f;

        // Speaker id to voice name:
        //   0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
        //   6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
        var sid = 0;

        // Called with each chunk of generated samples and the overall progress.
        int OnProgress(IntPtr samples, int n, float progress)
        {
            // The samples are copied into managed memory so they could be
            // processed here, e.g. played. See ../kokoro-tts-play for playback.
            var chunk = new float[n];
            Marshal.Copy(samples, chunk, 0, n);

            Console.WriteLine($"Progress {progress*100}%");

            // Return 1 to keep generating, 0 to stop.
            return 1;
        }

        var callback = new OfflineTtsCallbackProgress(OnProgress);
        var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);

        var outputFilename = "./generated-kokoro-0.wav";
        var saved = audio.SaveToWaveFile(outputFilename);

        Console.WriteLine(saved
            ? $"Wrote to {outputFilename} succeeded!"
            : $"Failed to write {outputFilename}");
    }
}

15 changes: 15 additions & 0 deletions dotnet-examples/kokoro-tts/kokoro-tts.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!-- Project file for the kokoro-tts example (console executable targeting net8.0). -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>kokoro_tts</RootNamespace>
<!-- Program.cs relies on implicit usings (e.g. it uses Console without a using directive). -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Shared project of the dotnet examples; presumably supplies the SherpaOnnx bindings (confirm in ..\Common). -->
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>

</Project>
10 changes: 10 additions & 0 deletions dotnet-examples/kokoro-tts/run-kokoro-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Runs the kokoro-tts example; fetches the English Kokoro model on first use.
archive=kokoro-en-v0_19.tar.bz2
set -ex

# model.onnx serves as the marker that the archive was already downloaded and unpacked.
if ! [ -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf "$archive"
  rm "$archive"
fi

dotnet run
12 changes: 12 additions & 0 deletions dotnet-examples/sherpa-onnx.sln
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-micro
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\kokoro-tts.csproj", "{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -93,6 +97,14 @@ Global
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.Build.0 = Release|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Loading
Loading