Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-function audio processing features #461

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions scripts/recording_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import argparse
import time
from pydub import AudioSegment
from scipy.signal import resample
from tqdm import tqdm
import os
import sounddevice as sd
import soundfile as sf

# Length, in seconds, of every recorded or chopped audio fragment; multiplied
# by the sample rate to obtain the number of frames per fragment.
DURATION = 10.0

def countdown(count: int):
    """Show a live countdown on a single terminal line, then return.

    The wait is measured against a monotonic deadline instead of summing
    many 1 ms sleeps, so the total time does not drift when the operating
    system's sleep granularity is coarser than a millisecond.

    Args:
        count (int): The amount of milliseconds to wait for. Zero or a
            negative value returns immediately.
    """
    total_seconds = count / 1000
    # Fixed-width output so a shorter number fully overwrites a longer one
    # when the cursor is moved back with '\r'.
    width = len(f'{max(total_seconds, 0):.3f}')
    deadline = time.monotonic() + total_seconds
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # '\r' does not trigger a flush on line-buffered terminals, so flush
        # explicitly to make the countdown actually visible.
        print(f'{remaining:>{width}.3f}', end='\r', flush=True)
        # Refresh about 100 times per second, never sleeping past the deadline.
        time.sleep(min(0.01, remaining))

def record_until_keypress(
        voice_name: str,
        samplerate: int=22050,
        channels: int=1):
    """Record fixed-length chunks until Ctrl+C, then save each one as a WAV file.

    Chunks of ``DURATION`` seconds are recorded back to back. A chunk that is
    still being recorded when Ctrl+C arrives is never appended, so only
    complete chunks are saved. Files are written as ``1.wav``, ``2.wav``, ...
    inside ``../tortoise/voices/<voice_name>/`` (relative to the current
    working directory); the folder and any missing parents are created.

    Args:
        voice_name (str): The name of the voice that records.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
        channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1.
    """
    frames_per_chunk = int(samplerate * DURATION)
    recordings = []
    print("·You are recording! Press Ctrl+C to stop.")
    try:
        while True:
            recording = sd.rec(
                frames_per_chunk,
                samplerate=samplerate,
                channels=channels,
                blocking=True)
            recordings.append(recording)
            print(f"Chunk #{len(recordings)} was recorded")
    except KeyboardInterrupt:
        print("Recording stopped.")

    output_folder = os.path.join('..', 'tortoise', 'voices', voice_name)
    # makedirs (not mkdir) so a missing parent folder does not abort the save
    # after the whole recording session has already taken place.
    os.makedirs(output_folder, exist_ok=True)
    for index, rec in enumerate(recordings, start=1):
        fname = os.path.join(output_folder, f'{index}.wav')
        try:
            sf.write(fname, rec, samplerate, subtype='FLOAT')
        except Exception as err:
            # Best effort: keep saving the remaining chunks, but report why
            # this one failed instead of hiding the cause.
            print(f"Error saving chunk #{index}: {err}")

def record_audio(
        file_path: str,
        num_samples: int=3,
        samplerate: int=22050,
        channels: int=1,
        timeout: int=5):
    """Record a fixed number of samples, with a countdown before each one.

    Every sample lasts ``DURATION`` seconds and is written to
    ``<file_path>/<n>.wav`` with ``n`` starting at 1. The target folder is
    created up front so a missing directory is detected before any time is
    spent on countdowns and recording.

    Args:
        file_path (str): Path to the voice folder where the samples will be saved.
        num_samples (int, optional): The amount of samples to save. Defaults to 3.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
        channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1.
        timeout (int, optional): The seconds to wait between samples. Defaults to 5.
    """
    if file_path:
        os.makedirs(file_path, exist_ok=True)
    frames_per_sample = int(samplerate * DURATION)

    for sample_number in range(1, num_samples + 1):
        print(f"Preparing to record sample {sample_number} in {timeout} seconds...")
        # countdown() expects milliseconds.
        countdown(timeout * 1000)

        print("Recording...")
        recording = sd.rec(
            frames_per_sample,
            samplerate=samplerate,
            channels=channels,
            blocking=True)

        fname = os.path.join(file_path, f'{sample_number}.wav')
        sf.write(fname, recording, samplerate, subtype='FLOAT')

        print(f"Recording of sample {sample_number} finished and saved as '{fname}'.")

def chop_audio(input_path: str,
               output_folder: str,
               no_conversion: bool=True,
               samplerate: int=22050):
    """Chop an audio file into consecutive chunks of ``DURATION`` seconds.

    The audio is optionally converted to WAV first, resampled to
    ``samplerate`` when its native rate differs, and then split into
    non-overlapping chunks written as ``1.wav``, ``2.wav``, ... inside
    ``output_folder``. The last chunk may be shorter than ``DURATION``.

    Args:
        input_path (str): Path to the original voice sample.
        output_folder (str): Path to the voice folder where the sample's chunks will be saved.
            Created if it does not exist.
        no_conversion (bool, optional): Ignore the conversion to WAV format. Defaults to True.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
    """
    # Local import: only the conversion path needs it.
    import tempfile

    os.makedirs(output_folder, exist_ok=True)

    if no_conversion:
        print("Loading WAV file...")
        data, og_samplerate = sf.read(input_path)
    else:
        print("Loading file...")
        audio = AudioSegment.from_file(input_path)

        print("Converting to WAV...")
        # Use a private temporary folder so the intermediate file neither
        # clobbers an existing "intermediate.wav" in the working directory
        # nor is left behind afterwards.
        with tempfile.TemporaryDirectory() as tmp_dir:
            intermediate_path = os.path.join(tmp_dir, "intermediate.wav")
            # export() hands back the output file handle; close it so the
            # file can be reopened for reading and removed on cleanup.
            audio.export(intermediate_path, format="wav").close()

            print("Loading WAV file...")
            data, og_samplerate = sf.read(intermediate_path)

    if og_samplerate != samplerate:
        print("Resampling audio data...")
        data = resample(data, len(data) * samplerate // og_samplerate)

    chunk_len = int(samplerate * DURATION)
    # One start offset per chunk; tqdm derives the total from the range
    # length, which is exact even when the audio is a whole number of chunks.
    chunk_starts = range(0, len(data), chunk_len)

    print("Saving chunks...")
    for index, start in enumerate(tqdm(chunk_starts), 1):
        # Slice from the sample offset, not from the chunk counter, so the
        # chunks are consecutive rather than near-identical copies.
        chunk = data[start:start + chunk_len]
        sf.write(
            os.path.join(output_folder, f'{index}.wav'),
            chunk, samplerate, subtype='FLOAT')

    print(f"Conversion and chopping finished. Saved files in '{output_folder}'.")

if __name__ == "__main__":
    # Command-line entry point: parse the options, check that the chosen
    # command received the options it needs, then dispatch to it.
    parser = argparse.ArgumentParser(description='Process some audio.')
    parser.add_argument(
        'command',
        choices=['record', 'chop', 'keypress'],
        help='Command to execute')
    parser.add_argument(
        '--file_path',
        help='Path to the file to process')
    parser.add_argument(
        '--voice_name',
        help='The name of the voice that will record')
    parser.add_argument(
        '--output_folder',
        help='Folder to save the chunks')
    # Passing this flag turns conversion ON (it clears `no_convert`). The
    # original spelling `--no-convert` said the opposite of what it did, so
    # `--convert` is the documented name; the old one stays as an alias so
    # existing invocations keep their behaviour.
    parser.add_argument(
        '--convert', '--no-convert',
        dest='no_convert',
        action='store_false', default=True,
        help='Convert file to WAV format before chopping '
             '(--no-convert is a deprecated alias with the same effect)')
    parser.add_argument(
        '--num_samples',
        type=int, default=1,
        help='Number of samples to record')
    parser.add_argument(
        '--rec_timeout',
        type=int, default=5,
        help='Seconds between recordings')

    args = parser.parse_args()

    # Options each command cannot run without; failing here gives a usage
    # error instead of letting None end up inside a file path.
    required_options = {
        'record': ['file_path'],
        'chop': ['file_path', 'output_folder'],
        'keypress': ['voice_name'],
    }
    missing = [
        f'--{option}'
        for option in required_options[args.command]
        if getattr(args, option) is None
    ]
    if missing:
        parser.error(
            f"the '{args.command}' command requires: {', '.join(missing)}")

    if args.command == 'record':
        record_audio(
            args.file_path,
            num_samples=args.num_samples,
            timeout=args.rec_timeout)
    elif args.command == 'chop':
        chop_audio(
            args.file_path,
            args.output_folder,
            no_conversion=args.no_convert)
    elif args.command == 'keypress':
        record_until_keypress(args.voice_name)