-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmakeSzenario.py
117 lines (91 loc) · 3.33 KB
/
makeSzenario.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import numpy as np
import sys
import io
import os
#Don't let tensorflow hog the GPU
os.environ['CUDA_VISIBLE_DEVICES'] = ''
from config import *
from utils import *
import tensorflow as tf
sys.path.insert(0, nyumaya_basepath + '/python/src/')
from libnyumaya import FeatureExtractor
from auto_platform import default_libpath
from random import shuffle
from os.path import splitext
# Version tag inserted into the LibriTTS record file names built at the bottom of this script.
version='v3.0'
def bytes_feature(value):
    """Wrap a single value in a tf.train.Feature holding a one-element BytesList."""
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)
# Joins the configured nyumaya base path with the platform default library path.
# NOTE(review): nyumaya_libpath is not referenced anywhere else in the visible
# code and is not passed to FeatureExtractor() below - confirm whether the
# constructor expects the library path as an argument.
nyumaya_libpath = os.path.join(nyumaya_basepath, default_libpath)
# Module-level extractor shared by the make_* functions (they call signalToMel on it).
lib_extractor = FeatureExtractor()
def write_example_to_record(meldata,text,writer):
    """Serialize one (mel data, text) pair and hand it to the record writer.

    meldata: object exposing tobytes(); its raw bytes are stored under 'meldata'.
    text: value stored as-is under 'utf_text' (callers pass UTF-8 encoded bytes).
    writer: object whose write() receives the serialized tf.train.Example.
    """
    feature_map = {
        'meldata': bytes_feature(meldata.tobytes()),
        'utf_text': bytes_feature(text),
    }
    record = tf.train.Example(features=tf.train.Features(feature=feature_map))
    writer.write(record.SerializeToString())
def _collect_librispeech_prompts(in_dir):
    """Return a list of (wav_path, transcript) pairs found below in_dir."""
    suffix = ".normalized.txt"
    prompts = []
    for root, _dirs, files in os.walk(in_dir):
        for name in files:
            # Match on the real suffix so unrelated files that merely contain
            # "normalized.txt" in their name are not picked up.
            if not name.endswith(suffix):
                continue
            transcript_path = os.path.join(root, name)
            # Explicit UTF-8: the text is encoded as UTF-8 again before it is
            # written, so it must not be decoded with a platform default codec.
            with io.open(transcript_path, 'r', encoding='utf-8') as prfile:
                # Trailing whitespace (e.g. the final newline) is dropped,
                # leading whitespace is kept.
                text = prfile.read().rstrip()
            # Swap only the file suffix; a str.replace over the whole path
            # could hit a directory name instead.
            wav_path = transcript_path[:-len(suffix)] + ".wav"
            # Keep path and text as a tuple. Joining them with "|" and
            # splitting later breaks as soon as either part contains "|".
            prompts.append((wav_path, text))
            print("Appending: {}".format(wav_path))
    return prompts


def make_librispeech(in_dir,recordName,normalize,db_change):
    """Write every transcribed utterance found below in_dir to one TFRecord file.

    For each "*.normalized.txt" transcript, the ".wav" file with the same stem
    is loaded via load_audio_file, converted to mel data with the shared
    lib_extractor and stored together with the UTF-8 encoded transcript.
    Entries are shuffled before writing.

    in_dir: directory that is walked recursively for transcripts.
    recordName: file name of the record, created inside szenario_basepath.
    normalize: when true, apply_gain(db_change) is called on the loaded audio.
    db_change: gain passed to apply_gain; ignored when normalize is false.
    """
    prompts = _collect_librispeech_prompts(in_dir)
    shuffle(prompts)

    record_path = os.path.join(szenario_basepath, recordName)
    with tf.io.TFRecordWriter(record_path) as writer:
        for wav_path, text in prompts:
            # load_audio_file presumably returns a pydub AudioSegment plus a
            # duration - TODO confirm against utils.
            audio, _ = load_audio_file(wav_path)
            if normalize:
                audio = audio.apply_gain(db_change)

            samples = np.asarray(audio.get_array_of_samples(), dtype=np.int16)
            meldata = lib_extractor.signalToMel(samples.tobytes())
            # One row per frame, 80 values per row.
            meldata = np.reshape(meldata, (-1, 80))
            write_example_to_record(meldata, text.encode('utf-8'), writer)
def make_folder(in_dir,record_name):
    """Write all audio files found below in_dir to one TFRecord scenario file.

    Every file whose lower-cased extension is listed in extension_list is
    loaded via load_audio_file, cut into slices, converted to mel data with
    the shared lib_extractor and stored with an empty text. Files are
    processed in shuffled order. A file that fails is reported and skipped so
    the remaining files are still written.

    Watch out: pydub might complain about long mp3 files.

    in_dir: directory that is walked recursively for audio files.
    record_name: file name of the record, created inside szenario_basepath.
    """
    audio_paths = []
    for root, _dirs, files in os.walk(in_dir):
        for name in files:
            if splitext(name)[1].lower() in extension_list:
                audio_paths.append(os.path.join(root, name))

    shuffle(audio_paths)

    record_path = os.path.join(szenario_basepath, record_name)
    # These samples carry no transcript; the same empty text is used for all.
    empty_text = "".encode('utf-8')

    with tf.io.TFRecordWriter(record_path) as writer:
        for path in audio_paths:
            print(path)
            try:
                sound, duration = load_audio_file(path)
                print("Duration: {}".format(duration))

                # Cut into 20 second slices (the step is 20*1000, presumably
                # milliseconds as used by pydub - TODO confirm).
                for chunk in sound[0:-1:20*1000]:
                    samples = np.asarray(chunk.get_array_of_samples(), dtype=np.int16)
                    meldata = lib_extractor.signalToMel(samples.tobytes())
                    # One row per frame, 80 values per row.
                    meldata = np.reshape(meldata, (-1, 80))
                    write_example_to_record(meldata, empty_text, writer)
            except Exception as err:
                # Best effort per file, but do not swallow KeyboardInterrupt /
                # SystemExit, and say which file failed and why.
                print("Failed to make sample from {}: {}".format(path, err))
# Script entry: these calls sit at module level, so they run whenever this
# file is executed or imported. The first two calls are active; the
# gain-shifted LibriTTS variants below them are disabled - uncomment them to
# generate those records as well.
make_folder("./myfolder","ambient_test_v1.0.tfrecords")
make_librispeech("./LibriTTS/test-clean/","libri_test_" + version + ".tfrecords",False,0)
#make_librispeech("./LibriTTS/test-clean/","libri_test_plus10db_" + version + ".tfrecords",True,10)
#make_librispeech("./LibriTTS/test-clean/","libri_test_minus10db_" + version + ".tfrecords",True,-10)
#make_librispeech("./LibriTTS/test-clean/","libri_test_minus20db_" + version + ".tfrecords",True,-20)