-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathextract_audio.py
122 lines (94 loc) · 5.29 KB
/
extract_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
from scipy.io.wavfile import read
import subprocess as sp
import numpy as np
import argparse
# Template for the path of a source recording; it is filled in with
# (directory, file name without extension).
audio_file_path = os.path.join('%s', '%s.mp3')

# Default locations, used when the matching command-line option is omitted.
dir_text = os.path.expanduser('~/Downloads/voxceleb_data/voxceleb1_txt')
dir_audio = os.path.expanduser('~/Downloads/voxceleb_data/voxceleb1_audio')
root_path = os.path.expanduser('~/Downloads/voxceleb')

# Command-line interface: every option falls back to the default above.
parser = argparse.ArgumentParser(
    description='Extracting the utterances of POIs from OXFORD VOXCELEB dataset',
)
parser.add_argument(
    '--root_path',
    default=root_path,
    help='The directory which the whole data files are available',
)
parser.add_argument(
    '--dir_audio',
    default=dir_audio,
    help='The directory which the output audio files will be stored',
)
parser.add_argument(
    '--dir_text',
    default=dir_text,
    help='The directory which the annotations exist',
)

# Parsed at import time; extract() reads these values through ``args``.
args = parser.parse_args()
def _source_video_id(utterance_name):
    """Return the source recording name encoded in an utterance name.

    Utterance names look like ``9mQ11vBs1wc_0000001``: the recording name
    followed by an underscore and the utterance index.  The name is split
    at the LAST underscore so that a recording name which itself contains
    underscores is not truncated.
    """
    return utterance_name.rsplit('_', 1)[0]


def extract():
    """Cut every annotated utterance out of its source recording with FFmpeg.

    Walks ``args.dir_text`` bottom-up.  For each directory visited, the
    directory's base name is taken as the POI ID and a directory of the same
    name is created under ``args.dir_audio`` if it does not exist yet.

    Every file in a visited directory must have the ``.txt`` extension.
    Each line of such a file that starts with the POI ID is read as
    ``<ID>/<recording>_<index> <start> <end> ...`` and the segment
    ``[start, end]`` of ``<args.root_path>/<recording>.mp3`` is written to
    ``<args.dir_audio>/<ID>/<recording>_<index>.wav`` as 16 kHz mono audio.

    FFmpeg is run once per utterance and waited for before the next one is
    started; a non-zero exit status is reported on stdout and processing
    continues with the next utterance.

    Raises:
        AssertionError: if a file without the ``.txt`` extension is found in
            a visited directory.
    """
    for root, _, files in os.walk(args.dir_text, topdown=False):

        # Example:
        #   root = /home/sina/Downloads/voxceleb_data_test/voxceleb1_txt/A.J._Buckley
        #   ID   = A.J._Buckley
        ID = os.path.basename(os.path.normpath(root))
        dir_output_path = os.path.join(args.dir_audio, ID)

        # Create the output directory if it does not exist.
        if not os.path.exists(dir_output_path):
            os.makedirs(dir_output_path)

        # Validate the whole directory before any audio is extracted from it.
        for name in files:
            file_path = os.path.join(root, name)
            assert os.path.splitext(file_path)[1] == '.txt', "file extension is not .txt: %s" % file_path

        for name in files:
            # Example:
            #   file_path = .../voxceleb1_txt/A.J._Buckley/9mQ11vBs1wc.txt
            file_path = os.path.join(root, name)

            # The context manager closes the annotation file once it is parsed.
            with open(file_path) as annotation_file:
                for line in annotation_file:

                    # Only lines starting with the POI ID describe utterances.
                    if not line.startswith(ID):
                        continue

                    # Example: fields[0] = A.J._Buckley/9mQ11vBs1wc_0000001
                    fields = line.split()
                    utterance_name = os.path.basename(fields[0])
                    file_name = _source_video_id(utterance_name)
                    file_output_path = os.path.join(dir_output_path, utterance_name) + '.wav'

                    # Start, end and duration of the speech by the POI.
                    start = float(fields[1])
                    end = float(fields[2])
                    duration = end - start
                    print("start=%f , end=%f , duration=%f" % (start, end, duration))

                    # Full path of the recording which contains the utterance.
                    full_file_path_ID = audio_file_path % (args.root_path, file_name)

                    # Extract the utterance from the recording using FFmpeg.
                    # NOTE: without -y / -n, ffmpeg asks for confirmation
                    # when the output file already exists.
                    command = ['ffmpeg',
                               '-i', full_file_path_ID,
                               '-ss', str(start),
                               '-t', str(duration),
                               '-ar', '16000',  # output sampling rate: 16000 Hz
                               '-ac', '1',      # one audio channel (mono)
                               file_output_path]

                    # Wait for each ffmpeg run to finish: this reaps the child
                    # process and keeps a single ffmpeg running at a time
                    # instead of spawning one unattended process per line.
                    return_code = sp.call(command)
                    if return_code != 0:
                        print("ffmpeg exited with status %d for %s" % (return_code, file_output_path))
# Run the extraction only when this file is executed as a script.
if __name__ == '__main__':
    extract()