formant_track.py
#########################################
## SPADE formant track analysis script ##
#########################################
## Processes and analyses multi-point 'tracks' of formant values, along with linguistic
## and acoustic information from corpora collected as part of the SPeech Across Dialects
## of English (SPADE) project.
## Input:
##   - corpus name (e.g., Buckeye, SOTC)
##   - corpus metadata (stored in a YAML file);
##     this file should specify the paths to the
##     audio, transcripts, and metadata files (e.g.,
##     speaker, lexicon), and a datafile containing
##     prototype formant values to be used for
##     formant estimation
## Output:
##   - CSV of multi-point vowel measurements
##     (default: 21 rows per token; 1 row per formant
##     point sampled), with columns for the linguistic,
##     acoustic, and speaker information associated
##     with that token
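##
## A minimal sketch of the expected YAML layout, inferred from the keys this
## script reads below (all values here are hypothetical placeholders):
##   corpus_directory: /path/to/corpus
##   input_format: ...
##   unisyn_spade_directory: /path/to/unisyn_spade
##   dialect_code: ...
##   speaker_enrichment_file: /path/to/speaker_metadata.csv
##   speakers: []
##   vowel_inventory: [...]
##   extra_syllabic_segments: []
##   pauses: [...]
##   ignore_speakers: []          # optional
##   vowel_prototypes_path: ...   # optional; defaults to <corpus>/<corpus>_prototypes.csv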
import argparse
import os
import re
import sys
import time

## Make the shared helper module importable before importing it
base_dir = os.path.dirname(os.path.abspath(__file__))
script_dir = os.path.join(base_dir, 'Common')
sys.path.insert(0, script_dir)

import common
from polyglotdb import CorpusConfig, CorpusContext
from polyglotdb.utils import ensure_local_database_running

## Passed through to common.formant_acoustic_analysis() below
drop_formant = True
def formant_track_export(config, corpus_name, corpus_directory, dialect_code, speakers,
                         vowel_inventory, vowel_prototypes_path, reset_formants,
                         vowel_subset, ignored_speakers=None):
    ## Main function for processing and generating formant tracks.
    ## Arguments mirror the corpus YAML: the corpus name/directory, dialect code,
    ## speakers to include (and, optionally, to ignore), the vowel inventory,
    ## the path to the vowel prototypes file, and the -f/-s flag values.
    ## Determine which vowels to analyse:
    ## if the -s flag is used, the predefined vowel
    ## subset will be analysed; otherwise, the list
    ## of vowels specified in the YAML file will
    ## be analysed
    if vowel_subset:
        ## The default list of vowels to be analysed:
        ## TIDE (ae), PRICE (ai), WASTE (ee), WAIST (ei), FLEECE (ii),
        ## CHOICE (oi), GOAT (ou), KNOW (ouw), MOUTH (ow), GOOSE (uu)
        vowels_to_analyze = ['ae', 'ai', 'ee', 'ei', 'ii', 'oi', 'ou', 'ouw', 'ow', 'uu']
        csv_path = os.path.join(base_dir, corpus_name, '{}_formant_tracks.csv'.format(corpus_name))
    else:
        vowels_to_analyze = vowel_inventory
        csv_path = os.path.join(base_dir, corpus_name, '{}_formant_tracks_all_vowels.csv'.format(corpus_name))
print("Processing formant tracks for {}".format(corpus_name))
beg = time.time()
## Create the subset of corpus tokens that will be subject to
## formant track estimation
with CorpusContext(config) as c:
## Check the corpus has been enriched with UNISYN information
## If so, restrict the subset to: the UNISYN vowels defined above,
## vowels with primary stress, those which form the nucleus of the
## syllable, and those with a duration of at least 50ms
if c.hierarchy.has_type_property('word', 'unisynprimstressedvowel1'):
q = c.query_graph(c.phone)
q = q.filter(c.phone.syllable.stress == '1')
q = q.filter(c.phone.subset == 'nucleus')
if vowel_subset:
q = q.filter(c.phone.syllable.word.unisynprimstressedvowel1.in_(vowels_to_analyze))
else:
q = q.filter(c.phone.label.in_(vowels_to_analyze))
if ignored_speakers:
q = q.filter(c.phone.speaker.name.not_in_(ignored_speakers))
q = q.filter(c.phone.duration >= 0.05)
q.create_subset("unisyn_subset")
print('subset took {}'.format(time.time() - beg))
else:
print('{} has not been enriched with Unisyn information.'.format(corpus_name))
return
## If the -f flag has been used, previously-estimated
## formant values will be removed from the database,
## allowing formants to be re-estimated without needing
## to re-import the corpus
if reset_formants:
print("Resetting formants")
c.reset_acoustics()
print('Beginning formant calculation')
## Perform acoustic analysis on the defined subset, enriching the corpus
## with 21-point formant tracks for the tokens in the subset.
## See common.py for the details of this function.
common.formant_acoustic_analysis(config, None, vowel_prototypes_path, drop_formant = drop_formant, output_tracks = True, subset="unisyn_subset")
    with CorpusContext(config) as c:
        print('Beginning formant export')
        ## Constrain the formant track query
        ## to vowels which were subject to
        ## formant estimation
        q = c.query_graph(c.phone)
        q = q.filter(c.phone.subset == 'unisyn_subset')
        if speakers:
            q = q.filter(c.phone.speaker.name.in_(speakers))
        q = q.filter(c.phone.duration >= 0.05)
        print('Applied filters')

        ## Define the columns to be included in the query.
        ## Include the formant columns with 'relativised' time
        ## (i.e., as % through the vowel, e.g., 5%, 10%, etc.)
        formants_prop = c.phone.formants
        formants_prop.relative_time = True
        formants_track = formants_prop.interpolated_track
        formants_track.num_points = 21
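        ## Assuming the interpolated track includes both endpoints, 21 evenly
        ## spaced points correspond to one sample every 5% of the vowel's
        ## duration: 0%, 5%, 10%, ..., 100%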
        ## Include columns for speaker and file metadata,
        ## phone information (label, duration), surrounding
        ## phonological environment, syllable information
        ## (e.g., stress), word information, and speech rate
        q = q.columns(c.phone.speaker.name.column_name('speaker'),
                      c.phone.discourse.name.column_name('discourse'),
                      c.phone.id.column_name('phone_id'),
                      c.phone.label.column_name('phone_label'),
                      c.phone.begin.column_name('phone_begin'),
                      c.phone.end.column_name('phone_end'),
                      c.phone.duration.column_name('phone_duration'),
                      c.phone.syllable.stress.column_name('syllable_stress'),
                      c.phone.word.stresspattern.column_name('word_stresspattern'),
                      c.phone.syllable.position_in_word.column_name('syllable_position_in_word'),
                      c.phone.following.label.column_name('following_phone'),
                      c.phone.previous.label.column_name('previous_phone'),
                      c.phone.word.label.column_name('word_label'),
                      c.phone.utterance.speech_rate.column_name('speech_rate'),
                      c.phone.syllable.label.column_name('syllable_label'),
                      c.phone.syllable.duration.column_name('syllable_duration'),
                      formants_track)
        ## Get UNISYN postlexical rules for all vowels:
        ## iterate through the word-level type attributes.
        for annotation_type, attrs in c.hierarchy.type_properties.items():
            if annotation_type != 'word':
                continue
            ## UNISYN postlex rule attributes are prefixed with 'do_',
            ## so look for attributes with this prefix; also include the
            ## 'unisynprimstressedvowel...' attributes (X-SAMPA output)
            for attr in attrs:
                try:
                    rule = re.findall('do_.*|unisynprimstressedvowel.*', attr[0])[0]
                    q = q.columns(getattr(c.phone.word, rule).column_name(rule))
                except IndexError:
                    continue

        ## Get speaker metadata columns
        for sp, _ in c.hierarchy.speaker_properties:
            if sp == 'name':
                continue
            q = q.columns(getattr(c.phone.speaker, sp).column_name(sp))

        ## Get the phonological transcription labels if using the Buckeye corpus
        if c.hierarchy.has_token_property('word', 'surface_transcription'):
            print('getting underlying and surface transcriptions')
            q = q.columns(
                c.phone.word.transcription.column_name('word_underlying_transcription'),
                c.phone.word.surface_transcription.column_name('word_surface_transcription'))
        ## Export the query as a CSV
        print("Writing CSV")
        q.to_csv(csv_path)
        end = time.time()
        time_taken = end - beg
        print('Query took: {}'.format(time_taken))
        print("Results for query written to {}".format(csv_path))
        common.save_performance_benchmark(config, 'formant_tracks_export', time_taken)
## Parse and process command line arguments
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('corpus_name', help='Name of the corpus')
    parser.add_argument('-r', '--reset', help="Reset the corpus", action='store_true')
    parser.add_argument('-f', '--formant_reset', help="Reset formant measures", action='store_true', default=False)
    parser.add_argument('-d', '--docker', help="This script is being called from Docker", action='store_true')
    parser.add_argument('-s', '--subset', help="Use the pre-defined vowel subset rather than the vowels defined in the config", action='store_true', default=False)
    args = parser.parse_args()

    corpus_name = args.corpus_name
    reset = args.reset
    docker = args.docker
    reset_formants = args.formant_reset
    vowel_subset = args.subset

    ## Note: the directory check must be relative to base_dir, not the
    ## current working directory
    directories = [x for x in os.listdir(base_dir)
                   if os.path.isdir(os.path.join(base_dir, x)) and x != 'Common']

    ## Check that the corpus has a directory containing
    ## a YAML file
    if args.corpus_name not in directories:
        print(
            'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.'.format(
                args.corpus_name, ', '.join(directories)))
        sys.exit(1)
    corpus_conf = common.load_config(corpus_name)
    print('Processing...')
    ignored_speakers = corpus_conf.get('ignore_speakers', [])

    if reset:
        common.reset(corpus_name)

    ip = common.server_ip
    if docker:
        ip = common.docker_ip

    with ensure_local_database_running(corpus_name, ip=ip, port=common.server_port,
                                       token=common.load_token()) as params:
        print(params)
        config = CorpusConfig(corpus_name, **params)
        config.formant_source = 'praat'

        ## Common set-up:
        ## check whether the corpus has already been imported (i.e., has a database file);
        ## if not, import the corpus using the audio and transcript files
        common.loading(config, corpus_conf['corpus_directory'], corpus_conf['input_format'])

        ## Add lexical, speaker, and linguistic/acoustic enrichments to the database
        common.lexicon_enrichment(config, corpus_conf['unisyn_spade_directory'], corpus_conf['dialect_code'])
        common.speaker_enrichment(config, corpus_conf['speaker_enrichment_file'])
        common.basic_enrichment(config, corpus_conf['vowel_inventory'] + corpus_conf['extra_syllabic_segments'], corpus_conf['pauses'])

        ## Check if the YAML contains a path to the vowel prototypes file;
        ## if not, use the default path (inside the corpus directory)
        vowel_prototypes_path = corpus_conf.get('vowel_prototypes_path', '')
        if not vowel_prototypes_path:
            vowel_prototypes_path = os.path.join(base_dir, corpus_name, '{}_prototypes.csv'.format(corpus_name))

        ## Call the formant track function defined above
        formant_track_export(config, corpus_name, corpus_conf['corpus_directory'], corpus_conf['dialect_code'],
                             corpus_conf['speakers'], corpus_conf['vowel_inventory'],
                             vowel_prototypes_path=vowel_prototypes_path,
                             reset_formants=reset_formants, vowel_subset=vowel_subset,
                             ignored_speakers=ignored_speakers)
        print('Finishing up!')
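
## Example invocation, assuming a hypothetical corpus directory 'Buckeye'
## containing a Buckeye.yaml alongside this script:
##   python formant_track.py Buckeye -s -f
## Here -s restricts analysis to the predefined vowel subset, and -f
## re-estimates formants without re-importing the corpus.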