From 08371098d602448a4ebe677bcfcb417f23e7712b Mon Sep 17 00:00:00 2001
From: Yu-Han Liu
Date: Mon, 31 Jul 2017 12:21:27 -0700
Subject: [PATCH] add word time offsets to async sample [(#1042)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1042)

---
 speech/snippets/transcribe_async.py      | 12 +++++++++++-
 speech/snippets/transcribe_async_test.py | 11 +++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/speech/snippets/transcribe_async.py b/speech/snippets/transcribe_async.py
index 9e5a416a5674..b25121217ff6 100644
--- a/speech/snippets/transcribe_async.py
+++ b/speech/snippets/transcribe_async.py
@@ -79,7 +79,8 @@ def transcribe_gcs(gcs_uri):
     config = types.RecognitionConfig(
         encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
         sample_rate_hertz=16000,
-        language_code='en-US')
+        language_code='en-US',
+        enable_word_time_offsets=True)
 
     operation = client.long_running_recognize(config, audio)
 
@@ -96,6 +97,15 @@ def transcribe_gcs(gcs_uri):
     for alternative in alternatives:
         print('Transcript: {}'.format(alternative.transcript))
         print('Confidence: {}'.format(alternative.confidence))
+
+        for word_info in alternative.words:
+            word = word_info.word
+            start_time = word_info.start_time
+            end_time = word_info.end_time
+            print('Word: {}, start_time: {}, end_time: {}'.format(
+                word,
+                start_time.seconds + start_time.nanos * 1e-9,
+                end_time.seconds + end_time.nanos * 1e-9))
     # [END def_transcribe_gcs]
 
 
diff --git a/speech/snippets/transcribe_async_test.py b/speech/snippets/transcribe_async_test.py
index 7d66747eb446..286434d06096 100644
--- a/speech/snippets/transcribe_async_test.py
+++ b/speech/snippets/transcribe_async_test.py
@@ -33,3 +33,14 @@ def test_transcribe_gcs(capsys):
     out, err = capsys.readouterr()
 
     assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
+
+
+def test_transcribe_gcs_word_time_offsets(capsys):
+    transcribe_async.transcribe_gcs(
+        'gs://python-docs-samples-tests/speech/audio.flac')
+    out, err = capsys.readouterr()
+
+    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
+    time = float(match.group(1))
+
+    assert time > 0