Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the sample for Word Level Confidence #1567

Merged
merged 4 commits into from
Jul 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions speech/cloud-client/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ To run this sample:
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
python beta_snippets.py multi-language resources/multi.wav en-US es
python beta_snippets.py word-level-conf resources/commercial_mono.wav

positional arguments:
command
Expand Down
42 changes: 40 additions & 2 deletions speech/cloud-client/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
python beta_snippets.py multi-language resources/multi.wav en-US es
python beta_snippets.py word-level-conf resources/commercial_mono.wav
"""

import argparse
Expand Down Expand Up @@ -240,6 +241,39 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
# [END speech_transcribe_multilanguage]


def transcribe_file_with_word_level_confidence(speech_file):
    """Transcribe the given audio file synchronously with
    word level confidence.

    The file is read fully into memory and sent in a single synchronous
    ``recognize`` request configured for LINEAR16 audio at 16000 Hz,
    language ``en-US``, with ``enable_word_confidence`` turned on.

    For every result in the response, the transcript of the first
    alternative is printed, followed by the first word of that alternative
    and its confidence. If the alternative carries no word entries, the
    word line is skipped.

    Args:
        speech_file: Path to the audio file to transcribe.
    """
    # [START speech_transcribe_word_level_confidence]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    with open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.types.RecognitionAudio(content=content)

    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_confidence=True)

    response = client.recognize(config, audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(i))
        print(u'Transcript: {}'.format(alternative.transcript))
        # Indexing words[0] on an empty list would raise IndexError and
        # abort the remaining results, so only report a first word when
        # the alternative actually has word entries.
        if alternative.words:
            first_word = alternative.words[0]
            print(u'First Word and Confidence: ({}, {})'.format(
                first_word.word, first_word.confidence))
    # [END speech_transcribe_word_level_confidence]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -248,9 +282,11 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
parser.add_argument(
'path', help='File for audio file to be recognized')
parser.add_argument(
'first', help='First language in audio file to be recognized')
'first', help='First language in audio file to be recognized',
nargs='?')
parser.add_argument(
'second', help='Second language in audio file to be recognized')
'second', help='Second language in audio file to be recognized',
nargs='?')

args = parser.parse_args()

Expand All @@ -266,3 +302,5 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
transcribe_file_with_multichannel(args.path)
elif args.command == 'multi-language':
transcribe_file_with_multilanguage(args.path, args.first, args.second)
elif args.command == 'word-level-conf':
transcribe_file_with_word_level_confidence(args.path)
11 changes: 10 additions & 1 deletion speech/cloud-client/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
transcribe_file_with_enhanced_model,
transcribe_file_with_metadata,
transcribe_file_with_multichannel,
transcribe_file_with_multilanguage)
transcribe_file_with_multilanguage,
transcribe_file_with_word_level_confidence)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')

Expand Down Expand Up @@ -70,3 +71,11 @@ def test_transcribe_multilanguage_file(capsys):
out, err = capsys.readouterr()

assert 'how are you doing estoy bien e tu' in out


def test_transcribe_word_level_confidence(capsys):
    """Run the word-level-confidence sample on Google_Gnome.wav and check
    that the expected transcript appears in the captured stdout."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    expected_transcript = (
        'OK Google stream stranger things from Netflix to my TV')

    transcribe_file_with_word_level_confidence(audio_path)

    # Only stdout matters here; stderr is captured but not inspected.
    captured_out, _ = capsys.readouterr()

    assert expected_transcript in captured_out