From e08366d586f804bbeb367a1d9c07def172902723 Mon Sep 17 00:00:00 2001
From: Shahin
Date: Thu, 12 Jul 2018 14:16:59 -0700
Subject: [PATCH] Diarization [(#1556)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1556)

Diarization
---
 speech/snippets/README.rst            |  1 +
 speech/snippets/beta_snippets.py      | 44 +++++++++++++++++++++++++++
 speech/snippets/beta_snippets_test.py | 12 +++++++-
 speech/snippets/requirements.txt      |  2 +-
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/speech/snippets/README.rst b/speech/snippets/README.rst
index 05a20ec67c0e..8efaa01472b0 100644
--- a/speech/snippets/README.rst
+++ b/speech/snippets/README.rst
@@ -230,6 +230,7 @@ To run this sample:
         python beta_snippets.py enhanced-model resources/commercial_mono.wav
         python beta_snippets.py metadata resources/commercial_mono.wav
         python beta_snippets.py punctuation resources/commercial_mono.wav
+        python beta_snippets.py diarization resources/Google_Gnome.wav
 
     positional arguments:
       command
diff --git a/speech/snippets/beta_snippets.py b/speech/snippets/beta_snippets.py
index 95a9d8405e96..a518307f7871 100644
--- a/speech/snippets/beta_snippets.py
+++ b/speech/snippets/beta_snippets.py
@@ -21,6 +21,7 @@
     python beta_snippets.py enhanced-model resources/commercial_mono.wav
     python beta_snippets.py metadata resources/commercial_mono.wav
     python beta_snippets.py punctuation resources/commercial_mono.wav
+    python beta_snippets.py diarization resources/Google_Gnome.wav
 """
 
 import argparse
@@ -126,6 +127,47 @@ def transcribe_file_with_auto_punctuation(path):
 # [END speech_transcribe_file_with_auto_punctuation]
 
 
+# [START speech_transcribe_diarization]
+def transcribe_file_with_diarization(path):
+    """Transcribe the given audio file synchronously with diarization.
+
+    Prints the top transcript of every result, plus the speaker tag of its
+    first word when word-level info is present.
+
+    Args:
+        path: Local audio file; the request declares it as LINEAR16,
+            16000 Hz, en-US audio with two speakers.
+    """
+    client = speech.SpeechClient()
+
+    with open(path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech.types.RecognitionAudio(content=content)
+
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=16000,
+        language_code='en-US',
+        enable_speaker_diarization=True,
+        diarization_speaker_count=2)
+
+    print('Waiting for operation to complete...')
+    response = client.recognize(config, audio)
+
+    for i, result in enumerate(response.results):
+        alternative = result.alternatives[0]
+        print('-' * 20)
+        print('First alternative of result {}: {}'
+              .format(i, alternative.transcript))
+        # Guard the index: an alternative with no word-level info would
+        # otherwise raise IndexError and abort the remaining results.
+        if alternative.words:
+            print('Speaker Tag for the first word: {}'
+                  .format(alternative.words[0].speaker_tag))
+# [END speech_transcribe_diarization]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
@@ -142,3 +184,5 @@ def transcribe_file_with_auto_punctuation(path):
         transcribe_file_with_metadata(args.path)
     elif args.command == 'punctuation':
         transcribe_file_with_auto_punctuation(args.path)
+    elif args.command == 'diarization':
+        transcribe_file_with_diarization(args.path)
diff --git a/speech/snippets/beta_snippets_test.py b/speech/snippets/beta_snippets_test.py
index a241a435d6eb..ef78f941d67a 100644
--- a/speech/snippets/beta_snippets_test.py
+++ b/speech/snippets/beta_snippets_test.py
@@ -14,7 +14,9 @@
 import os
 
 from beta_snippets import (
-    transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
+    transcribe_file_with_auto_punctuation,
+    transcribe_file_with_diarization,
+    transcribe_file_with_enhanced_model,
     transcribe_file_with_metadata)
 
 RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
@@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys):
     out, _ = capsys.readouterr()
 
     assert 'Okay. Sure.' in out
+
+
+def test_transcribe_diarization(capsys):
+    transcribe_file_with_diarization(
+        os.path.join(RESOURCES, 'Google_Gnome.wav'))
+    out, _ = capsys.readouterr()
+
+    assert 'OK Google stream stranger things from Netflix to my TV' in out
diff --git a/speech/snippets/requirements.txt b/speech/snippets/requirements.txt
index 87b74e0d7334..c8b0bf24f01f 100644
--- a/speech/snippets/requirements.txt
+++ b/speech/snippets/requirements.txt
@@ -1 +1 @@
-google-cloud-speech==0.33.0
+google-cloud-speech==0.35.0