From 1b7f973531c19ec5b09e43ad047bf43a088d4f4f Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 3 Jul 2018 09:42:41 -0700 Subject: [PATCH 01/13] Added diarization sample --- speech/cloud-client/transcribe_diarization.py | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 speech/cloud-client/transcribe_diarization.py diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py new file mode 100644 index 000000000000..9766187af893 --- /dev/null +++ b/speech/cloud-client/transcribe_diarization.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Google Cloud Speech API sample that demonstrates how to request speaker diarization. + +Example usage: + python transcribe_diarization.py \ + resources/Google_Gnome.wav + python transcribe_diarization.py \ + gs://cloud-ml-api-e2e-testing/speech/stereo_audio.wav +""" + +import argparse + + +# [START speech_transcribe_diarization] +def speech_transcribe_diarization(speech_file): + """Transcribe the given audio file synchronously with + the selected model.""" + from google.cloud import speech_v1p1beta1 as speech + client = speech.SpeechClient() + + with open(speech_file, 'rb') as audio_file: + content = audio_file.read() + + audio = speech.types.RecognitionAudio(content=content) + + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code='en-US', + audio_channel_count=2, + enable_separate_recognition_per_channel=True, + enable_automatic_punctuation=True) + + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + + +# [END speech_transcribe_diarization] + + +# [START speech_transcribe_diarization_gcs] +def speech_transcribe_diarization_gcs(gcs_uri): + """Transcribe the given audio file asynchronously with + the selected model.""" + from google.cloud import speech_v1p1beta1 as speech + client = speech.SpeechClient() + + audio = speech.types.RecognitionAudio(uri=gcs_uri) + + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code='en-US', + enable_speaker_diarization=True, + diarization_speaker_count=2, + audio_channel_count=2, + enable_automatic_punctuation=True) + + print('Waiting for operation to complete...') + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + print(u'Transcript: {}'.format(alternative.transcript)) + print(u'First Word Speaker Tag: {}'.format( + alternative.words[0].speaker_tag)) + + +# [END speech_transcribe_diarization_gcs] + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'path', help='File or GCS path for audio file to be recognized') + + args = parser.parse_args() + + if args.path.startswith('gs://'): + speech_transcribe_diarization_gcs(args.path) + else: + speech_transcribe_diarization(args.path) \ No newline at end of file From de21a5d5390e38aafd1c33d6ab6c1fa0ed6993cd Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 3 Jul 2018 10:03:36 -0700 Subject: [PATCH 02/13] Added unit test --- speech/cloud-client/transcribe_diarization.py | 108 +++++++++--------- .../transcribe_diarization_test.py | 35 ++++++ 2 files changed, 89 insertions(+), 54 deletions(-) create mode 100644 speech/cloud-client/transcribe_diarization_test.py diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py index 9766187af893..75060c17ffa0 100644 --- a/speech/cloud-client/transcribe_diarization.py +++ b/speech/cloud-client/transcribe_diarization.py @@ -28,30 +28,30 @@ # [START speech_transcribe_diarization] def speech_transcribe_diarization(speech_file): - """Transcribe the given audio file synchronously with - the selected model.""" - from google.cloud import speech_v1p1beta1 as speech - client = speech.SpeechClient() + """Transcribe the given audio file synchronously with + the selected model.""" + from google.cloud import speech_v1p1beta1 as speech + client = speech.SpeechClient() - with open(speech_file, 'rb') as audio_file: - content = audio_file.read() + with open(speech_file, 'rb') as audio_file: + content = audio_file.read() - audio = speech.types.RecognitionAudio(content=content) + audio = speech.types.RecognitionAudio(content=content) - config = speech.types.RecognitionConfig( - encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, - language_code='en-US', - audio_channel_count=2, - enable_separate_recognition_per_channel=True, - enable_automatic_punctuation=True) + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code='en-US', + audio_channel_count=2, + enable_separate_recognition_per_channel=True, + enable_automatic_punctuation=True) - response = client.recognize(config, audio) + response = client.recognize(config, audio) - for i, result in enumerate(response.results): - alternative = result.alternatives[0] - print('-' * 20) - print('First alternative of result {}'.format(i)) + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) # [END speech_transcribe_diarization] @@ -59,45 +59,45 @@ def speech_transcribe_diarization(speech_file): # [START speech_transcribe_diarization_gcs] def speech_transcribe_diarization_gcs(gcs_uri): - """Transcribe the given audio file asynchronously with - the selected model.""" - from google.cloud import speech_v1p1beta1 as speech - client = speech.SpeechClient() - - audio = speech.types.RecognitionAudio(uri=gcs_uri) - - config = speech.types.RecognitionConfig( - encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, - language_code='en-US', - enable_speaker_diarization=True, - diarization_speaker_count=2, - audio_channel_count=2, - enable_automatic_punctuation=True) - - print('Waiting for operation to complete...') - response = client.recognize(config, audio) - - for i, result in enumerate(response.results): - alternative = result.alternatives[0] - print('-' * 20) - print('First alternative of result {}'.format(i)) - print(u'Transcript: {}'.format(alternative.transcript)) - print(u'First Word Speaker Tag: {}'.format( - alternative.words[0].speaker_tag)) + """Transcribe the given audio file asynchronously with + the selected model.""" + from google.cloud import speech_v1p1beta1 as speech + client = speech.SpeechClient() + + audio = speech.types.RecognitionAudio(uri=gcs_uri) + + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code='en-US', + enable_speaker_diarization=True, + diarization_speaker_count=2, + audio_channel_count=2, + enable_automatic_punctuation=True) + + print('Waiting for operation to complete...') + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + print(u'Transcript: {}'.format(alternative.transcript)) + print(u'First Word Speaker Tag: {}'.format( + alternative.words[0].speaker_tag)) # [END speech_transcribe_diarization_gcs] if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument( - 'path', help='File or GCS path for audio file to be recognized') + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument( + 'path', help='File or GCS path for audio file to be recognized') - args = parser.parse_args() + args = parser.parse_args() - if args.path.startswith('gs://'): - speech_transcribe_diarization_gcs(args.path) - else: - speech_transcribe_diarization(args.path) \ No newline at end of file + if args.path.startswith('gs://'): + speech_transcribe_diarization_gcs(args.path) + else: + speech_transcribe_diarization(args.path) \ No newline at end of file diff --git a/speech/cloud-client/transcribe_diarization_test.py b/speech/cloud-client/transcribe_diarization_test.py new file mode 100644 index 000000000000..bdcf238badab --- /dev/null +++ b/speech/cloud-client/transcribe_diarization_test.py @@ -0,0 +1,35 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re + +import transcribe_diarization + +RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') + + +def test_transcribe_model_selection_file(capsys): + transcribe_diarization.speech_transcribe_diarization( + os.path.join(RESOURCES, 'Google_Gnome.wav')) + out, err = capsys.readouterr() + + assert re.search(r'the weather outside is sunny', out, re.DOTALL | re.I) + + +def test_transcribe_model_selection_gcs(capsys): + transcribe_diarization.speech_transcribe_diarization_gcs( + 'gs://cloud-samples-tests/speech/Google_Gnome.wav') + out, err = capsys.readouterr() + + assert re.search(r'the weather outside is sunny', out, re.DOTALL | re.I) From 7479ad274bacf4c8bebc7990c6e71b3df90cd34d Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 3 Jul 2018 11:25:39 -0700 Subject: [PATCH 03/13] Code cleanup --- speech/cloud-client/transcribe_diarization.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py index 75060c17ffa0..e141cfd7791a 100644 --- a/speech/cloud-client/transcribe_diarization.py +++ b/speech/cloud-client/transcribe_diarization.py @@ -28,8 +28,7 @@ # [START speech_transcribe_diarization] def speech_transcribe_diarization(speech_file): - """Transcribe the given audio file synchronously with - the selected model.""" + """Transcribe the given audio file synchronously with diarization.""" from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() @@ -43,8 +42,7 @@ def speech_transcribe_diarization(speech_file): sample_rate_hertz=8000, language_code='en-US', audio_channel_count=2, - enable_separate_recognition_per_channel=True, - enable_automatic_punctuation=True) + enable_separate_recognition_per_channel=True) response = client.recognize(config, audio) @@ -52,15 +50,12 @@ def speech_transcribe_diarization(speech_file): alternative = result.alternatives[0] print('-' * 20) print('First alternative of result {}'.format(i)) - - # [END speech_transcribe_diarization] # [START speech_transcribe_diarization_gcs] def speech_transcribe_diarization_gcs(gcs_uri): - """Transcribe the given audio file asynchronously with - the selected model.""" + """Transcribe the given audio file asynchronously with diarization.""" from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() @@ -72,8 +67,7 @@ def speech_transcribe_diarization_gcs(gcs_uri): language_code='en-US', enable_speaker_diarization=True, diarization_speaker_count=2, - audio_channel_count=2, - enable_automatic_punctuation=True) + audio_channel_count=2) print('Waiting for operation to complete...') response = client.recognize(config, audio) @@ -85,10 +79,9 @@ def speech_transcribe_diarization_gcs(gcs_uri): print(u'Transcript: {}'.format(alternative.transcript)) print(u'First Word Speaker Tag: {}'.format( alternative.words[0].speaker_tag)) - - # [END speech_transcribe_diarization_gcs] + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) From 8f4a8d80670bc31d13125cfe2615e1c2ddca487c Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 10 Jul 2018 11:58:25 -0700 Subject: [PATCH 04/13] Cleaned up Diarization code and fixed the tests. --- speech/cloud-client/transcribe_diarization.py | 23 +++++++++---------- .../transcribe_diarization_test.py | 10 ++++---- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py index e141cfd7791a..7fd17693f018 100644 --- a/speech/cloud-client/transcribe_diarization.py +++ b/speech/cloud-client/transcribe_diarization.py @@ -39,17 +39,19 @@ def speech_transcribe_diarization(speech_file): config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, + sample_rate_hertz=16000, language_code='en-US', - audio_channel_count=2, - enable_separate_recognition_per_channel=True) + enable_speaker_diarization=True, + diarization_speaker_count=2) + print('Waiting for operation to complete...') response = client.recognize(config, audio) for i, result in enumerate(response.results): alternative = result.alternatives[0] print('-' * 20) - print('First alternative of result {}'.format(i)) + print('First alternative of result {}: {}'.format(i, alternative.transcript)) + print('Speaker Tag for the first word: {}'.format(alternative.words[0].speaker_tag)) # [END speech_transcribe_diarization] @@ -63,11 +65,10 @@ def speech_transcribe_diarization_gcs(gcs_uri): config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, + sample_rate_hertz=16000, language_code='en-US', enable_speaker_diarization=True, - diarization_speaker_count=2, - audio_channel_count=2) + diarization_speaker_count=2) print('Waiting for operation to complete...') response = client.recognize(config, audio) @@ -75,10 +76,8 @@ def speech_transcribe_diarization_gcs(gcs_uri): for i, result in enumerate(response.results): alternative = result.alternatives[0] print('-' * 20) - print('First alternative of result {}'.format(i)) - print(u'Transcript: {}'.format(alternative.transcript)) - print(u'First Word Speaker Tag: {}'.format( - alternative.words[0].speaker_tag)) + print('First alternative of result {}: {}'.format(i, alternative.transcript)) + print('Speaker Tag for the first word: {}'.format(alternative.words[0].speaker_tag)) # [END speech_transcribe_diarization_gcs] @@ -93,4 +92,4 @@ def speech_transcribe_diarization_gcs(gcs_uri): if args.path.startswith('gs://'): speech_transcribe_diarization_gcs(args.path) else: - speech_transcribe_diarization(args.path) \ No newline at end of file + speech_transcribe_diarization(args.path) diff --git a/speech/cloud-client/transcribe_diarization_test.py b/speech/cloud-client/transcribe_diarization_test.py index bdcf238badab..b85b22f99067 100644 --- a/speech/cloud-client/transcribe_diarization_test.py +++ b/speech/cloud-client/transcribe_diarization_test.py @@ -19,17 +19,19 @@ RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') -def test_transcribe_model_selection_file(capsys): +def test_transcribe_diarization(capsys): transcribe_diarization.speech_transcribe_diarization( os.path.join(RESOURCES, 'Google_Gnome.wav')) out, err = capsys.readouterr() - assert re.search(r'the weather outside is sunny', out, re.DOTALL | re.I) + assert re.search(r'OK Google stream stranger things from Netflix to my TV', out, re.DOTALL | re.I) + assert re.search(r'Speaker Tag', out, re.DOTALL | re.I) -def test_transcribe_model_selection_gcs(capsys): +def test_transcribe_diarization_gcs(capsys): transcribe_diarization.speech_transcribe_diarization_gcs( 'gs://cloud-samples-tests/speech/Google_Gnome.wav') out, err = capsys.readouterr() - assert re.search(r'the weather outside is sunny', out, re.DOTALL | re.I) + assert re.search(r'OK Google stream stranger things from Netflix to my TV', out, re.DOTALL | re.I) + assert re.search(r'Speaker Tag', out, re.DOTALL | re.I) From 7c96b39a7435716a202f1e4b1e6c544ee3fe3711 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 10 Jul 2018 12:05:43 -0700 Subject: [PATCH 05/13] Stylization cleanup --- speech/cloud-client/transcribe_diarization.py | 17 +++++++++++------ .../cloud-client/transcribe_diarization_test.py | 12 +++++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py index 7fd17693f018..7a3ade5019d3 100644 --- a/speech/cloud-client/transcribe_diarization.py +++ b/speech/cloud-client/transcribe_diarization.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2017 Google Inc. All Rights Reserved. +# Copyright 2017 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -50,8 +50,10 @@ def speech_transcribe_diarization(speech_file): for i, result in enumerate(response.results): alternative = result.alternatives[0] print('-' * 20) - print('First alternative of result {}: {}'.format(i, alternative.transcript)) - print('Speaker Tag for the first word: {}'.format(alternative.words[0].speaker_tag)) + print('First alternative of result {}: {}' + .format(i, alternative.transcript)) + print('Speaker Tag for the first word: {}' + .format(alternative.words[0].speaker_tag)) # [END speech_transcribe_diarization] @@ -76,14 +78,17 @@ def speech_transcribe_diarization_gcs(gcs_uri): for i, result in enumerate(response.results): alternative = result.alternatives[0] print('-' * 20) - print('First alternative of result {}: {}'.format(i, alternative.transcript)) - print('Speaker Tag for the first word: {}'.format(alternative.words[0].speaker_tag)) + print('First alternative of result {}: {}' + .format(i, alternative.transcript)) + print('Speaker Tag for the first word: {}' + .format(alternative.words[0].speaker_tag)) # [END speech_transcribe_diarization_gcs] if __name__ == '__main__': parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument( 'path', help='File or GCS path for audio file to be recognized') diff --git a/speech/cloud-client/transcribe_diarization_test.py b/speech/cloud-client/transcribe_diarization_test.py index b85b22f99067..7a8e85c52612 100644 --- a/speech/cloud-client/transcribe_diarization_test.py +++ b/speech/cloud-client/transcribe_diarization_test.py @@ -1,4 +1,4 @@ -# Copyright 2016, Google, Inc. +# Copyright 2016, Google, LLC. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,6 +17,8 @@ import transcribe_diarization RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') +OUTPUT1 = r'OK Google stream stranger things from Netflix to my TV' +OUTPUT2 = r'Speaker Tag' def test_transcribe_diarization(capsys): @@ -24,8 +26,8 @@ def test_transcribe_diarization(capsys): os.path.join(RESOURCES, 'Google_Gnome.wav')) out, err = capsys.readouterr() - assert re.search(r'OK Google stream stranger things from Netflix to my TV', out, re.DOTALL | re.I) - assert re.search(r'Speaker Tag', out, re.DOTALL | re.I) + assert re.search(OUTPUT1, out, re.DOTALL | re.I) + assert re.search(OUTPUT2, out, re.DOTALL | re.I) def test_transcribe_diarization_gcs(capsys): @@ -33,5 +35,5 @@ def test_transcribe_diarization_gcs(capsys): 'gs://cloud-samples-tests/speech/Google_Gnome.wav') out, err = capsys.readouterr() - assert re.search(r'OK Google stream stranger things from Netflix to my TV', out, re.DOTALL | re.I) - assert re.search(r'Speaker Tag', out, re.DOTALL | re.I) + assert re.search(OUTPUT1, out, re.DOTALL | re.I) + assert re.search(OUTPUT2, out, re.DOTALL | re.I) From cf2dc43e6747cd3df8e499b079ac9e7b08e85e55 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 10 Jul 2018 12:11:15 -0700 Subject: [PATCH 06/13] Stylization cleanup --- speech/cloud-client/transcribe_diarization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py index 7a3ade5019d3..905953e0fcfd 100644 --- a/speech/cloud-client/transcribe_diarization.py +++ b/speech/cloud-client/transcribe_diarization.py @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -r"""Google Cloud Speech API sample that demonstrates how to request speaker diarization. +r"""Google Cloud Speech API sample that demonstrates how to request +speaker diarization. Example usage: python transcribe_diarization.py \ From 2891e7eb538470fdfee58ce33f44eb5eba8efed5 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 10 Jul 2018 15:19:33 -0700 Subject: [PATCH 07/13] Updated library version --- speech/cloud-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech/cloud-client/requirements.txt b/speech/cloud-client/requirements.txt index 87b74e0d7334..c8b0bf24f01f 100644 --- a/speech/cloud-client/requirements.txt +++ b/speech/cloud-client/requirements.txt @@ -1 +1 @@ -google-cloud-speech==0.33.0 +google-cloud-speech==0.35.0 From 3607c47af5bbfdc20e02856879c8885314a28482 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Tue, 10 Jul 2018 15:45:07 -0700 Subject: [PATCH 08/13] Updates based on Noah's suggestions --- speech/cloud-client/transcribe_diarization.py | 4 ++-- speech/cloud-client/transcribe_diarization_test.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py index 905953e0fcfd..3de921bd2f5c 100644 --- a/speech/cloud-client/transcribe_diarization.py +++ b/speech/cloud-client/transcribe_diarization.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python -# Copyright 2017 Google LLC. All Rights Reserved. +# Copyright 2018 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/speech/cloud-client/transcribe_diarization_test.py b/speech/cloud-client/transcribe_diarization_test.py index 7a8e85c52612..7bb834ba3285 100644 --- a/speech/cloud-client/transcribe_diarization_test.py +++ b/speech/cloud-client/transcribe_diarization_test.py @@ -1,4 +1,4 @@ -# Copyright 2016, Google, LLC. +# Copyright 2018, Google, LLC. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,8 +17,8 @@ import transcribe_diarization RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') -OUTPUT1 = r'OK Google stream stranger things from Netflix to my TV' -OUTPUT2 = r'Speaker Tag' +OUTPUT1 = 'OK Google stream stranger things from Netflix to my TV' +OUTPUT2 = 'Speaker Tag' def test_transcribe_diarization(capsys): From 321cc981b51c7fdf2381e95a49e92082d6180398 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Wed, 11 Jul 2018 10:44:12 -0700 Subject: [PATCH 09/13] Updated README.rst.in and README.rst --- speech/cloud-client/README.rst | 34 +++++++++++++++++++++++++++++++ speech/cloud-client/README.rst.in | 3 +++ 2 files changed, 37 insertions(+) diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index 05a20ec67c0e..c304c18799be 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -240,6 +240,40 @@ To run this sample: +Diarization ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_diarization.py,speech/cloud-client/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python transcribe_diarization.py + + usage: transcribe_diarization.py [-h] path + + Google Cloud Speech API sample that demonstrates how to request + speaker diarization. + + Example usage: + python transcribe_diarization.py \ + resources/Google_Gnome.wav + python transcribe_diarization.py \ + gs://cloud-ml-api-e2e-testing/speech/stereo_audio.wav + + positional arguments: + path File or GCS path for audio file to be recognized + + optional arguments: + -h, --help show this help message and exit + + + The client library diff --git a/speech/cloud-client/README.rst.in b/speech/cloud-client/README.rst.in index 18aa61f0cc6f..a6d9ded52150 100644 --- a/speech/cloud-client/README.rst.in +++ b/speech/cloud-client/README.rst.in @@ -37,6 +37,9 @@ samples: - name: Beta Samples file: beta_snippets.py show_help: true +- name: Diarization + file: transcribe_diarization.py + show_help: true cloud_client_library: true From 544304597a8f78a79cd27115e1aa00874cb94c77 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Wed, 11 Jul 2018 10:48:18 -0700 Subject: [PATCH 10/13] Undoing the updates of README.rst.in and README.rst --- speech/cloud-client/README.rst | 34 ------------------------------- speech/cloud-client/README.rst.in | 3 --- 2 files changed, 37 deletions(-) diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index c304c18799be..05a20ec67c0e 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -240,40 +240,6 @@ To run this sample: -Diarization -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. image:: https://gstatic.com/cloudssh/images/open-btn.png - :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_diarization.py,speech/cloud-client/README.rst - - - - -To run this sample: - -.. code-block:: bash - - $ python transcribe_diarization.py - - usage: transcribe_diarization.py [-h] path - - Google Cloud Speech API sample that demonstrates how to request - speaker diarization. - - Example usage: - python transcribe_diarization.py \ - resources/Google_Gnome.wav - python transcribe_diarization.py \ - gs://cloud-ml-api-e2e-testing/speech/stereo_audio.wav - - positional arguments: - path File or GCS path for audio file to be recognized - - optional arguments: - -h, --help show this help message and exit - - - The client library diff --git a/speech/cloud-client/README.rst.in b/speech/cloud-client/README.rst.in index a6d9ded52150..18aa61f0cc6f 100644 --- a/speech/cloud-client/README.rst.in +++ b/speech/cloud-client/README.rst.in @@ -37,9 +37,6 @@ samples: - name: Beta Samples file: beta_snippets.py show_help: true -- name: Diarization - file: transcribe_diarization.py - show_help: true cloud_client_library: true From c569529e2cfe8cb6fc7cbcc3dc7a9160a9abcc16 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Wed, 11 Jul 2018 11:22:16 -0700 Subject: [PATCH 11/13] Put the beta sample in the right file. --- speech/cloud-client/README.rst | 1 + speech/cloud-client/beta_snippets.py | 34 ++++++ speech/cloud-client/beta_snippets_test.py | 14 ++- speech/cloud-client/transcribe_diarization.py | 101 ------------------ .../transcribe_diarization_test.py | 39 ------- 5 files changed, 47 insertions(+), 142 deletions(-) delete mode 100644 speech/cloud-client/transcribe_diarization.py delete mode 100644 speech/cloud-client/transcribe_diarization_test.py diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index 05a20ec67c0e..8efaa01472b0 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -230,6 +230,7 @@ To run this sample: python beta_snippets.py enhanced-model resources/commercial_mono.wav python beta_snippets.py metadata resources/commercial_mono.wav python beta_snippets.py punctuation resources/commercial_mono.wav + python beta_snippets.py diarization resources/commercial_mono.wav positional arguments: command diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 95a9d8405e96..32e63d2d72ac 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -21,6 +21,7 @@ python beta_snippets.py enhanced-model resources/commercial_mono.wav python beta_snippets.py metadata resources/commercial_mono.wav python beta_snippets.py punctuation resources/commercial_mono.wav + python beta_snippets.py diarization resources/commercial_mono.wav """ import argparse @@ -126,6 +127,36 @@ def transcribe_file_with_auto_punctuation(path): # [END speech_transcribe_file_with_auto_punctuation] +# [START speech_transcribe_diarization] +def transcribe_file_with_diarization(path): + """Transcribe the given audio file synchronously with diarization.""" + client = speech.SpeechClient() + + with open(path, 'rb') as audio_file: + content = audio_file.read() + + audio = speech.types.RecognitionAudio(content=content) + + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=16000, + language_code='en-US', + enable_speaker_diarization=True, + diarization_speaker_count=2) + + print('Waiting for operation to complete...') + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}: {}' + .format(i, alternative.transcript)) + print('Speaker Tag for the first word: {}' + .format(alternative.words[0].speaker_tag)) +# [END speech_transcribe_diarization] + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, @@ -142,3 +173,6 @@ def transcribe_file_with_auto_punctuation(path): transcribe_file_with_metadata(args.path) elif args.command == 'punctuation': transcribe_file_with_auto_punctuation(args.path) + elif args.command == 'diarization': + speech_transcribe_diarization(args.path) + diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index a241a435d6eb..658e0793b55f 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -14,8 +14,10 @@ import os from beta_snippets import ( - transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model, - transcribe_file_with_metadata) + transcribe_file_with_auto_punctuation, + transcribe_file_with_enhanced_model, + transcribe_file_with_metadata, + transcribe_file_with_diarization) RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') @@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys): out, _ = capsys.readouterr() assert 'Okay. Sure.' in out + + +def test_transcribe_diarization(capsys): + transcribe_file_with_diarization( + os.path.join(RESOURCES, 'Google_Gnome.wav')) + out, err = capsys.readouterr() + + assert 'OK Google stream stranger things from Netflix to my TV' in out diff --git a/speech/cloud-client/transcribe_diarization.py b/speech/cloud-client/transcribe_diarization.py deleted file mode 100644 index 3de921bd2f5c..000000000000 --- a/speech/cloud-client/transcribe_diarization.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Google Cloud Speech API sample that demonstrates how to request -speaker diarization. - -Example usage: - python transcribe_diarization.py \ - resources/Google_Gnome.wav - python transcribe_diarization.py \ - gs://cloud-ml-api-e2e-testing/speech/stereo_audio.wav -""" - -import argparse - - -# [START speech_transcribe_diarization] -def speech_transcribe_diarization(speech_file): - """Transcribe the given audio file synchronously with diarization.""" - from google.cloud import speech_v1p1beta1 as speech - client = speech.SpeechClient() - - with open(speech_file, 'rb') as audio_file: - content = audio_file.read() - - audio = speech.types.RecognitionAudio(content=content) - - config = speech.types.RecognitionConfig( - encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=16000, - language_code='en-US', - enable_speaker_diarization=True, - diarization_speaker_count=2) - - print('Waiting for operation to complete...') - response = client.recognize(config, audio) - - for i, result in enumerate(response.results): - alternative = result.alternatives[0] - print('-' * 20) - print('First alternative of result {}: {}' - .format(i, alternative.transcript)) - print('Speaker Tag for the first word: {}' - .format(alternative.words[0].speaker_tag)) -# [END speech_transcribe_diarization] - - -# [START speech_transcribe_diarization_gcs] -def speech_transcribe_diarization_gcs(gcs_uri): - """Transcribe the given audio file asynchronously with diarization.""" - from google.cloud import speech_v1p1beta1 as speech - client = speech.SpeechClient() - - audio = speech.types.RecognitionAudio(uri=gcs_uri) - - config = speech.types.RecognitionConfig( - encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=16000, - language_code='en-US', - enable_speaker_diarization=True, - diarization_speaker_count=2) - - print('Waiting for operation to complete...') - response = client.recognize(config, audio) - - for i, result in enumerate(response.results): - alternative = result.alternatives[0] - print('-' * 20) - print('First alternative of result {}: {}' - .format(i, alternative.transcript)) - print('Speaker Tag for the first word: {}' - .format(alternative.words[0].speaker_tag)) -# [END speech_transcribe_diarization_gcs] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument( - 'path', help='File or GCS path for audio file to be recognized') - - args = parser.parse_args() - - if args.path.startswith('gs://'): - speech_transcribe_diarization_gcs(args.path) - else: - speech_transcribe_diarization(args.path) diff --git a/speech/cloud-client/transcribe_diarization_test.py b/speech/cloud-client/transcribe_diarization_test.py deleted file mode 100644 index 7bb834ba3285..000000000000 --- a/speech/cloud-client/transcribe_diarization_test.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2018, Google, LLC. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re - -import transcribe_diarization - -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') -OUTPUT1 = 'OK Google stream stranger things from Netflix to my TV' -OUTPUT2 = 'Speaker Tag' - - -def test_transcribe_diarization(capsys): - transcribe_diarization.speech_transcribe_diarization( - os.path.join(RESOURCES, 'Google_Gnome.wav')) - out, err = capsys.readouterr() - - assert re.search(OUTPUT1, out, re.DOTALL | re.I) - assert re.search(OUTPUT2, out, re.DOTALL | re.I) - - -def test_transcribe_diarization_gcs(capsys): - transcribe_diarization.speech_transcribe_diarization_gcs( - 'gs://cloud-samples-tests/speech/Google_Gnome.wav') - out, err = capsys.readouterr() - - assert re.search(OUTPUT1, out, re.DOTALL | re.I) - assert re.search(OUTPUT2, out, re.DOTALL | re.I) From bdb3ed79ecfa85c98f12c04ee05846ab8fc9bf12 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Wed, 11 Jul 2018 11:36:16 -0700 Subject: [PATCH 12/13] Fixed the import order --- speech/cloud-client/beta_snippets.py | 2 +- speech/cloud-client/beta_snippets_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 32e63d2d72ac..11cc906498ce 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -174,5 +174,5 @@ def transcribe_file_with_diarization(path): elif args.command == 'punctuation': transcribe_file_with_auto_punctuation(args.path) elif args.command == 'diarization': - speech_transcribe_diarization(args.path) + transcribe_file_with_diarization(args.path) diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index 658e0793b55f..ef78f941d67a 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -15,9 +15,9 @@ from beta_snippets import ( transcribe_file_with_auto_punctuation, + transcribe_file_with_diarization, transcribe_file_with_enhanced_model, - transcribe_file_with_metadata, - transcribe_file_with_diarization) + transcribe_file_with_metadata) RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') From f18b8fc6d1bf50176b747d9648950c31547c6a46 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Wed, 11 Jul 2018 11:43:26 -0700 Subject: [PATCH 13/13] Removed the blank line --- speech/cloud-client/beta_snippets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 11cc906498ce..a518307f7871 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -175,4 +175,3 @@ def transcribe_file_with_diarization(path): transcribe_file_with_auto_punctuation(args.path) elif args.command == 'diarization': transcribe_file_with_diarization(args.path) -