def adaptation_v2_custom_class_reference(
    project_id: str,
    recognizer_id: str,
    phrase_set_id: str,
    custom_class_id: str,
    audio_file: str,
) -> "cloud_speech.RecognizeResponse":
    """Transcribe a local audio file using a PhraseSet that references a CustomClass.

    Creates a Recognizer, a persistent CustomClass and a persistent PhraseSet
    under ``projects/{project_id}/locations/global``, then sends a recognition
    request whose adaptation refers to the PhraseSet by resource name. None of
    the created resources are deleted by this function.

    Args:
        project_id: Google Cloud project that owns the created resources.
        recognizer_id: ID to assign to the new Recognizer.
        phrase_set_id: ID to assign to the new PhraseSet.
        custom_class_id: ID to assign to the new CustomClass.
        audio_file: Path to a local audio file; it is read fully into memory.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()
    parent = f"projects/{project_id}/locations/global"

    # Creates a Recognizer
    recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=parent,
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_short"
        ),
    )
    recognizer = client.create_recognizer(request=recognizer_request).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # Create a persistent CustomClass to reference in phrases
    custom_class_request = cloud_speech.CreateCustomClassRequest(
        parent=parent,
        custom_class_id=custom_class_id,
        custom_class=cloud_speech.CustomClass(items=[{"value": "Keem"}]),
    )
    custom_class = client.create_custom_class(request=custom_class_request).result()

    # Create a persistent PhraseSet to reference in a recognition request.
    # The phrase value is "${<custom class resource name>}".
    phrase_set_request = cloud_speech.CreatePhraseSetRequest(
        parent=parent,
        phrase_set_id=phrase_set_id,
        phrase_set=cloud_speech.PhraseSet(
            phrases=[{"value": f"${{{custom_class.name}}}", "boost": 20}]
        ),
    )
    phrase_set = client.create_phrase_set(request=phrase_set_request).result()

    # Add a reference of the PhraseSet into the recognition request
    adaptation = cloud_speech.SpeechAdaptation(
        phrase_sets=[
            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
                phrase_set=phrase_set.name
            )
        ]
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    print(custom_class)
    print(phrase_set)
    print(config)

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=content
    )

    # Transcribes the audio into text
    response = client.recognize(request=recognize_request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_adaptation_v2_custom_class_reference]


if __name__ == "__main__":
    # The function has five required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using a PhraseSet that references a CustomClass."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("phrase_set_id", help="ID for the PhraseSet to create")
    parser.add_argument("custom_class_id", help="ID for the CustomClass to create")
    parser.add_argument("audio_file", help="Path to a local audio file")
    args = parser.parse_args()

    adaptation_v2_custom_class_reference(
        args.project_id,
        args.recognizer_id,
        args.phrase_set_id,
        args.custom_class_id,
        args.audio_file,
    )
# File: speech/snippets/adaptation_v2_custom_class_reference_test.py

RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def delete_phrase_set(name):
    """Send a DeletePhraseSetRequest for the PhraseSet resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeletePhraseSetRequest(name=name)
    client.delete_phrase_set(request=request)


def delete_custom_class(name):
    """Send a DeleteCustomClassRequest for the CustomClass resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteCustomClassRequest(name=name)
    client.delete_custom_class(request=request)


def test_adaptation_v2_custom_class_reference(capsys):
    """Run the snippet on baby_keem.wav and check the transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and every resource gets a fresh UUID-based ID.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    parent = f"projects/{project_id}/locations/global"

    recognizer_id = f"recognizer-{uuid4()}"
    phrase_set_id = f"phrase-set-{uuid4()}"
    custom_class_id = f"custom-class-{uuid4()}"

    try:
        response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
            project_id,
            recognizer_id,
            phrase_set_id,
            custom_class_id,
            os.path.join(RESOURCES, "baby_keem.wav"),
        )

        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the created resources stay behind in the project. The
        # nesting makes each delete run even if an earlier one raises.
        try:
            delete_recognizer(f"{parent}/recognizers/{recognizer_id}")
        finally:
            try:
                delete_phrase_set(f"{parent}/phraseSets/{phrase_set_id}")
            finally:
                delete_custom_class(f"{parent}/customClasses/{custom_class_id}")


# File: speech/snippets/adaptation_v2_inline_custom_class.py


def adaptation_v2_inline_custom_class(
    project_id: str, recognizer_id: str, audio_file: str
) -> "cloud_speech.RecognizeResponse":
    """Transcribe a local audio file using an inline PhraseSet and CustomClass.

    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
    sends a recognition request whose adaptation carries the PhraseSet and the
    CustomClass inline. The Recognizer is not deleted by this function.

    Args:
        project_id: Google Cloud project that owns the Recognizer.
        recognizer_id: ID to assign to the new Recognizer.
        audio_file: Path to a local audio file; it is read fully into memory.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    # Creates a Recognizer
    create_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_short"
        ),
    )
    recognizer = client.create_recognizer(request=create_request).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # Build inline phrase set to produce a more accurate transcript.
    # The phrase "${keem}" refers to the inline custom class named "keem".
    phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "${keem}", "boost": 20}])
    custom_class = cloud_speech.CustomClass(name="keem", items=[{"value": "Keem"}])
    adaptation = cloud_speech.SpeechAdaptation(
        phrase_sets=[
            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
                inline_phrase_set=phrase_set
            )
        ],
        custom_classes=[custom_class],
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=content
    )

    # Transcribes the audio into text
    response = client.recognize(request=recognize_request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_adaptation_v2_inline_custom_class]


if __name__ == "__main__":
    # The function has three required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using an inline PhraseSet and CustomClass."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path to a local audio file")
    args = parser.parse_args()

    adaptation_v2_inline_custom_class(
        args.project_id, args.recognizer_id, args.audio_file
    )
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_adaptation_v2_inline_custom_class(capsys):
    """Run the snippet on baby_keem.wav and check the transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and the Recognizer gets a fresh UUID-based ID.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = f"recognizer-{uuid4()}"

    try:
        response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
            project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
        )

        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the Recognizer stays behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def adaptation_v2_inline_phrase_set(
    project_id: str, recognizer_id: str, audio_file: str
) -> "cloud_speech.RecognizeResponse":
    """Transcribe a local audio file using an inline PhraseSet.

    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
    sends a recognition request whose adaptation carries the PhraseSet inline.
    The Recognizer is not deleted by this function.

    Args:
        project_id: Google Cloud project that owns the Recognizer.
        recognizer_id: ID to assign to the new Recognizer.
        audio_file: Path to a local audio file; it is read fully into memory.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    # Creates a Recognizer
    create_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_short"
        ),
    )
    recognizer = client.create_recognizer(request=create_request).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # Build inline phrase set to produce a more accurate transcript
    phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}])
    adaptation = cloud_speech.SpeechAdaptation(
        phrase_sets=[
            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
                inline_phrase_set=phrase_set
            )
        ]
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=content
    )

    # Transcribes the audio into text
    response = client.recognize(request=recognize_request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_adaptation_v2_inline_phrase_set]


if __name__ == "__main__":
    # The function has three required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using an inline PhraseSet."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path to a local audio file")
    args = parser.parse_args()

    adaptation_v2_inline_phrase_set(
        args.project_id, args.recognizer_id, args.audio_file
    )
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_adaptation_v2_inline_phrase_set(capsys):
    """Run the snippet on baby_keem.wav and check the transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and the Recognizer gets a fresh UUID-based ID.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = f"recognizer-{uuid4()}"

    try:
        response = adaptation_v2_inline_phrase_set.adaptation_v2_inline_phrase_set(
            project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
        )

        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the Recognizer stays behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def adaptation_v2_phrase_set_reference(
    project_id: str, recognizer_id: str, phrase_set_id: str, audio_file: str
) -> "cloud_speech.RecognizeResponse":
    """Transcribe a local audio file using a persistent PhraseSet.

    Creates a Recognizer and a PhraseSet under
    ``projects/{project_id}/locations/global``, then sends a recognition
    request whose adaptation refers to the PhraseSet by resource name.
    Neither resource is deleted by this function.

    Args:
        project_id: Google Cloud project that owns the created resources.
        recognizer_id: ID to assign to the new Recognizer.
        phrase_set_id: ID to assign to the new PhraseSet.
        audio_file: Path to a local audio file; it is read fully into memory.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()
    parent = f"projects/{project_id}/locations/global"

    # Creates a Recognizer
    recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=parent,
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_short"
        ),
    )
    recognizer = client.create_recognizer(request=recognizer_request).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # Create a persistent PhraseSet to reference in a recognition request
    phrase_set_request = cloud_speech.CreatePhraseSetRequest(
        parent=parent,
        phrase_set_id=phrase_set_id,
        phrase_set=cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}]),
    )
    phrase_set = client.create_phrase_set(request=phrase_set_request).result()

    # Add a reference of the PhraseSet into the recognition request
    adaptation = cloud_speech.SpeechAdaptation(
        phrase_sets=[
            cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
                phrase_set=phrase_set.name
            )
        ]
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={}, adaptation=adaptation
    )

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=content
    )

    # Transcribes the audio into text
    response = client.recognize(request=recognize_request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_adaptation_v2_phrase_set_reference]


if __name__ == "__main__":
    # The function has four required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using a persistent PhraseSet."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("phrase_set_id", help="ID for the PhraseSet to create")
    parser.add_argument("audio_file", help="Path to a local audio file")
    args = parser.parse_args()

    adaptation_v2_phrase_set_reference(
        args.project_id, args.recognizer_id, args.phrase_set_id, args.audio_file
    )
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def delete_phrase_set(name):
    """Send a DeletePhraseSetRequest for the PhraseSet resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeletePhraseSetRequest(name=name)
    client.delete_phrase_set(request=request)


def test_adaptation_v2_phrase_set_reference(capsys):
    """Run the snippet on baby_keem.wav and check the transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and both resources get fresh UUID-based IDs.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    parent = f"projects/{project_id}/locations/global"

    recognizer_id = f"recognizer-{uuid4()}"
    phrase_set_id = f"phrase-set-{uuid4()}"

    try:
        response = adaptation_v2_phrase_set_reference.adaptation_v2_phrase_set_reference(
            project_id,
            recognizer_id,
            phrase_set_id,
            os.path.join(RESOURCES, "baby_keem.wav"),
        )

        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the created resources stay behind in the project. The
        # nesting makes the PhraseSet delete run even if the first one raises.
        try:
            delete_recognizer(f"{parent}/recognizers/{recognizer_id}")
        finally:
            delete_phrase_set(f"{parent}/phraseSets/{phrase_set_id}")
{}".format(result.alternatives[0].transcript)) return response - - # [END speech_quickstart_v2] diff --git a/speech/snippets/resources/baby_keem.wav b/speech/snippets/resources/baby_keem.wav new file mode 100644 index 000000000000..4e7a5ca9bcd0 Binary files /dev/null and b/speech/snippets/resources/baby_keem.wav differ diff --git a/speech/snippets/transcribe_file_v2.py b/speech/snippets/transcribe_file_v2.py new file mode 100644 index 000000000000..ef923051e2be --- /dev/null +++ b/speech/snippets/transcribe_file_v2.py @@ -0,0 +1,60 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def transcribe_file_v2(
    project_id: str, recognizer_id: str, audio_file: str
) -> "cloud_speech.RecognizeResponse":
    """Transcribe a local audio file with a newly created Recognizer.

    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
    sends the file's bytes in a single recognition request. The Recognizer is
    not deleted by this function.

    Args:
        project_id: Google Cloud project that owns the Recognizer.
        recognizer_id: ID to assign to the new Recognizer.
        audio_file: Path to a local audio file; it is read fully into memory.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    # Creates a Recognizer
    create_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )
    recognizer = client.create_recognizer(request=create_request).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, content=content
    )

    # Transcribes the audio into text
    response = client.recognize(request=recognize_request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_transcribe_file_v2]


if __name__ == "__main__":
    # The function has three required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(description="Transcribe a local audio file.")
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path to a local audio file")
    args = parser.parse_args()

    transcribe_file_v2(args.project_id, args.recognizer_id, args.audio_file)
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_transcribe_file_v2(capsys):
    """Run the snippet on audio.wav and check the transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and the Recognizer gets a fresh UUID-based ID.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = f"recognizer-{uuid4()}"

    try:
        response = transcribe_file_v2.transcribe_file_v2(
            project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav")
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the Recognizer stays behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def transcribe_gcs_v2(
    project_id: str, recognizer_id: str, gcs_uri: str
) -> "cloud_speech.RecognizeResponse":
    """Transcribe audio referenced by URI with a newly created Recognizer.

    Creates a Recognizer under ``projects/{project_id}/locations/global`` and
    sends a recognition request that points at ``gcs_uri`` instead of carrying
    audio bytes. The Recognizer is not deleted by this function.

    Args:
        project_id: Google Cloud project that owns the Recognizer.
        recognizer_id: ID to assign to the new Recognizer.
        gcs_uri: URI of the audio to transcribe; passed as the request ``uri``.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    # Creates a Recognizer
    create_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )
    recognizer = client.create_recognizer(request=create_request).result()

    config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, uri=gcs_uri
    )

    # Transcribes the audio into text
    response = client.recognize(request=recognize_request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_transcribe_gcs_v2]


if __name__ == "__main__":
    # The function has three required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe an audio file stored in Cloud Storage."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("gcs_uri", help="URI of the audio, e.g. gs://bucket/object")
    args = parser.parse_args()

    transcribe_gcs_v2(args.project_id, args.recognizer_id, args.gcs_uri)
def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_transcribe_gcs_v2(capsys):
    """Run the snippet on a public sample FLAC file and check the transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and the Recognizer gets a fresh UUID-based ID.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = f"recognizer-{uuid4()}"

    try:
        response = transcribe_gcs_v2.transcribe_gcs_v2(
            project_id, recognizer_id, "gs://cloud-samples-data/speech/audio.flac"
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the Recognizer stays behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def transcribe_streaming_v2(
    project_id: str, recognizer_id: str, audio_file: str
) -> "list[cloud_speech.StreamingRecognizeResponse]":
    """Transcribe a local audio file through the streaming recognition API.

    Creates a Recognizer under ``projects/{project_id}/locations/global``,
    splits the file's bytes into chunks and sends them as a request stream:
    one configuration request followed by one request per audio chunk. The
    Recognizer is not deleted by this function.

    Args:
        project_id: Google Cloud project that owns the Recognizer.
        recognizer_id: ID to assign to the new Recognizer.
        audio_file: Path to a local audio file; it is read fully into memory.

    Returns:
        Every response yielded by ``SpeechClient.streaming_recognize``, in the
        order received.
    """
    # Instantiates a client
    client = SpeechClient()

    # Creates a Recognizer
    create_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )
    recognizer = client.create_recognizer(request=create_request).result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # In practice, stream should be a generator yielding chunks of audio data.
    # Clamp the chunk size to at least one byte: for content shorter than five
    # bytes the integer division gives 0, and range() rejects a zero step.
    chunk_length = max(1, len(content) // 5)
    stream = [
        content[start : start + chunk_length]
        for start in range(0, len(content), chunk_length)
    ]
    audio_requests = (
        cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
    )

    recognition_config = cloud_speech.RecognitionConfig(auto_decoding_config={})
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config
    )
    config_request = cloud_speech.StreamingRecognizeRequest(
        recognizer=recognizer.name, streaming_config=streaming_config
    )

    def requests(config, audio):
        # The configuration request goes first, then the audio requests.
        yield config
        yield from audio

    # Transcribes the audio into text
    responses_iterator = client.streaming_recognize(
        requests=requests(config_request, audio_requests)
    )
    responses = []
    for response in responses_iterator:
        responses.append(response)
        for result in response.results:
            print(f"Transcript: {result.alternatives[0].transcript}")

    return responses
# [END speech_transcribe_streaming_v2]


if __name__ == "__main__":
    # The function has three required parameters, so calling it bare raises
    # TypeError; take the values from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe a local audio file using streaming recognition."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path to a local audio file")
    args = parser.parse_args()

    transcribe_streaming_v2(args.project_id, args.recognizer_id, args.audio_file)
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a DeleteRecognizerRequest for the Recognizer resource ``name``."""
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_transcribe_streaming_v2(capsys):
    """Run the snippet on audio.wav and check the combined transcript.

    The project is taken from the GOOGLE_CLOUD_PROJECT environment variable
    and the Recognizer gets a fresh UUID-based ID. The top alternative of
    every result in every response is concatenated before matching.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = f"recognizer-{uuid4()}"

    try:
        responses = transcribe_streaming_v2.transcribe_streaming_v2(
            project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav")
        )

        transcript = "".join(
            result.alternatives[0].transcript
            for response in responses
            for result in response.results
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Cleanup must run even when the assertion (or the snippet) fails,
        # otherwise the Recognizer stays behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )