diff --git a/speech/microphone/README.rst b/speech/microphone/README.rst new file mode 100644 index 000000000000..6363b5738a98 --- /dev/null +++ b/speech/microphone/README.rst @@ -0,0 +1,82 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Speech API Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/microphone/README.rst + + +This directory contains samples for Google Cloud Speech API. The `Google Cloud Speech API`_ enables easy integration of Google speech recognition technologies into developer applications. Send audio and receive a text transcription from the Cloud Speech API service. + +- See the `migration guide`_ for information about migrating to Python client library v0.27. + +.. _migration guide: https://cloud.google.com/speech/docs/python-client-migration + + + + +.. _Google Cloud Speech API: https://cloud.google.com/speech/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. 
You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/speech/microphone/README.rst.in b/speech/microphone/README.rst.in new file mode 100644 index 000000000000..11831cca2df4 --- /dev/null +++ b/speech/microphone/README.rst.in @@ -0,0 +1,24 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Speech API + short_name: Cloud Speech API + url: https://cloud.google.com/speech/docs/ + description: > + The `Google Cloud Speech API`_ enables easy integration of Google speech + recognition technologies into developer applications. Send audio and receive + a text transcription from the Cloud Speech API service. + + + - See the `migration guide`_ for information about migrating to Python client library v0.27. + + + .. 
_migration guide: https://cloud.google.com/speech/docs/python-client-migration + +setup: +- auth +- install_deps + +cloud_client_library: true + +folder: speech/microphone \ No newline at end of file diff --git a/speech/microphone/requirements.txt b/speech/microphone/requirements.txt new file mode 100644 index 000000000000..88d0bd85cb12 --- /dev/null +++ b/speech/microphone/requirements.txt @@ -0,0 +1,3 @@ +google-cloud-speech==0.36.3 +pyaudio==0.2.11 +six==1.12.0 diff --git a/speech/microphone/resources/quit.raw b/speech/microphone/resources/quit.raw new file mode 100644 index 000000000000..a01dfc45a597 Binary files /dev/null and b/speech/microphone/resources/quit.raw differ diff --git a/speech/microphone/transcribe_streaming_indefinite.py b/speech/microphone/transcribe_streaming_indefinite.py new file mode 100644 index 000000000000..f1adb2247f13 --- /dev/null +++ b/speech/microphone/transcribe_streaming_indefinite.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python + +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud Speech API sample application using the streaming API. + +NOTE: This module requires the additional dependency `pyaudio`. 
To install
+using pip:
+
+    pip install pyaudio
+
+Example usage:
+    python transcribe_streaming_indefinite.py
+"""
+
+# [START speech_transcribe_infinite_streaming]
+from __future__ import division
+
+import time
+import re
+import sys
+
+from google.cloud import speech
+
+import pyaudio
+from six.moves import queue
+
+# Audio recording parameters
+STREAMING_LIMIT = 55000
+SAMPLE_RATE = 16000
+CHUNK_SIZE = int(SAMPLE_RATE / 10)  # 100ms
+
+
+def get_current_time():
+    return int(round(time.time() * 1000))
+
+
+def duration_to_secs(duration):
+    return duration.seconds + (duration.nanos / float(1e9))
+
+
+class ResumableMicrophoneStream:
+    """Opens a recording stream as a generator yielding the audio chunks."""
+    def __init__(self, rate, chunk_size):
+        self._rate = rate
+        self._chunk_size = chunk_size
+        self._num_channels = 1
+        self._max_replay_secs = 5
+
+        # Create a thread-safe buffer of audio data
+        self._buff = queue.Queue()
+        self.closed = True
+        self.start_time = get_current_time()
+
+        # 2 bytes in 16 bit samples
+        self._bytes_per_sample = 2 * self._num_channels
+        self._bytes_per_second = self._rate * self._bytes_per_sample
+
+        self._bytes_per_chunk = (self._chunk_size * self._bytes_per_sample)
+        self._chunks_per_second = (
+            self._bytes_per_second // self._bytes_per_chunk)
+
+    def __enter__(self):
+        self.closed = False
+
+        self._audio_interface = pyaudio.PyAudio()
+        self._audio_stream = self._audio_interface.open(
+            format=pyaudio.paInt16,
+            channels=self._num_channels,
+            rate=self._rate,
+            input=True,
+            frames_per_buffer=self._chunk_size,
+            # Run the audio stream asynchronously to fill the buffer object.
+            # This is necessary so that the input device's buffer doesn't
+            # overflow while the calling thread makes network requests, etc.
+            stream_callback=self._fill_buffer,
+        )
+
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self._audio_stream.stop_stream()
+        self._audio_stream.close()
+        self.closed = True
+        # Signal the generator to terminate so that the client's
+        # streaming_recognize method will not block the process termination.
+        self._buff.put(None)
+        self._audio_interface.terminate()
+
+    def _fill_buffer(self, in_data, *args, **kwargs):
+        """Continuously collect data from the audio stream, into the buffer."""
+        self._buff.put(in_data)
+        return None, pyaudio.paContinue
+
+    def generator(self):
+        while not self.closed:
+            if get_current_time() - self.start_time > STREAMING_LIMIT:
+                self.start_time = get_current_time()
+                break
+            # Use a blocking get() to ensure there's at least one chunk of
+            # data, and stop iteration if the chunk is None, indicating the
+            # end of the audio stream.
+            chunk = self._buff.get()
+            if chunk is None:
+                return
+            data = [chunk]
+
+            # Now consume whatever other data's still buffered.
+            while True:
+                try:
+                    chunk = self._buff.get(block=False)
+                    if chunk is None:
+                        return
+                    data.append(chunk)
+                except queue.Empty:
+                    break
+
+            yield b''.join(data)
+
+
+def listen_print_loop(responses, stream):
+    """Iterates through server responses and prints them.
+
+    The responses passed is a generator that will block until a response
+    is provided by the server.
+
+    Each response may contain multiple results, and each result may contain
+    multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
+    print only the transcription for the top alternative of the top result.
+
+    In this case, responses are provided for interim results as well. If the
+    response is an interim one, print a line feed at the end of it, to allow
+    the next result to overwrite it, until the response is a final one. For the
+    final one, print a newline to preserve the finalized transcription.
+    """
+    responses = (r for r in responses if (
+            r.results and r.results[0].alternatives))
+
+    num_chars_printed = 0
+    for response in responses:
+        if not response.results:
+            continue
+
+        # The `results` list is consecutive. For streaming, we only care about
+        # the first result being considered, since once it's `is_final`, it
+        # moves on to considering the next utterance.
+        result = response.results[0]
+        if not result.alternatives:
+            continue
+
+        # Display the transcription of the top alternative.
+        top_alternative = result.alternatives[0]
+        transcript = top_alternative.transcript
+
+        # Display interim results, but with a carriage return at the end of the
+        # line, so subsequent lines will overwrite them.
+        #
+        # If the previous result was longer than this one, we need to print
+        # some extra spaces to overwrite the previous result
+        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
+
+        if not result.is_final:
+            sys.stdout.write(transcript + overwrite_chars + '\r')
+            sys.stdout.flush()
+
+            num_chars_printed = len(transcript)
+        else:
+            print(transcript + overwrite_chars)
+
+            # Exit recognition if any of the transcribed phrases could be
+            # one of our keywords.
+            if re.search(r'\b(exit|quit)\b', transcript, re.I):
+                print('Exiting..')
+                stream.closed = True
+                break
+
+            num_chars_printed = 0
+
+
+def main():
+    client = speech.SpeechClient()
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=SAMPLE_RATE,
+        language_code='en-US',
+        max_alternatives=1,
+        enable_word_time_offsets=True)
+    streaming_config = speech.types.StreamingRecognitionConfig(
+        config=config,
+        interim_results=True)
+
+    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
+
+    print('Say "Quit" or "Exit" to terminate the program.')
+
+    with mic_manager as stream:
+        while not stream.closed:
+            audio_generator = stream.generator()
+            requests = (speech.types.StreamingRecognizeRequest(
+                audio_content=content)
+                for content in audio_generator)
+
+            responses = client.streaming_recognize(streaming_config,
+                                                   requests)
+            # Now, put the transcription responses to use.
+            listen_print_loop(responses, stream)
+
+
+if __name__ == '__main__':
+    main()
+# [END speech_transcribe_infinite_streaming]
diff --git a/speech/microphone/transcribe_streaming_mic.py b/speech/microphone/transcribe_streaming_mic.py
new file mode 100644
index 000000000000..3ca7b7094124
--- /dev/null
+++ b/speech/microphone/transcribe_streaming_mic.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Speech API sample application using the streaming API.
+
+NOTE: This module requires the additional dependency `pyaudio`. To install
+using pip:
+
+    pip install pyaudio
+
+Example usage:
+    python transcribe_streaming_mic.py
+"""
+
+# [START speech_transcribe_streaming_mic]
+from __future__ import division
+
+import re
+import sys
+
+from google.cloud import speech
+from google.cloud.speech import enums
+from google.cloud.speech import types
+import pyaudio
+from six.moves import queue
+
+# Audio recording parameters
+RATE = 16000
+CHUNK = int(RATE / 10)  # 100ms
+
+
+class MicrophoneStream(object):
+    """Opens a recording stream as a generator yielding the audio chunks."""
+    def __init__(self, rate, chunk):
+        self._rate = rate
+        self._chunk = chunk
+
+        # Create a thread-safe buffer of audio data
+        self._buff = queue.Queue()
+        self.closed = True
+
+    def __enter__(self):
+        self._audio_interface = pyaudio.PyAudio()
+        self._audio_stream = self._audio_interface.open(
+            format=pyaudio.paInt16,
+            # The API currently only supports 1-channel (mono) audio
+            # https://goo.gl/z757pE
+            channels=1, rate=self._rate,
+            input=True, frames_per_buffer=self._chunk,
+            # Run the audio stream asynchronously to fill the buffer object.
+            # This is necessary so that the input device's buffer doesn't
+            # overflow while the calling thread makes network requests, etc.
+            stream_callback=self._fill_buffer,
+        )
+
+        self.closed = False
+
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self._audio_stream.stop_stream()
+        self._audio_stream.close()
+        self.closed = True
+        # Signal the generator to terminate so that the client's
+        # streaming_recognize method will not block the process termination.
+        self._buff.put(None)
+        self._audio_interface.terminate()
+
+    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
+        """Continuously collect data from the audio stream, into the buffer."""
+        self._buff.put(in_data)
+        return None, pyaudio.paContinue
+
+    def generator(self):
+        while not self.closed:
+            # Use a blocking get() to ensure there's at least one chunk of
+            # data, and stop iteration if the chunk is None, indicating the
+            # end of the audio stream.
+            chunk = self._buff.get()
+            if chunk is None:
+                return
+            data = [chunk]
+
+            # Now consume whatever other data's still buffered.
+            while True:
+                try:
+                    chunk = self._buff.get(block=False)
+                    if chunk is None:
+                        return
+                    data.append(chunk)
+                except queue.Empty:
+                    break
+
+            yield b''.join(data)
+
+
+def listen_print_loop(responses):
+    """Iterates through server responses and prints them.
+
+    The responses passed is a generator that will block until a response
+    is provided by the server.
+
+    Each response may contain multiple results, and each result may contain
+    multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
+    print only the transcription for the top alternative of the top result.
+
+    In this case, responses are provided for interim results as well. If the
+    response is an interim one, print a line feed at the end of it, to allow
+    the next result to overwrite it, until the response is a final one. For the
+    final one, print a newline to preserve the finalized transcription.
+    """
+    num_chars_printed = 0
+    for response in responses:
+        if not response.results:
+            continue
+
+        # The `results` list is consecutive. For streaming, we only care about
+        # the first result being considered, since once it's `is_final`, it
+        # moves on to considering the next utterance.
+        result = response.results[0]
+        if not result.alternatives:
+            continue
+
+        # Display the transcription of the top alternative.
+        transcript = result.alternatives[0].transcript
+
+        # Display interim results, but with a carriage return at the end of the
+        # line, so subsequent lines will overwrite them.
+        #
+        # If the previous result was longer than this one, we need to print
+        # some extra spaces to overwrite the previous result
+        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
+
+        if not result.is_final:
+            sys.stdout.write(transcript + overwrite_chars + '\r')
+            sys.stdout.flush()
+
+            num_chars_printed = len(transcript)
+
+        else:
+            print(transcript + overwrite_chars)
+
+            # Exit recognition if any of the transcribed phrases could be
+            # one of our keywords.
+            if re.search(r'\b(exit|quit)\b', transcript, re.I):
+                print('Exiting..')
+                break
+
+            num_chars_printed = 0
+
+
+def main():
+    # See http://g.co/cloud/speech/docs/languages
+    # for a list of supported languages.
+    language_code = 'en-US'  # a BCP-47 language tag
+
+    client = speech.SpeechClient()
+    config = types.RecognitionConfig(
+        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=RATE,
+        language_code=language_code)
+    streaming_config = types.StreamingRecognitionConfig(
+        config=config,
+        interim_results=True)
+
+    with MicrophoneStream(RATE, CHUNK) as stream:
+        audio_generator = stream.generator()
+        requests = (types.StreamingRecognizeRequest(audio_content=content)
+                    for content in audio_generator)
+
+        responses = client.streaming_recognize(streaming_config, requests)
+
+        # Now, put the transcription responses to use.
+        listen_print_loop(responses)
+
+
+if __name__ == '__main__':
+    main()
+# [END speech_transcribe_streaming_mic]
diff --git a/speech/microphone/transcribe_streaming_mic_test.py b/speech/microphone/transcribe_streaming_mic_test.py
new file mode 100644
index 000000000000..dd5e7ea6f5e6
--- /dev/null
+++ b/speech/microphone/transcribe_streaming_mic_test.py
@@ -0,0 +1,69 @@
+# Copyright 2017, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import threading
+import time
+
+import mock
+
+RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+
+
+class MockPyAudio(object):
+    def __init__(self, audio_filename):
+        self.audio_filename = audio_filename
+
+    def __call__(self, *args):
+        return self
+
+    def open(self, stream_callback, rate, *args, **kwargs):
+        self.rate = rate
+        self.closed = threading.Event()
+        self.stream_thread = threading.Thread(
+            target=self.stream_audio, args=(
+                self.audio_filename, stream_callback, self.closed))
+        self.stream_thread.start()
+        return self
+
+    def close(self):
+        self.closed.set()
+
+    def stop_stream(self):
+        pass
+
+    def terminate(self):
+        pass
+
+    def stream_audio(self, audio_filename, callback, closed, num_frames=512):
+        with open(audio_filename, 'rb') as audio_file:
+            while not closed.is_set():
+                # Approximate realtime by sleeping for the appropriate time for
+                # the requested number of frames
+                time.sleep(num_frames / float(self.rate))
+                # audio is 16-bit samples, whereas python byte is 8-bit
+                num_bytes = 2 * num_frames
+                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
+                callback(chunk, None, None, None)
+
+
+@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
+    PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
+def test_main(capsys):
+    import transcribe_streaming_mic
+
+    transcribe_streaming_mic.main()
+    out, err = capsys.readouterr()
+
+    assert re.search(r'quit', out, re.DOTALL | re.I)