def _make_streaming_request(sample, language_code,
                            max_alternatives, profanity_filter,
                            speech_context, single_utterance,
                            interim_results):
    """Build the initial (configuration-only) streaming request.

    The returned request carries only ``streaming_config``; no audio
    content is attached here.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.
                   Its ``encoding`` and ``sample_rate`` are copied into
                   the recognition config.

    :type language_code: str
    :param language_code: The language of the supplied audio as
                          BCP-47 language tag. Example: ``'en-GB'``.
                          If omitted, defaults to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: Maximum number of recognition
                             hypotheses to be returned. The server may
                             return fewer than maxAlternatives.
                             Valid values are 0-30. A value of 0 or 1
                             will return a maximum of 1. Defaults to 1

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list, tuple or ``SpeechContext``
    :param speech_context: Words and phrases "hints" so that the speech
                           recognition is more likely to recognize them.
                           This can be used to improve the accuracy for
                           specific words and phrases. This can also be
                           used to add new words to the vocabulary of
                           the recognizer. A list or tuple of strings
                           (max 50) is wrapped in a ``SpeechContext``
                           message; any other value (for example an
                           already-built ``SpeechContext`` or ``None``)
                           is forwarded unchanged.

    :type single_utterance: bool
    :param single_utterance: If false or omitted, the recognizer
                             will perform continuous recognition
                             (continuing to process audio even if the user
                             pauses speaking) until the client closes the
                             output stream (gRPC API) or when the maximum
                             time limit has been reached. Multiple
                             SpeechRecognitionResults with the is_final
                             flag set to true may be returned.

                             If true, the recognizer will detect a single
                             spoken utterance. When it detects that the
                             user has paused or stopped speaking, it will
                             return an END_OF_UTTERANCE event and cease
                             recognition. It will return no more than one
                             SpeechRecognitionResult with the is_final flag
                             set to true.

    :type interim_results: bool
    :param interim_results: If true, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these interim results are indicated
                            with the is_final=false flag). If false or
                            omitted, only is_final=true result(s) are
                            returned.

    :rtype: :class:`~google.cloud.grpc.speech.v1beta1.cloud_speech_pb2.\
StreamingRecognizeRequest`
    :returns: Instance of ``StreamingRecognizeRequest`` with only
              ``streaming_config`` populated.
    """
    # ``RecognitionConfig.speech_context`` holds a ``SpeechContext``
    # message, so plain phrase hints have to be wrapped before they are
    # handed to the constructor.  Only list/tuple input is converted so
    # that callers already passing a ``SpeechContext`` (or ``None``) keep
    # working exactly as before.
    if isinstance(speech_context, (list, tuple)):
        speech_context = SpeechContext(phrases=speech_context)

    config = RecognitionConfig(
        encoding=sample.encoding, sample_rate=sample.sample_rate,
        language_code=language_code, max_alternatives=max_alternatives,
        profanity_filter=profanity_filter, speech_context=speech_context)

    streaming_config = StreamingRecognitionConfig(
        config=config, single_utterance=single_utterance,
        interim_results=interim_results)

    return StreamingRecognizeRequest(streaming_config=streaming_config)
class TestSpeechGAX(unittest.TestCase):
    """Unit tests for ``google.cloud.speech._gax._make_streaming_request``."""

    SAMPLE_RATE = 16000
    HINTS = ['hi']
    AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'

    def _callFUT(self, sample, language_code, max_alternatives,
                 profanity_filter, speech_context, single_utterance,
                 interim_results):
        """Invoke the function under test, passing everything by keyword.

        The import is done lazily so that the module under test is only
        loaded when a test actually runs.
        """
        from google.cloud.speech._gax import _make_streaming_request

        request_kwargs = {
            'sample': sample,
            'language_code': language_code,
            'max_alternatives': max_alternatives,
            'profanity_filter': profanity_filter,
            'speech_context': speech_context,
            'single_utterance': single_utterance,
            'interim_results': interim_results,
        }
        return _make_streaming_request(**request_kwargs)

    def test_ctor(self):
        """The built request carries the full config and no audio."""
        from google.cloud import speech
        from google.cloud.speech.sample import Sample
        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
            SpeechContext,
            RecognitionConfig,
            StreamingRecognitionConfig,
            StreamingRecognizeRequest,
        )

        audio_sample = Sample(content=self.AUDIO_CONTENT,
                              encoding=speech.Encoding.FLAC,
                              sample_rate=self.SAMPLE_RATE)
        expected_language = 'US-en'
        expected_alternatives = 2
        hints_context = SpeechContext(phrases=self.HINTS)

        request = self._callFUT(audio_sample, expected_language,
                                expected_alternatives, True,
                                hints_context, True, False)
        self.assertIsInstance(request, StreamingRecognizeRequest)

        # _make_streaming_request() never attaches audio: the first
        # request of a stream may only carry `streaming_config`, and
        # every later request may only carry `audio_content`.
        self.assertEqual(request.audio_content, b'')

        # Streaming-level options.
        outer_config = request.streaming_config
        self.assertIsInstance(outer_config, StreamingRecognitionConfig)
        self.assertTrue(outer_config.single_utterance)
        self.assertFalse(outer_config.interim_results)

        # Recognition-level options nested inside the streaming config.
        inner_config = outer_config.config
        self.assertIsInstance(inner_config, RecognitionConfig)
        # speech.Encoding.FLAC maps to 2.
        self.assertEqual(inner_config.encoding, 2)
        self.assertEqual(inner_config.sample_rate, self.SAMPLE_RATE)
        self.assertEqual(inner_config.language_code, expected_language)
        self.assertEqual(inner_config.max_alternatives,
                         expected_alternatives)
        self.assertTrue(inner_config.profanity_filter)
        self.assertEqual(inner_config.speech_context.phrases, self.HINTS)