From 68251b063830357aeff2924e7200fd5b451761e7 Mon Sep 17 00:00:00 2001
From: Jon Wayne Parrott
Date: Mon, 11 Jul 2016 10:23:58 -0700
Subject: [PATCH] Speech beta (#411)

---
 speech/api/requirements-speech_grpc.txt       |   2 +-
 speech/api/speech_async_grpc.py               | 115 ++++++++++++++++++
 speech/api/speech_async_grpc_test.py          |  39 ++++++
 speech/api/speech_async_rest.py               |  98 +++++++++++++++
 speech/api/speech_async_rest_test.py          |  23 ++++
 speech/api/{speech_gcs.py => speech_grpc.py}  |   0
 ...speech_gcs_test.py => speech_grpc_test.py} |   4 +-
 7 files changed, 278 insertions(+), 3 deletions(-)
 create mode 100644 speech/api/speech_async_grpc.py
 create mode 100644 speech/api/speech_async_grpc_test.py
 create mode 100644 speech/api/speech_async_rest.py
 create mode 100644 speech/api/speech_async_rest_test.py
 rename speech/api/{speech_gcs.py => speech_grpc.py} (100%)
 rename speech/api/{speech_gcs_test.py => speech_grpc_test.py} (95%)

diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt
index e23321ba10a7..d2799018f93a 100644
--- a/speech/api/requirements-speech_grpc.txt
+++ b/speech/api/requirements-speech_grpc.txt
@@ -1,4 +1,4 @@
 gcloud==0.17.0
 grpcio==0.15.0
 PyAudio==0.2.9
-grpc-google-cloud-speech==1.0.4
+grpc-google-cloud-speech-v1beta1==1.0.1
diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py
new file mode 100644
index 000000000000..3186d9f30603
--- /dev/null
+++ b/speech/api/speech_async_grpc.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# Copyright (C) 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
+using async GRPC."""
+
+import argparse
+import time
+
+from gcloud.credentials import get_credentials
+from google.cloud.speech.v1beta1 import cloud_speech_pb2
+from google.longrunning import operations_grpc_pb2
+from grpc.beta import implementations
+
+# Keep the request alive for this many seconds
+DEADLINE_SECS = 10
+SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
+
+
+def make_channel(host, port):
+    """Creates an SSL channel with auth credentials from the environment."""
+    # In order to make an https call, use an ssl channel with defaults
+    ssl_channel = implementations.ssl_channel_credentials(None, None, None)
+
+    # Grab application default credentials from the environment
+    creds = get_credentials().create_scoped([SPEECH_SCOPE])
+    # Add a plugin to inject the creds into the header
+    auth_header = (
+        'Authorization',
+        'Bearer ' + creds.get_access_token().access_token)
+    auth_plugin = implementations.metadata_call_credentials(
+        lambda _, cb: cb([auth_header], None),
+        name='google_creds')
+
+    # compose the two together for both ssl and google auth
+    composite_channel = implementations.composite_channel_credentials(
+        ssl_channel, auth_plugin)
+
+    return implementations.secure_channel(host, port, composite_channel)
+
+
+def main(input_uri, encoding, sample_rate):
+    channel = make_channel('speech.googleapis.com', 443)
+    service = cloud_speech_pb2.beta_create_Speech_stub(channel)
+    # The method and parameters can be inferred from the proto from which the
+    # grpc client lib was generated. See:
+    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
+    response = service.AsyncRecognize(cloud_speech_pb2.AsyncRecognizeRequest(
+        config=cloud_speech_pb2.RecognitionConfig(
+            encoding=encoding,
+            sample_rate=sample_rate,
+        ),
+        audio=cloud_speech_pb2.RecognitionAudio(
+            uri=input_uri,
+        )
+    ), DEADLINE_SECS)
+
+    # Print the longrunning operation handle.
+    print(response)
+
+    # Construct a long running operation endpoint.
+    service = operations_grpc_pb2.beta_create_Operations_stub(channel)
+
+    name = response.name
+
+    while True:
+        # Wait one second between polls so the server has time to process.
+        print('Waiting for server processing...')
+        time.sleep(1)
+        # Get the long running operation with response.
+        response = service.GetOperation(
+            operations_grpc_pb2.GetOperationRequest(name=name),
+            DEADLINE_SECS)
+
+        if response.done:
+            break
+
+    # Print the recognition results.
+    results = cloud_speech_pb2.AsyncRecognizeResponse()
+    response.response.Unpack(results)
+    print(results)
+
+
+def _gcs_uri(text):
+    if not text.startswith('gs://'):
+        raise argparse.ArgumentTypeError(
+            'Cloud Storage uri must be of the form gs://bucket/path/')
+    return text
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_uri', type=_gcs_uri)
+    parser.add_argument(
+        '--encoding', default='FLAC', choices=[
+            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
+        help='How the audio file is encoded. See {}#L67'.format(
+            'https://github.com/googleapis/googleapis/blob/master/'
+            'google/cloud/speech/v1beta1/cloud_speech.proto'))
+    parser.add_argument('--sample_rate', type=int, default=16000)
+
+    args = parser.parse_args()
+    main(args.input_uri, args.encoding, args.sample_rate)
diff --git a/speech/api/speech_async_grpc_test.py b/speech/api/speech_async_grpc_test.py
new file mode 100644
index 000000000000..61070326bfac
--- /dev/null
+++ b/speech/api/speech_async_grpc_test.py
@@ -0,0 +1,39 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import re
+import sys
+
+import pytest
+from speech_async_grpc import _gcs_uri
+from speech_async_grpc import main
+
+
+@pytest.mark.skipif(
+    sys.version_info >= (3, 0),
+    reason=("grpc doesn't yet support python3 "
+            'https://github.com/grpc/grpc/issues/282'))
+def test_main(cloud_config, capsys):
+    input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
+
+    main(input_uri, 'FLAC', 16000)
+
+    out, _ = capsys.readouterr()
+    assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
+
+
+def test_gcs_uri():
+    _gcs_uri('gs://bucket/path')
+    with pytest.raises(argparse.ArgumentTypeError):
+        _gcs_uri('/local/path')
diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py
new file mode 100644
index 000000000000..c0ddbdb4cd51
--- /dev/null
+++ b/speech/api/speech_async_rest.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Google Cloud Speech API sample application using the REST API for async
+batch processing."""
+
+# [START import_libraries]
+import argparse
+import base64
+import json
+import time
+
+from googleapiclient import discovery
+from oauth2client.client import GoogleCredentials
+# [END import_libraries]
+
+
+# [START authenticating]
+DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?'
+                 'version={apiVersion}')
+
+
+# Application default credentials provided by env variable
+# GOOGLE_APPLICATION_CREDENTIALS
+def get_speech_service():
+    credentials = GoogleCredentials.get_application_default().create_scoped(
+        ['https://www.googleapis.com/auth/cloud-platform'])
+
+    return discovery.build(
+        'speech', 'v1beta1', credentials=credentials,
+        discoveryServiceUrl=DISCOVERY_URL)
+# [END authenticating]
+
+
+def main(speech_file):
+    """Transcribe the given audio file asynchronously.
+
+    Args:
+        speech_file: the name of the audio file.
+    """
+    # [START construct_request]
+    with open(speech_file, 'rb') as speech:
+        # Base64 encode the binary audio file for inclusion in the request.
+        speech_content = base64.b64encode(speech.read())
+
+    service = get_speech_service()
+    service_request = service.speech().asyncrecognize(
+        body={
+            'config': {
+                'encoding': 'LINEAR16',
+                'sampleRate': 16000
+            },
+            'audio': {
+                'content': speech_content.decode('UTF-8')
+            }
+        })
+    # [END construct_request]
+    # [START send_request]
+    response = service_request.execute()
+    print(json.dumps(response))
+    # [END send_request]
+
+    name = response['name']
+    # Construct a GetOperation request.
+    service_request = service.operations().get(name=name)
+
+    while True:
+        # Wait one second between polls so the server has time to process.
+        print('Waiting for server processing...')
+        time.sleep(1)
+        # Get the long running operation with response.
+        response = service_request.execute()
+
+        if response.get('done'):
+            break
+
+    print(json.dumps(response['response'].get('results', [])))
+
+
+# [START run_application]
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'speech_file', help='Full path of audio file to be recognized')
+    args = parser.parse_args()
+    main(args.speech_file)
+    # [END run_application]
diff --git a/speech/api/speech_async_rest_test.py b/speech/api/speech_async_rest_test.py
new file mode 100644
index 000000000000..d9f79e6aac57
--- /dev/null
+++ b/speech/api/speech_async_rest_test.py
@@ -0,0 +1,23 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from speech_async_rest import main
+
+
+def test_main(resource, capsys):
+    main(resource('audio.raw'))
+    out, _ = capsys.readouterr()
+
+    assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
diff --git a/speech/api/speech_gcs.py b/speech/api/speech_grpc.py
similarity index 100%
rename from speech/api/speech_gcs.py
rename to speech/api/speech_grpc.py
diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_grpc_test.py
similarity index 95%
rename from speech/api/speech_gcs_test.py
rename to speech/api/speech_grpc_test.py
index 7f03ede18ebf..a755b59e4bb8 100644
--- a/speech/api/speech_gcs_test.py
+++ b/speech/api/speech_grpc_test.py
@@ -14,8 +14,8 @@
 import sys
 
 import pytest
-from speech_gcs import _gcs_uri
-from speech_gcs import main
+from speech_grpc import _gcs_uri
+from speech_grpc import main
 
 
 @pytest.mark.skipif(