diff --git a/speech/cloud-client/resources/two_channel_16k.wav b/speech/cloud-client/resources/two_channel_16k.wav new file mode 100644 index 000000000000..2db62a8145a6 Binary files /dev/null and b/speech/cloud-client/resources/two_channel_16k.wav differ diff --git a/speech/cloud-client/transcribe_onprem/README.rst b/speech/cloud-client/transcribe_onprem/README.rst new file mode 100644 index 000000000000..dea6504637f9 --- /dev/null +++ b/speech/cloud-client/transcribe_onprem/README.rst @@ -0,0 +1,111 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Speech-to-Text On-Prem Python Samples +=============================================================================== + + +.. warning:: This product is only available to customers that have been granted access. Please `contact us`_ to request access to the Speech-to-Text On-Prem feature. + +This directory contains samples for `Google Cloud Speech-to-Text On-Prem`_. Speech-to-Text On-Prem enables easy integration of Google speech recognition technologies into your on-prem solution. + + +.. _Google Cloud Speech-to-Text On-Prem: https://cloud.google.com/speech-to-text/on-prem/priv/docs + +.. _contact us: https://cloud.google.com/contact + +.. _Google Cloud Speech-to-Text On-Prem: https://cloud.google.com/speech-to-text/on-prem/priv/docs + +Setup +------------------------------------------------------------------------------- + + +Prepare and Deploy API ++++++++++++++++++++++++ + +This sample requires you to have a Kubernetes cluster with the Speech-to-Text On-Prem service deployed. Follow the quickstart steps listed below: + +#. `Setup IAM, Kubernetes, Billing`_ + +#. `Deploy the API using the UI or command line`_ + +#. `Query the API to ensure it's working`_ + + +.. _Query the API to ensure it's working: + https://cloud.google.com/speech-to-text/on-prem/priv/docs/query + +.. 
_Deploy the API using the UI or command line: + https://cloud.google.com/speech-to-text/on-prem/priv/docs/deploy + +.. _Setup IAM, Kubernetes, Billing: + https://cloud.google.com/speech-to-text/on-prem/priv/docs/before-you-begin + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + $ cd python-docs-samples/speech/cloud-client + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Samples +------------------------------------------------------------------------------- + +transcribe_onprem ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +You can run this sample in one of two ways, using a **public IP**: + +.. code-block:: bash + + # Using a Public IP + $ python transcribe_onprem.py --file_path="../resources/two_channel_16k.wav" --api_endpoint=${PUBLIC_IP}:443 + +or by using a **cluster level IP**: + +.. 
code-block:: bash + + # Using a cluster level IP + $ kubectl port-forward -n $NAMESPACE $POD 10000:443 + $ python transcribe_onprem.py --file_path="../resources/two_channel_16k.wav" --api_endpoint="0.0.0.0:10000" + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/speech/cloud-client/transcribe_onprem/transcribe_onprem.py b/speech/cloud-client/transcribe_onprem/transcribe_onprem.py new file mode 100644 index 000000000000..2c050a153f37 --- /dev/null +++ b/speech/cloud-client/transcribe_onprem/transcribe_onprem.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +# Copyright 2020, Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Transcribe a short local audio file using Speech-to-Text On-Prem.

Example usage:
    python transcribe_onprem.py --file_path="../resources/two_channel_16k.wav"
        --api_endpoint="0.0.0.0:10000"
"""

import argparse


# [START speech_transcribe_onprem]
def transcribe_onprem(local_file_path, api_endpoint):
    """
    Transcribe a short audio file using synchronous speech recognition on-prem

    The audio file is read fully into memory, sent in a single synchronous
    ``recognize`` request over an insecure (non-TLS) gRPC channel, and the
    first alternative of every result is printed to stdout.

    Errors raised while opening the file or while calling the service are
    not handled here and propagate to the caller.

    Args:
      local_file_path: The path to local audio file, e.g. /path/audio.wav
      api_endpoint: Endpoint to call for speech recognition, e.g. 0.0.0.0:10000
    """
    from google.cloud import speech_v1p1beta1
    from google.cloud.speech_v1p1beta1 import enums
    import grpc
    import io

    # api_endpoint = '0.0.0.0:10000'
    # local_file_path = '../resources/two_channel_16k.wav'

    # The language of the supplied audio
    language_code = "en-US"

    # Sample rate in Hertz of the audio data sent
    sample_rate_hertz = 16000

    # Encoding of audio data sent. This sample sets this explicitly.
    # This field is optional for FLAC and WAV audio formats.
    encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = {
        "encoding": encoding,
        "language_code": language_code,
        "sample_rate_hertz": sample_rate_hertz,
    }
    with io.open(local_file_path, "rb") as f:
        content = f.read()
    audio = {"content": content}

    # Create a gRPC channel to your server. Using the channel as a context
    # manager closes it once the request has completed, so the connection
    # is not leaked when this function is called from a longer-lived program.
    with grpc.insecure_channel(target=api_endpoint) as channel:
        client = speech_v1p1beta1.SpeechClient(channel=channel)
        response = client.recognize(config, audio)

    for result in response.results:
        # First alternative is the most probable result
        alternative = result.alternatives[0]
        # str.format rather than an f-string: f-strings are a syntax error
        # before Python 3.6, and io.open above is kept for Python 2/3 support.
        print(u"Transcript: {}".format(alternative.transcript))
# [END speech_transcribe_onprem]


if __name__ == "__main__":
    # __doc__ is the module docstring; RawDescriptionHelpFormatter keeps its
    # line breaks intact in the --help output.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--file_path",
        required=True,
        help="Path to local audio file to be recognized, e.g. /path/audio.wav",
    )
    parser.add_argument(
        "--api_endpoint",
        required=True,
        help="Endpoint to call for speech recognition, e.g. 0.0.0.0:10000",
    )

    args = parser.parse_args()
    transcribe_onprem(
        local_file_path=args.file_path, api_endpoint=args.api_endpoint
    )