Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(speech): Add Speech-to-Text On-Prem sample #4223

Merged
merged 6 commits into from
Jul 22, 2020
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions speech/cloud-client/transcribe_onprem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python

# Copyright 2020, Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse


# [START speech_transcribe_onprem]
def transcribe_onprem(local_file_path, api_endpoint):
    """Transcribe a short audio file using synchronous on-prem speech
    recognition.

    Args:
        local_file_path: The path to local audio file, e.g. /path/audio.wav
        api_endpoint: Endpoint to call for speech recognition,
            e.g. 0.0.0.0:10000

    Prints the most probable transcript for each recognition result.
    """
    # Imports are kept inside the function so the snippet between the
    # region tags is self-contained; google-cloud-speech and grpcio are
    # third-party dependencies of this sample.
    from google.cloud import speech_v1p1beta1
    from google.cloud.speech_v1p1beta1 import enums
    import grpc
    import io

    # Set the API endpoint to direct requests to (instead of the public
    # Cloud Speech endpoint).
    client_options = {"api_endpoint": api_endpoint}

    # Create a gRPC channel to your server.
    # NOTE(review): insecure_channel sends traffic unencrypted — this is
    # acceptable only for a local/on-prem deployment like this sample.
    channel = grpc.insecure_channel(target=api_endpoint)

    client = speech_v1p1beta1.SpeechClient(
        client_options=client_options, channel=channel
    )

    # The language of the supplied audio
    language_code = "en-US"

    # Sample rate in Hertz of the audio data sent
    sample_rate_hertz = 8000

    # Encoding of audio data sent. This sample sets this explicitly.
    # This field is optional for FLAC and WAV audio formats.
    encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = {
        "encoding": encoding,
        "language_code": language_code,
        "sample_rate_hertz": sample_rate_hertz,
    }

    # Read the entire audio file into memory; the synchronous recognize
    # API takes the raw bytes inline.
    with io.open(local_file_path, "rb") as audio_file:
        content = audio_file.read()
    audio = {"content": content}

    response = client.recognize(config, audio)
    for result in response.results:
        # First alternative is the most probable result
        alternative = result.alternatives[0]
        print(u"Transcript: {}".format(alternative.transcript))
# [END speech_transcribe_onprem]


if __name__ == "__main__":
    # Command-line entry point: both flags are mandatory.
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    arg_parser.add_argument(
        "--file_path",
        help="Path to local audio file to be recognized, e.g. /path/audio.wav",
        required=True,
    )
    arg_parser.add_argument(
        "--api_endpoint",
        help="Endpoint to call for speech recognition, e.g. 0.0.0.0:10000",
        required=True,
    )
    cli_args = arg_parser.parse_args()

    transcribe_onprem(
        local_file_path=cli_args.file_path,
        api_endpoint=cli_args.api_endpoint,
    )