Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add speech api multichannel samples #2003

Merged
merged 6 commits into from
Feb 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions speech/cloud-client/transcribe_enhanced_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,15 @@
"""

import argparse
import io


def transcribe_file_with_enhanced_model(path):
"""Transcribe the given audio file using an enhanced model."""
# [START speech_transcribe_enhanced_model]
import io
import io

from google.cloud import speech

client = speech.SpeechClient()

# path = 'resources/commercial_mono.wav'
Expand Down
95 changes: 95 additions & 0 deletions speech/cloud-client/transcribe_multichannel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python

# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Speech API sample that demonstrates multichannel recognition.

Example usage:
python transcribe_multichannel.py resources/multi.wav
python transcribe_multichannel.py \
gs://cloud-samples-tests/speech/multi.wav
"""

import argparse


def transcribe_file_with_multichannel(speech_file):
    """Recognize speech in a local audio file, one channel at a time.

    The file at ``speech_file`` is read as raw bytes and sent in a single
    synchronous ``recognize`` request configured for LINEAR16 audio at
    44100 Hz, US English, two channels, with separate recognition per
    channel enabled. For every result in the response, the transcript of
    its first alternative and its channel tag are printed.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech
    client = speech.SpeechClient()

    # Both channels are recognized independently rather than mixed down.
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=44100,
        language_code='en-US',
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True)

    # The audio payload is sent inline, so read the whole file up front.
    with open(speech_file, 'rb') as wav_handle:
        audio_bytes = wav_handle.read()
    audio = speech.types.RecognitionAudio(content=audio_bytes)

    response = client.recognize(config, audio)

    for index, result in enumerate(response.results):
        top_alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(top_alternative.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel]


def transcribe_gcs_with_multichannel(gcs_uri):
    """Recognize speech in an audio file referenced by URI, per channel.

    ``gcs_uri`` is passed to the API as the audio ``uri`` instead of
    uploading the bytes inline. The request is a single synchronous
    ``recognize`` call configured for LINEAR16 audio at 44100 Hz, US
    English, two channels, with separate recognition per channel enabled.
    For every result in the response, the transcript of its first
    alternative and its channel tag are printed.
    """
    # [START speech_transcribe_multichannel_gcs]
    from google.cloud import speech
    client = speech.SpeechClient()

    # Both channels are recognized independently rather than mixed down.
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=44100,
        language_code='en-US',
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True)

    # Only the location is sent; the audio itself is not read locally.
    audio = speech.types.RecognitionAudio(uri=gcs_uri)

    response = client.recognize(config, audio)

    for index, result in enumerate(response.results):
        top_alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(top_alternative.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel_gcs]


if __name__ == '__main__':
    # The module docstring doubles as the --help text; the raw formatter
    # keeps its example commands laid out as written.
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    audio_path = arg_parser.parse_args().path

    # Paths with the gs:// scheme go to the URI-based sample; anything
    # else is treated as a local file.
    transcribe = (
        transcribe_gcs_with_multichannel
        if audio_path.startswith('gs://')
        else transcribe_file_with_multichannel)
    transcribe(audio_path)
36 changes: 36 additions & 0 deletions speech/cloud-client/transcribe_multichannel_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2019, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from transcribe_multichannel import (
transcribe_file_with_multichannel,
transcribe_gcs_with_multichannel)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')


def test_transcribe_multichannel_file(capsys):
    """The local-file sample prints the expected phrase for multi.wav."""
    local_wav = os.path.join(RESOURCES, 'multi.wav')
    transcribe_file_with_multichannel(local_wav)

    # Only stdout matters here; stderr is captured but not inspected.
    printed, _ = capsys.readouterr()
    assert 'how are you doing' in printed


def test_transcribe_multichannel_gcs(capsys):
    """The URI-based sample prints the expected phrase for multi.wav."""
    remote_wav = 'gs://cloud-samples-data/speech/multi.wav'
    transcribe_gcs_with_multichannel(remote_wav)

    # Only stdout matters here; stderr is captured but not inspected.
    printed, _ = capsys.readouterr()
    assert 'how are you doing' in printed