Skip to content

Commit

Permalink
Multi Channel [(#1561)](#1561)
Browse files Browse the repository at this point in the history
* Implemented the multi channel sample

* Added parameter comment

* Moved region tags inside the functions

* Deleted the extra line

* Fixing typos
  • Loading branch information
happyhuman authored and telpirion committed Mar 13, 2023
1 parent 381c2d1 commit 991d55e
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 19 deletions.
1 change: 1 addition & 0 deletions speech/snippets/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ To run this sample:
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
positional arguments:
command
Expand Down
86 changes: 68 additions & 18 deletions speech/snippets/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,23 @@
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
"""

import argparse
import io

from google.cloud import speech_v1p1beta1 as speech


# [START speech_transcribe_file_with_enhanced_model]
def transcribe_file_with_enhanced_model(path):
def transcribe_file_with_enhanced_model(speech_file):
"""Transcribe the given audio file using an enhanced model."""
# [START speech_transcribe_file_with_enhanced_model]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with io.open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
Expand All @@ -56,15 +59,19 @@ def transcribe_file_with_enhanced_model(path):
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_enhanced_model]
# [END speech_transcribe_file_with_enhanced_model]


# [START speech_transcribe_file_with_metadata]
def transcribe_file_with_metadata(path):
def transcribe_file_with_metadata(speech_file):
"""Send a request that includes recognition metadata."""
# [START speech_transcribe_file_with_metadata]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with io.open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

# Here we construct a recognition metadata object.
Expand Down Expand Up @@ -98,15 +105,19 @@ def transcribe_file_with_metadata(path):
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_metadata]
# [END speech_transcribe_file_with_metadata]


# [START speech_transcribe_file_with_auto_punctuation]
def transcribe_file_with_auto_punctuation(path):
def transcribe_file_with_auto_punctuation(speech_file):
"""Transcribe the given audio file with auto punctuation enabled."""
# [START speech_transcribe_file_with_auto_punctuation]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with io.open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
Expand All @@ -124,15 +135,19 @@ def transcribe_file_with_auto_punctuation(path):
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_auto_punctuation]
# [END speech_transcribe_file_with_auto_punctuation]


# [START speech_transcribe_diarization]
def transcribe_file_with_diarization(path):
def transcribe_file_with_diarization(speech_file):
"""Transcribe the given audio file synchronously with diarization."""
# [START speech_transcribe_diarization]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
Expand All @@ -154,7 +169,40 @@ def transcribe_file_with_diarization(path):
.format(i, alternative.transcript))
print('Speaker Tag for the first word: {}'
.format(alternative.words[0].speaker_tag))
# [END speech_transcribe_diarization]
# [END speech_transcribe_diarization]


def transcribe_file_with_multichannel(speech_file):
    """Synchronously transcribe an audio file with per-channel recognition.

    Reads the whole file at ``speech_file`` into memory, sends it in a single
    ``recognize`` request with ``enable_separate_recognition_per_channel``
    turned on, and prints the first alternative's transcript together with
    the channel tag of every result in the response.

    Args:
        speech_file: Path to the audio file; it is opened in binary mode.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    with open(speech_file, 'rb') as wav_handle:
        raw_audio = wav_handle.read()

    recognition_audio = speech.types.RecognitionAudio(content=raw_audio)

    # NOTE(review): audio_channel_count is 1 even though this sample is about
    # multi-channel audio -- confirm it matches the file being transcribed.
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True)

    response = client.recognize(recognition_config, recognition_audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        best_guess = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(best_guess.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel]


if __name__ == '__main__':
Expand All @@ -175,3 +223,5 @@ def transcribe_file_with_diarization(path):
transcribe_file_with_auto_punctuation(args.path)
elif args.command == 'diarization':
transcribe_file_with_diarization(args.path)
elif args.command == 'multi-channel':
transcribe_file_with_multichannel(args.path)
11 changes: 10 additions & 1 deletion speech/snippets/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
transcribe_file_with_auto_punctuation,
transcribe_file_with_diarization,
transcribe_file_with_enhanced_model,
transcribe_file_with_metadata)
transcribe_file_with_metadata,
transcribe_file_with_multichannel)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')

Expand Down Expand Up @@ -52,3 +53,11 @@ def test_transcribe_diarization(capsys):
out, err = capsys.readouterr()

assert 'OK Google stream stranger things from Netflix to my TV' in out


def test_transcribe_multichannel_file(capsys):
    """Check the multichannel sample prints the expected transcript."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_file_with_multichannel(audio_path)

    # Only stdout is inspected; stderr is captured but ignored.
    printed, _ = capsys.readouterr()

    expected = 'OK Google stream stranger things from Netflix to my TV'
    assert expected in printed

0 comments on commit 991d55e

Please sign in to comment.