Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Diarization #1556

Merged
merged 15 commits into from
Jul 12, 2018
1 change: 1 addition & 0 deletions speech/cloud-client/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ To run this sample:
python beta_snippets.py enhanced-model resources/commercial_mono.wav
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav

positional arguments:
command
Expand Down
33 changes: 33 additions & 0 deletions speech/cloud-client/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
python beta_snippets.py enhanced-model resources/commercial_mono.wav
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
"""

import argparse
Expand Down Expand Up @@ -126,6 +127,36 @@ def transcribe_file_with_auto_punctuation(path):
# [END speech_transcribe_file_with_auto_punctuation]


# [START speech_transcribe_diarization]
def transcribe_file_with_diarization(path):
    """Transcribe the given audio file synchronously with diarization.

    Reads the whole file at ``path`` into memory, sends it to
    ``SpeechClient.recognize`` with speaker diarization enabled and a
    speaker count of 2, then prints the transcript of the first
    alternative of every result along with the speaker tag attached to
    that alternative's first word.

    Args:
        path: Path of the audio file to transcribe. The request config
            declares LINEAR16 encoding, a 16000 Hz sample rate and the
            'en-US' language code, so the file is expected to match.
    """
    client = speech.SpeechClient()

    with open(path, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.types.RecognitionAudio(content=content)

    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_speaker_diarization=True,
        diarization_speaker_count=2)

    print('Waiting for operation to complete...')
    response = client.recognize(config, audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}: {}'
              .format(i, alternative.transcript))
        # Guard the index: if an alternative carries no word entries,
        # words[0] would raise IndexError and abort the remaining results.
        if alternative.words:
            print('Speaker Tag for the first word: {}'
                  .format(alternative.words[0].speaker_tag))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does each word in alternative.words carry other relevant diarization information besides speaker_tag? If so, please print it as well; if not, please ignore this comment.

# [END speech_transcribe_diarization]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -142,3 +173,5 @@ def transcribe_file_with_auto_punctuation(path):
transcribe_file_with_metadata(args.path)
elif args.command == 'punctuation':
transcribe_file_with_auto_punctuation(args.path)
elif args.command == 'diarization':
transcribe_file_with_diarization(args.path)
12 changes: 11 additions & 1 deletion speech/cloud-client/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import os

from beta_snippets import (
transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
transcribe_file_with_auto_punctuation,
transcribe_file_with_diarization,
transcribe_file_with_enhanced_model,
transcribe_file_with_metadata)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
Expand Down Expand Up @@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys):
out, _ = capsys.readouterr()

assert 'Okay. Sure.' in out


def test_transcribe_diarization(capsys):
    """Check the diarization sample prints the expected transcript."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_file_with_diarization(audio_path)

    # Only stdout is inspected; the captured stderr is discarded.
    out, _ = capsys.readouterr()

    assert 'OK Google stream stranger things from Netflix to my TV' in out
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there two different speakers in this audio file? If so, please also assert that the speaker_tags are returned correctly.

2 changes: 1 addition & 1 deletion speech/cloud-client/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
google-cloud-speech==0.33.0
google-cloud-speech==0.35.0