Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Diarization
  • Loading branch information
happyhuman authored and busunkim96 committed Sep 3, 2020
1 parent e8bc008 commit 211966e
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 2 deletions.
1 change: 1 addition & 0 deletions google-cloud-speech/samples/snippets/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ To run this sample:
python beta_snippets.py enhanced-model resources/commercial_mono.wav
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
positional arguments:
command
Expand Down
33 changes: 33 additions & 0 deletions google-cloud-speech/samples/snippets/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
python beta_snippets.py enhanced-model resources/commercial_mono.wav
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
"""

import argparse
Expand Down Expand Up @@ -126,6 +127,36 @@ def transcribe_file_with_auto_punctuation(path):
# [END speech_transcribe_file_with_auto_punctuation]


# [START speech_transcribe_diarization]
def transcribe_file_with_diarization(path):
    """Transcribe the given audio file synchronously with diarization.

    Reads the file at ``path`` as raw bytes, sends it to the Speech API in
    a single synchronous ``recognize`` call with speaker diarization
    enabled, and prints the transcript of the first alternative of every
    result, followed by the speaker tag of that alternative's first word
    when word-level entries are present.

    Args:
        path: Path of the local audio file to transcribe. The request
            config declares LINEAR16 audio at 16000 Hz, language en-US,
            and a diarization speaker count of 2.
    """
    client = speech.SpeechClient()

    with open(path, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.types.RecognitionAudio(content=content)

    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_speaker_diarization=True,
        diarization_speaker_count=2)

    print('Waiting for operation to complete...')
    response = client.recognize(config, audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}: {}'
              .format(i, alternative.transcript))
        # Guard the lookup: indexing words[0] on an alternative that has
        # no word entries would raise IndexError and abort the loop before
        # the remaining results are printed.
        if alternative.words:
            print('Speaker Tag for the first word: {}'
                  .format(alternative.words[0].speaker_tag))
# [END speech_transcribe_diarization]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -142,3 +173,5 @@ def transcribe_file_with_auto_punctuation(path):
transcribe_file_with_metadata(args.path)
elif args.command == 'punctuation':
transcribe_file_with_auto_punctuation(args.path)
elif args.command == 'diarization':
transcribe_file_with_diarization(args.path)
12 changes: 11 additions & 1 deletion google-cloud-speech/samples/snippets/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import os

from beta_snippets import (
transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
transcribe_file_with_auto_punctuation,
transcribe_file_with_diarization,
transcribe_file_with_enhanced_model,
transcribe_file_with_metadata)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
Expand Down Expand Up @@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys):
out, _ = capsys.readouterr()

assert 'Okay. Sure.' in out


def test_transcribe_diarization(capsys):
    """Run the diarization sample on Google_Gnome.wav and check stdout."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_file_with_diarization(audio_path)

    # Only stdout is inspected; the stderr capture is discarded.
    captured_out, _ = capsys.readouterr()

    expected = 'OK Google stream stranger things from Netflix to my TV'
    assert expected in captured_out
2 changes: 1 addition & 1 deletion google-cloud-speech/samples/snippets/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
google-cloud-speech==0.33.0
google-cloud-speech==0.35.0

0 comments on commit 211966e

Please sign in to comment.