From e08366d586f804bbeb367a1d9c07def172902723 Mon Sep 17 00:00:00 2001
From: Shahin <happyhuman@users.noreply.github.com>
Date: Thu, 12 Jul 2018 14:16:59 -0700
Subject: [PATCH] Diarization
 [(#1556)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1556)

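Add a speaker diarization sample to the Speech beta snippets, with a test, a README usage line, and a google-cloud-speech bump to 0.35.0.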
---
 speech/snippets/README.rst            |  1 +
 speech/snippets/beta_snippets.py      | 33 +++++++++++++++++++++++++++
 speech/snippets/beta_snippets_test.py | 12 +++++++++-
 speech/snippets/requirements.txt      |  2 +-
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/speech/snippets/README.rst b/speech/snippets/README.rst
index 05a20ec67c0e..8efaa01472b0 100644
--- a/speech/snippets/README.rst
+++ b/speech/snippets/README.rst
@@ -230,6 +230,7 @@ To run this sample:
         python beta_snippets.py enhanced-model resources/commercial_mono.wav
         python beta_snippets.py metadata resources/commercial_mono.wav
         python beta_snippets.py punctuation resources/commercial_mono.wav
+        python beta_snippets.py diarization resources/commercial_mono.wav
 
     positional arguments:
       command
diff --git a/speech/snippets/beta_snippets.py b/speech/snippets/beta_snippets.py
index 95a9d8405e96..a518307f7871 100644
--- a/speech/snippets/beta_snippets.py
+++ b/speech/snippets/beta_snippets.py
@@ -21,6 +21,7 @@
     python beta_snippets.py enhanced-model resources/commercial_mono.wav
     python beta_snippets.py metadata resources/commercial_mono.wav
     python beta_snippets.py punctuation resources/commercial_mono.wav
+    python beta_snippets.py diarization resources/commercial_mono.wav
 """
 
 import argparse
@@ -126,6 +127,36 @@ def transcribe_file_with_auto_punctuation(path):
 # [END speech_transcribe_file_with_auto_punctuation]
 
 
+# [START speech_transcribe_diarization]
+def transcribe_file_with_diarization(path):
+    """Transcribe a local audio file synchronously with speaker diarization.
+
+    Prints each result's top transcript and first-word speaker tag (if any).
+    """
+    client = speech.SpeechClient()
+    with open(path, 'rb') as audio_file:
+        content = audio_file.read()
+    audio = speech.types.RecognitionAudio(content=content)
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=16000,
+        language_code='en-US',
+        enable_speaker_diarization=True,
+        diarization_speaker_count=2)
+
+    print('Waiting for operation to complete...')
+    response = client.recognize(config, audio)
+    for i, result in enumerate(response.results):
+        alternative = result.alternatives[0]
+        print('-' * 20)
+        print('First alternative of result {}: {}'
+              .format(i, alternative.transcript))
+        if alternative.words:  # words[0] would raise IndexError if empty
+            print('Speaker Tag for the first word: {}'
+                  .format(alternative.words[0].speaker_tag))
+# [END speech_transcribe_diarization]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
@@ -142,3 +173,5 @@ def transcribe_file_with_auto_punctuation(path):
         transcribe_file_with_metadata(args.path)
     elif args.command == 'punctuation':
         transcribe_file_with_auto_punctuation(args.path)
+    elif args.command == 'diarization':
+        transcribe_file_with_diarization(args.path)
diff --git a/speech/snippets/beta_snippets_test.py b/speech/snippets/beta_snippets_test.py
index a241a435d6eb..ef78f941d67a 100644
--- a/speech/snippets/beta_snippets_test.py
+++ b/speech/snippets/beta_snippets_test.py
@@ -14,7 +14,9 @@
 import os
 
 from beta_snippets import (
-    transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
+    transcribe_file_with_auto_punctuation,
+    transcribe_file_with_diarization,
+    transcribe_file_with_enhanced_model,
     transcribe_file_with_metadata)
 
 RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
@@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys):
     out, _ = capsys.readouterr()
 
     assert 'Okay. Sure.' in out
+
+
+def test_transcribe_diarization(capsys):
+    """The diarization sample prints the expected transcript to stdout."""
+    transcribe_file_with_diarization(
+        os.path.join(RESOURCES, 'Google_Gnome.wav'))
+    out, _ = capsys.readouterr()
+    assert 'OK Google stream stranger things from Netflix to my TV' in out
diff --git a/speech/snippets/requirements.txt b/speech/snippets/requirements.txt
index 87b74e0d7334..c8b0bf24f01f 100644
--- a/speech/snippets/requirements.txt
+++ b/speech/snippets/requirements.txt
@@ -1 +1 @@
-google-cloud-speech==0.33.0
+google-cloud-speech==0.35.0