Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech region tag update #1644

Merged
merged 13 commits into from
Aug 21, 2018
28 changes: 14 additions & 14 deletions speech/cloud-client/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

def transcribe_file_with_enhanced_model():
"""Transcribe the given audio file using an enhanced model."""
# [START speech_transcribe_file_with_enhanced_model]
# [START speech_transcribe_enhanced_model_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -60,12 +60,12 @@ def transcribe_file_with_enhanced_model():
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_enhanced_model]
# [END speech_transcribe_enhanced_model_beta]


def transcribe_file_with_metadata():
"""Send a request that includes recognition metadata."""
# [START speech_transcribe_file_with_metadata]
# [START speech_transcribe_recognition_metadata_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand Down Expand Up @@ -105,12 +105,12 @@ def transcribe_file_with_metadata():
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_metadata]
# [END speech_transcribe_recognition_metadata_beta]


def transcribe_file_with_auto_punctuation():
"""Transcribe the given audio file with auto punctuation enabled."""
# [START speech_transcribe_file_with_auto_punctuation]
# [START speech_transcribe_auto_punctuation_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -134,12 +134,12 @@ def transcribe_file_with_auto_punctuation():
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_auto_punctuation]
# [END speech_transcribe_auto_punctuation_beta]


def transcribe_file_with_diarization():
"""Transcribe the given audio file synchronously with diarization."""
# [START speech_transcribe_diarization]
# [START speech_transcribe_diarization_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand Down Expand Up @@ -172,13 +172,13 @@ def transcribe_file_with_diarization():
for word_info in words_info:
print("word: '{}', speaker_tag: {}".format(word_info.word,
word_info.speaker_tag))
# [END speech_transcribe_diarization]
# [END speech_transcribe_diarization_beta]


def transcribe_file_with_multichannel():
"""Transcribe the given audio file synchronously with
multi channel."""
# [START speech_transcribe_multichannel]
# [START speech_transcribe_multichannel_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -204,13 +204,13 @@ def transcribe_file_with_multichannel():
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
print(u'Channel Tag: {}'.format(result.channel_tag))
# [END speech_transcribe_multichannel]
# [END speech_transcribe_multichannel_beta]


def transcribe_file_with_multilanguage():
"""Transcribe the given audio file synchronously with
multi language."""
# [START speech_transcribe_multilanguage]
# [START speech_transcribe_multilanguage_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand Down Expand Up @@ -238,13 +238,13 @@ def transcribe_file_with_multilanguage():
print('-' * 20)
print('First alternative of result {}: {}'.format(i, alternative))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_multilanguage]
# [END speech_transcribe_multilanguage_beta]


def transcribe_file_with_word_level_confidence():
"""Transcribe the given audio file synchronously with
word level confidence."""
# [START speech_transcribe_word_level_confidence]
# [START speech_transcribe_word_level_confidence_beta]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

Expand All @@ -270,7 +270,7 @@ def transcribe_file_with_word_level_confidence():
print(u'Transcript: {}'.format(alternative.transcript))
print(u'First Word and Confidence: ({}, {})'.format(
alternative.words[0].word, alternative.words[0].confidence))
# [END speech_transcribe_word_level_confidence]
# [END speech_transcribe_word_level_confidence_beta]


if __name__ == '__main__':
Expand Down
8 changes: 4 additions & 4 deletions speech/cloud-client/quickstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ def run_quickstart():
import os

# Imports the Google Cloud client library
# [START migration_import]
# [START speech_python_migration_imports]
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
# [END migration_import]
# [END speech_python_migration_imports]

# Instantiates a client
# [START migration_client]
# [START speech_python_migration_client]
client = speech.SpeechClient()
# [END migration_client]
# [END speech_python_migration_client]

# The name of the audio file to transcribe
file_name = os.path.join(
Expand Down
26 changes: 12 additions & 14 deletions speech/cloud-client/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,20 @@
python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac
"""

# [START import_libraries]
import argparse
import io
# [END import_libraries]


# [START def_transcribe_file]
# [START speech_transcribe_sync]
def transcribe_file(speech_file):
"""Transcribe the given audio file."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_sync_request]
# [START migration_audio_config_file]
# [START speech_python_migration_sync_request]
# [START speech_python_migration_config]
with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

Expand All @@ -46,43 +44,43 @@ def transcribe_file(speech_file):
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
# [END migration_audio_config_file]
# [END speech_python_migration_config]

# [START migration_sync_response]
# [START speech_python_migration_sync_response]
response = client.recognize(config, audio)
# [END migration_sync_request]
# [END speech_python_migration_sync_request]
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in response.results:
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
# [END migration_sync_response]
# [END def_transcribe_file]
# [END speech_python_migration_sync_response]
# [END speech_transcribe_sync]


# [START def_transcribe_gcs]
# [START speech_transcribe_sync_gcs]
def transcribe_gcs(gcs_uri):
    """Run synchronous speech recognition on the audio referenced by gcs_uri.

    Builds a FLAC / 16000 Hz / en-US recognition config, issues a single
    ``recognize`` request for the audio at ``gcs_uri``, and prints the
    transcript of the first alternative of every result in the response.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    speech_client = speech.SpeechClient()

    # [START migration_audio_config_gcs]
    # [START speech_python_migration_config_gcs]
    remote_audio = types.RecognitionAudio(uri=gcs_uri)
    flac_encoding = enums.RecognitionConfig.AudioEncoding.FLAC
    recognition_config = types.RecognitionConfig(
        encoding=flac_encoding,
        sample_rate_hertz=16000,
        language_code='en-US')
    # [END migration_audio_config_gcs]
    # [END speech_python_migration_config_gcs]

    recognition = speech_client.recognize(recognition_config, remote_audio)
    # The response holds one result per consecutive portion of the audio, so
    # walking all of them yields the transcript for the entire file.
    for segment in recognition.results:
        # The first alternative is the most likely one for this portion.
        top_choice = segment.alternatives[0]
        print(u'Transcript: {}'.format(top_choice.transcript))
# [END def_transcribe_gcs]
# [END speech_transcribe_sync_gcs]


if __name__ == '__main__':
Expand Down
16 changes: 8 additions & 8 deletions speech/cloud-client/transcribe_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@
import io


# [START def_transcribe_file]
# [START speech_transcribe_async]
def transcribe_file(speech_file):
"""Transcribe the given audio file asynchronously."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_async_request]
# [START speech_python_migration_async_request]
with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

Expand All @@ -44,9 +44,9 @@ def transcribe_file(speech_file):
sample_rate_hertz=16000,
language_code='en-US')

# [START migration_async_response]
# [START speech_python_migration_async_response]
operation = client.long_running_recognize(config, audio)
# [END migration_async_request]
# [END speech_python_migration_async_request]

print('Waiting for operation to complete...')
response = operation.result(timeout=90)
Expand All @@ -57,11 +57,11 @@ def transcribe_file(speech_file):
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END migration_async_response]
# [END def_transcribe_file]
# [END speech_python_migration_async_response]
# [END speech_transcribe_async]


# [START def_transcribe_gcs]
# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
Expand All @@ -86,7 +86,7 @@ def transcribe_gcs(gcs_uri):
# The first alternative is the most likely one for this portion.
print(u'Transcript: {}'.format(result.alternatives[0].transcript))
print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END def_transcribe_gcs]
# [END speech_transcribe_async_gcs]


if __name__ == '__main__':
Expand Down
8 changes: 4 additions & 4 deletions speech/cloud-client/transcribe_model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import argparse


# [START speech_transcribe_model_selection]
# [START speech_transcribe_model_selection_beta]
def transcribe_model_selection(speech_file, model):
"""Transcribe the given audio file synchronously with
the selected model."""
Expand All @@ -52,10 +52,10 @@ def transcribe_model_selection(speech_file, model):
print('-' * 20)
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_model_selection]
# [END speech_transcribe_model_selection_beta]


# [START speech_transcribe_model_selection_gcs]
# [START speech_transcribe_model_selection_gcs_beta]
def transcribe_model_selection_gcs(gcs_uri, model):
"""Transcribe the given audio file asynchronously with
the selected model."""
Expand All @@ -80,7 +80,7 @@ def transcribe_model_selection_gcs(gcs_uri, model):
print('-' * 20)
print('First alternative of result {}'.format(i))
print(u'Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_model_selection_gcs]
# [END speech_transcribe_model_selection_gcs_beta]


if __name__ == '__main__':
Expand Down
14 changes: 6 additions & 8 deletions speech/cloud-client/transcribe_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,19 @@
python transcribe_streaming.py resources/audio.raw
"""

# [START import_libraries]
import argparse
import io
# [END import_libraries]


# [START def_transcribe_streaming]
# [START speech_transcribe_streaming]
def transcribe_streaming(stream_file):
"""Streams transcription of the given audio file."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_streaming_request]
# [START speech_python_migration_streaming_request]
with io.open(stream_file, 'rb') as audio_file:
content = audio_file.read()

Expand All @@ -50,9 +48,9 @@ def transcribe_streaming(stream_file):
streaming_config = types.StreamingRecognitionConfig(config=config)

# streaming_recognize returns a generator.
# [START migration_streaming_response]
# [START speech_python_migration_streaming_response]
responses = client.streaming_recognize(streaming_config, requests)
# [END migration_streaming_request]
# [END speech_python_migration_streaming_request]

for response in responses:
# Once the transcription has settled, the first result will contain the
Expand All @@ -66,8 +64,8 @@ def transcribe_streaming(stream_file):
for alternative in alternatives:
print('Confidence: {}'.format(alternative.confidence))
print(u'Transcript: {}'.format(alternative.transcript))
# [END migration_streaming_response]
# [END def_transcribe_streaming]
# [END speech_python_migration_streaming_response]
# [END speech_transcribe_streaming]


if __name__ == '__main__':
Expand Down
5 changes: 2 additions & 3 deletions speech/cloud-client/transcribe_streaming_mic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
python transcribe_streaming_mic.py
"""

# [START import_libraries]
# [START speech_transcribe_streaming_mic]
from __future__ import division

import re
Expand All @@ -36,7 +36,6 @@
from google.cloud.speech import types
import pyaudio
from six.moves import queue
# [END import_libraries]

# Audio recording parameters
RATE = 16000
Expand Down Expand Up @@ -106,7 +105,6 @@ def generator(self):
break

yield b''.join(data)
# [END audio_stream]


def listen_print_loop(responses):
Expand Down Expand Up @@ -191,3 +189,4 @@ def main():

if __name__ == '__main__':
main()
# [END speech_transcribe_streaming_mic]
4 changes: 2 additions & 2 deletions speech/cloud-client/transcribe_word_time_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def transcribe_file_with_word_time_offsets(speech_file):
end_time.seconds + end_time.nanos * 1e-9))


# [START def_transcribe_gcs]
# [START speech_transcribe_async_time_offsets_gcs]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest keeping this as word_time_offset, since there could be other types of time offset.

def transcribe_gcs_with_word_time_offsets(gcs_uri):
"""Transcribe the given audio file asynchronously and output the word time
offsets."""
Expand Down Expand Up @@ -94,7 +94,7 @@ def transcribe_gcs_with_word_time_offsets(gcs_uri):
word,
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))
# [END def_transcribe_gcs]
# [END speech_transcribe_async_time_offsets_gcs]


if __name__ == '__main__':
Expand Down