Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Infinite streaming sample #1422

Merged
merged 7 commits into from
May 16, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2018 Google LLC
* Copyright 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,17 @@
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
import com.google.protobuf.Duration;
import java.lang.Math;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
Expand All @@ -39,11 +42,29 @@

public class InfiniteStreamRecognize {

private static final int STREAMING_LIMIT = 10000; // 10 seconds

public static final String RED = "\033[0;31m";
public static final String GREEN = "\033[0;32m";
public static final String YELLOW = "\033[0;33m";

// Creating shared object
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
private static TargetDataLine targetDataLine;
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes

private static int restartCounter = 0;
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
private static int resultEndTimeInMS = 0;
private static int isFinalEndTime = 0;
private static int finalRequestEndTime = 0;
private static boolean newStream = true;
private static double bridgingOffset = 0;
private static boolean lastTranscriptWasFinal = false;
private static StreamController referenceToStreamController;
private static ByteString tempByteString;

public static void main(String... args) {
try {
infiniteStreamingRecognize();
Expand All @@ -60,6 +81,7 @@ class MicBuffer implements Runnable {

@Override
public void run() {
System.out.println(YELLOW);
System.out.println("Start speaking...Press Ctrl-C to stop");
targetDataLine.start();
byte[] data = new byte[BYTES_PER_BUFFER];
Expand Down Expand Up @@ -88,24 +110,48 @@ public void run() {

ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

public void onStart(StreamController controller) {}
public void onStart(StreamController controller) {
referenceToStreamController = controller;
}

public void onResponse(StreamingRecognizeResponse response) {

responses.add(response);

StreamingRecognitionResult result = response.getResultsList().get(0);
// There can be several alternative transcripts for a given chunk of speech. Just
// use the first (most likely) one here.

Duration resultEndTime = result.getResultEndTime();

resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
+ (resultEndTime.getNanos() / 1000000));

double correctedTime = resultEndTimeInMS - bridgingOffset
+ (STREAMING_LIMIT * restartCounter);
DecimalFormat format = new DecimalFormat("0.#");

SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}
if (result.getIsFinal()) {
System.out.print(GREEN);
System.out.print("\033[2K\r");
System.out.printf("%s: %s\n", format.format(correctedTime),
alternative.getTranscript());

public void onComplete() {
System.out.println("Done");
}
isFinalEndTime = resultEndTimeInMS;
lastTranscriptWasFinal = true;
} else {
System.out.print(RED);
System.out.print("\033[2K\r");
System.out.printf("%s: %s", format.format(correctedTime),
alternative.getTranscript());

public void onError(Throwable t) {
System.out.println(t);
lastTranscriptWasFinal = false;
}
}

public void onComplete() {}

public void onError(Throwable t) {}

};

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
Expand All @@ -116,8 +162,12 @@ public void onError(Throwable t) {
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();

StreamingRecognitionConfig streamingRecognitionConfig =
StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
StreamingRecognitionConfig.newBuilder()
.setConfig(recognitionConfig)
.setInterimResults(true)
.build();

StreamingRecognizeRequest request =
StreamingRecognizeRequest.newBuilder()
Expand Down Expand Up @@ -151,23 +201,76 @@ public void onError(Throwable t) {

long estimatedTime = System.currentTimeMillis() - startTime;

if (estimatedTime >= 55000) {
if (estimatedTime >= STREAMING_LIMIT) {

clientStream.closeSend();
referenceToStreamController.cancel(); // remove Observer

if (resultEndTimeInMS > 0) {
finalRequestEndTime = isFinalEndTime;
}
resultEndTimeInMS = 0;

lastAudioInput = null;
lastAudioInput = audioInput;
audioInput = new ArrayList<ByteString>();

restartCounter++;

if (!lastTranscriptWasFinal) {
System.out.print('\n');
}

newStream = true;

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build();

System.out.println(YELLOW);
System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);

startTime = System.currentTimeMillis();

} else {

if ((newStream) && (lastAudioInput.size() > 0)) {
double chunkTime = STREAMING_LIMIT / lastAudioInput.size(); // ms
if (chunkTime != 0) {
if (bridgingOffset < 0) {
bridgingOffset = 0;
}
if (bridgingOffset > finalRequestEndTime) {
bridgingOffset = finalRequestEndTime;
}
int chunksFromMS = (int) Math.floor((finalRequestEndTime
- bridgingOffset) / chunkTime);
bridgingOffset = (int) Math.floor((lastAudioInput.size()
- chunksFromMS) * chunkTime);
for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {

request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(lastAudioInput.get(i))
.build();
clientStream.send(request);
}
}
newStream = false;
}

tempByteString = ByteString.copyFrom(sharedQueue.take());

request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(sharedQueue.take()))
.setAudioContent(tempByteString)
.build();

audioInput.add(tempByteString);

}

clientStream.send(request);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ public void onError(Throwable t) {
System.out.println("Stop speaking.");
targetDataLine.stop();
targetDataLine.close();
break;
// break;
}
request =
StreamingRecognizeRequest.newBuilder()
Expand Down