Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Infinite Stream recognition #1297

Merged
merged 4 commits into from
Dec 14, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 142 additions & 1 deletion speech/cloud-client/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
import java.util.ArrayList;
import java.util.List;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
nirupa-kumar marked this conversation as resolved.
Show resolved Hide resolved
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
Expand All @@ -55,6 +57,11 @@

public class Recognize {

// Queue of captured audio chunks: filled by the microphone thread and drained by the loop
// that sends audio to the recognizer. The reference is never reassigned, so it is final.
private static final BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue<>();
// Microphone line; assigned when infinite streaming recognition opens the microphone.
private static TargetDataLine targetDataLine;
private static final int BYTES_PER_BUFFER = 6400; // buffer size in bytes
nirupa-kumar marked this conversation as resolved.
Show resolved Hide resolved

/** Run speech recognition tasks. */
public static void main(String... args) throws Exception {
if (args.length < 1) {
Expand All @@ -64,7 +71,7 @@ public static void main(String... args) throws Exception {
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+ "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
+ "\t| enhanced-model | model-selection\n"
+ "\t| enhanced-model | model-selection | infinitestreamrecognize\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand Down Expand Up @@ -112,6 +119,8 @@ public static void main(String... args) throws Exception {
} else {
transcribeModelSelection(path);
}
} else if (command.equals("infinitestreamrecognize")) {
infiniteStreamingRecognize();
}
}

Expand Down Expand Up @@ -830,4 +839,136 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
}
}
// [END speech_transcribe_model_selection_gcs]

// [START speech_transcribe_infinite_streaming]
/** Performs infinite streaming speech recognition */
public static void infiniteStreamingRecognize() throws Exception {

// Microphone Input buffering
class MicBuffer implements Runnable {

@Override
public void run() {
System.out.println("Start speaking...Press Ctrl-C to stop transcription.");

targetDataLine.start();
byte[] data = new byte[BYTES_PER_BUFFER];
while (targetDataLine.isOpen()) {
try {
int numBytesRead = targetDataLine.read(data, 0, data.length);
if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
continue;
}
sharedQueue.put(data.clone());
} catch (InterruptedException e) {
System.out.println("Microphone input buffering interrupted : " + e.getMessage());
}
}
}
}

// Creating microphone input buffer thread
MicBuffer micrunnable = new MicBuffer();
Thread micThread = new Thread(micrunnable);

ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
try (SpeechClient client = SpeechClient.create()) {

responseObserver =
new ResponseObserver<StreamingRecognizeResponse>() {

ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

public void onStart(StreamController controller) {}

public void onResponse(StreamingRecognizeResponse response) {
responses.add(response);
StreamingRecognitionResult result = response.getResultsList().get(0);
// There can be several alternative transcripts for a given chunk of speech. Just use
// the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}

public void onComplete() {}

public void onError(Throwable t) {
System.out.println(t);
}
};

ClientStream<StreamingRecognizeRequest> clientStream =
client.streamingRecognizeCallable().splitCall(responseObserver);

RecognitionConfig recognitionConfig =
RecognitionConfig.newBuilder()
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
StreamingRecognitionConfig streamingRecognitionConfig =
StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();

StreamingRecognizeRequest request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build(); // The first request in a streaming call has to be a config

clientStream.send(request);

try {
// SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
// bigEndian: false
AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
DataLine.Info targetInfo =
new Info(
TargetDataLine.class,
audioFormat); // Set the system information to read from the microphone audio stream

if (!AudioSystem.isLineSupported(targetInfo)) {
System.out.println("Microphone not supported");
System.exit(0);
}
// Target data line captures the audio stream the microphone produces.
targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
targetDataLine.open(audioFormat);
micThread.start();

long startTime = System.currentTimeMillis();

while (true) {

long estimatedTime = System.currentTimeMillis() - startTime;

if (estimatedTime >= 55000) {

clientStream.closeSend();
clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build();

startTime = System.currentTimeMillis();

} else {
request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(sharedQueue.take()))
.build();
}

clientStream.send(request);
}
} catch (Exception e) {
System.out.println(e);
}
responseObserver.onComplete();

clientStream.closeSend();
nirupa-kumar marked this conversation as resolved.
Show resolved Hide resolved
}
}
// [END speech_transcribe_infinite_streaming]
}