From 70d1b1bef1030cc7f77ec841942bad84efc3eaec Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 18 Oct 2019 09:11:06 -0700 Subject: [PATCH] fix: update retry configs, adds generated samples (#26) --- .../speech/v1/SpeechTranscribeAsync.java | 118 ++++++++++++++++ .../speech/v1/SpeechTranscribeAsyncGcs.java | 108 +++++++++++++++ ...eechTranscribeAsyncWordTimeOffsetsGcs.java | 116 ++++++++++++++++ .../v1/SpeechTranscribeEnhancedModel.java | 112 +++++++++++++++ .../v1/SpeechTranscribeModelSelection.java | 105 +++++++++++++++ .../v1/SpeechTranscribeModelSelectionGcs.java | 95 +++++++++++++ .../v1/SpeechTranscribeMultichannel.java | 113 ++++++++++++++++ .../v1/SpeechTranscribeMultichannelGcs.java | 103 ++++++++++++++ .../speech/v1/SpeechTranscribeSync.java | 110 +++++++++++++++ .../speech/v1/SpeechTranscribeSyncGcs.java | 100 ++++++++++++++ .../speech.java.20191017.083206.manifest.yaml | 60 +++++++++ .../v1p1beta1/SpeechAdaptationBeta.java | 123 +++++++++++++++++ .../v1p1beta1/SpeechContextsClassesBeta.java | 116 ++++++++++++++++ .../v1p1beta1/SpeechQuickstartBeta.java | 100 ++++++++++++++ .../SpeechTranscribeAutoPunctuationBeta.java | 107 +++++++++++++++ .../SpeechTranscribeDiarizationBeta.java | 127 ++++++++++++++++++ .../SpeechTranscribeMultilanguageBeta.java | 116 ++++++++++++++++ ...eechTranscribeRecognitionMetadataBeta.java | 123 +++++++++++++++++ ...eechTranscribeWordLevelConfidenceBeta.java | 113 ++++++++++++++++ .../speech.java.20191017.083221.manifest.yaml | 50 +++++++ 20 files changed, 2115 insertions(+) create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsync.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncGcs.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncWordTimeOffsetsGcs.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeEnhancedModel.java create 
mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelection.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelectionGcs.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannel.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannelGcs.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSync.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSyncGcs.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1/speech.java.20191017.083206.manifest.yaml create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechAdaptationBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechContextsClassesBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechQuickstartBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeAutoPunctuationBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeDiarizationBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeMultilanguageBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeRecognitionMetadataBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeWordLevelConfidenceBeta.java create mode 100644 speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/speech.java.20191017.083221.manifest.yaml diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsync.java 
b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsync.java new file mode 100644 index 00000000000..fde397cb6db --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsync.java @@ -0,0 +1,118 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("LongRunningRequestAsync", "speech_transcribe_async") +// sample-metadata: +// title: Transcribe Audio File using Long Running Operation (Local File) (LRO) +// description: Transcribe a long audio file using asynchronous speech recognition +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeAsync [--args='[--local_file_path "resources/brooklyn_bridge.raw"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; +import com.google.cloud.speech.v1.LongRunningRecognizeRequest; +import com.google.cloud.speech.v1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import 
java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeAsync { + // [START speech_transcribe_async] + /* + * Please include the following imports to run this sample. + * + * import com.google.api.gax.longrunning.OperationFuture; + * import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; + * import com.google.cloud.speech.v1.LongRunningRecognizeRequest; + * import com.google.cloud.speech.v1.LongRunningRecognizeResponse; + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Transcribe a long audio file using asynchronous speech recognition + * + * @param localFilePath Path to local audio file, e.g. /path/audio.wav + */ + public static void sampleLongRunningRecognize(String localFilePath) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/brooklyn_bridge.raw"; + + // The language of the supplied audio + String languageCode = "en-US"; + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 16000; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.LINEAR16; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setLanguageCode(languageCode) + .setSampleRateHertz(sampleRateHertz) + .setEncoding(encoding) + .build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + LongRunningRecognizeRequest request = + LongRunningRecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + OperationFuture future = + speechClient.longRunningRecognizeAsync(request); + + System.out.println("Waiting for operation to complete..."); + LongRunningRecognizeResponse response = future.get(); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_async] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/brooklyn_bridge.raw"); + + sampleLongRunningRecognize(localFilePath); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncGcs.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncGcs.java new file mode 100644 index 00000000000..ab96b670f79 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncGcs.java @@ 
-0,0 +1,108 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("LongRunningRequestAsync", "speech_transcribe_async_gcs") +// sample-metadata: +// title: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO) +// description: Transcribe long audio file from Cloud Storage using asynchronous speech recognition +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeAsyncGcs [--args='[--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; +import com.google.cloud.speech.v1.LongRunningRecognizeRequest; +import com.google.cloud.speech.v1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeAsyncGcs { + // [START speech_transcribe_async_gcs] + /* + * Please include the following imports to run this 
sample. + * + * import com.google.api.gax.longrunning.OperationFuture; + * import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; + * import com.google.cloud.speech.v1.LongRunningRecognizeRequest; + * import com.google.cloud.speech.v1.LongRunningRecognizeResponse; + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + */ + + /** + * Transcribe long audio file from Cloud Storage using asynchronous speech recognition + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + */ + public static void sampleLongRunningRecognize(String storageUri) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"; + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 16000; + + // The language of the supplied audio + String languageCode = "en-US"; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.LINEAR16; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setSampleRateHertz(sampleRateHertz) + .setLanguageCode(languageCode) + .setEncoding(encoding) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + LongRunningRecognizeRequest request = + LongRunningRecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + OperationFuture future = + speechClient.longRunningRecognizeAsync(request); + + System.out.println("Waiting for operation to complete..."); + LongRunningRecognizeResponse response = future.get(); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_async_gcs] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/brooklyn_bridge.raw"); + + sampleLongRunningRecognize(storageUri); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncWordTimeOffsetsGcs.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncWordTimeOffsetsGcs.java new file mode 100644 index 00000000000..674277938fd --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeAsyncWordTimeOffsetsGcs.java @@ -0,0 +1,116 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("LongRunningRequestAsync", "speech_transcribe_async_word_time_offsets_gcs") +// sample-metadata: +// title: Getting word timestamps (Cloud Storage) (LRO) +// description: Print start and end time of each word spoken in audio file from Cloud Storage +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeAsyncWordTimeOffsetsGcs [--args='[--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.flac"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; +import com.google.cloud.speech.v1.LongRunningRecognizeRequest; +import com.google.cloud.speech.v1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.cloud.speech.v1.WordInfo; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeAsyncWordTimeOffsetsGcs { + // [START speech_transcribe_async_word_time_offsets_gcs] + /* + * Please include the following imports to run this 
sample. + * + * import com.google.api.gax.longrunning.OperationFuture; + * import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; + * import com.google.cloud.speech.v1.LongRunningRecognizeRequest; + * import com.google.cloud.speech.v1.LongRunningRecognizeResponse; + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + * import com.google.cloud.speech.v1.WordInfo; + */ + + /** + * Print start and end time of each word spoken in audio file from Cloud Storage + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + */ + public static void sampleLongRunningRecognize(String storageUri) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac"; + + // When enabled, the first result returned by the API will include a list + // of words and the start and end time offsets (timestamps) for those words. 
+ boolean enableWordTimeOffsets = true; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEnableWordTimeOffsets(enableWordTimeOffsets) + .setLanguageCode(languageCode) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + LongRunningRecognizeRequest request = + LongRunningRecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + OperationFuture future = + speechClient.longRunningRecognizeAsync(request); + + System.out.println("Waiting for operation to complete..."); + LongRunningRecognizeResponse response = future.get(); + // The first result includes start and end time word offsets + SpeechRecognitionResult result = response.getResultsList().get(0); + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + // Print the start and end time of each word + for (WordInfo word : alternative.getWordsList()) { + System.out.printf("Word: %s\n", word.getWord()); + System.out.printf( + "Start time: %s seconds %s nanos\n", + word.getStartTime().getSeconds(), word.getStartTime().getNanos()); + System.out.printf( + "End time: %s seconds %s nanos\n", + word.getEndTime().getSeconds(), word.getEndTime().getNanos()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_async_word_time_offsets_gcs] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/brooklyn_bridge.flac"); + + 
sampleLongRunningRecognize(storageUri); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeEnhancedModel.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeEnhancedModel.java new file mode 100644 index 00000000000..8fa8eabb16d --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeEnhancedModel.java @@ -0,0 +1,112 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_enhanced_model") +// sample-metadata: +// title: Using Enhanced Models (Local File) +// description: Transcribe a short audio file using an enhanced model +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeEnhancedModel [--args='[--local_file_path "resources/hello.wav"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeEnhancedModel { + // [START speech_transcribe_enhanced_model] + /* + * Please include the following imports to run this sample. + * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Transcribe a short audio file using an enhanced model + * + * @param localFilePath Path to local audio file, e.g. 
/path/audio.wav + */ + public static void sampleRecognize(String localFilePath) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/hello.wav"; + + // The enhanced model to use, e.g. phone_call + // Currently phone_call is the only model available as an enhanced model. + String model = "phone_call"; + + // Use an enhanced model for speech recognition (when set to true). + // Project must be eligible for requesting enhanced models. + // Enhanced speech models require that you opt-in to data logging. + boolean useEnhanced = true; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setModel(model) + .setUseEnhanced(useEnhanced) + .setLanguageCode(languageCode) + .build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_enhanced_model] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/hello.wav"); + + 
sampleRecognize(localFilePath); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelection.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelection.java new file mode 100644 index 00000000000..5994ae7c916 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelection.java @@ -0,0 +1,105 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_model_selection") +// sample-metadata: +// title: Selecting a Transcription Model (Local File) +// description: Transcribe a short audio file using a specified transcription model +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeModelSelection [--args='[--local_file_path "resources/hello.wav"] [--model "phone_call"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeModelSelection { + // [START speech_transcribe_model_selection] + /* + * Please include the following imports to run this sample. 
+ * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Transcribe a short audio file using a specified transcription model + * + * @param localFilePath Path to local audio file, e.g. /path/audio.wav + * @param model The transcription model to use, e.g. video, phone_call, default For a list of + * available transcription models, see: + * https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models + */ + public static void sampleRecognize(String localFilePath, String model) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/hello.wav"; + // model = "phone_call"; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder().setModel(model).setLanguageCode(languageCode).build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); 
+ } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_model_selection] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + options.addOption(Option.builder("").required(false).hasArg(true).longOpt("model").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/hello.wav"); + String model = cl.getOptionValue("model", "phone_call"); + + sampleRecognize(localFilePath, model); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelectionGcs.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelectionGcs.java new file mode 100644 index 00000000000..99d17c94455 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeModelSelectionGcs.java @@ -0,0 +1,95 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_model_selection_gcs") +// sample-metadata: +// title: Selecting a Transcription Model (Cloud Storage) +// description: Transcribe a short audio file from Cloud Storage using a specified transcription model +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeModelSelectionGcs [--args='[--storage_uri "gs://cloud-samples-data/speech/hello.wav"] [--model "phone_call"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeModelSelectionGcs { + // [START speech_transcribe_model_selection_gcs] + /* + * Please include the following imports to run this sample. + * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + */ + + /** + * Transcribe a short audio file from Cloud Storage using a specified transcription model + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + * @param model The transcription model to use, e.g. 
video, phone_call, default For a list of + * available transcription models, see: + * https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models + */ + public static void sampleRecognize(String storageUri, String model) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/hello.wav"; + // model = "phone_call"; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder().setModel(model).setLanguageCode(languageCode).build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_model_selection_gcs] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + options.addOption(Option.builder("").required(false).hasArg(true).longOpt("model").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/hello.wav"); + String model = cl.getOptionValue("model", "phone_call"); + + sampleRecognize(storageUri, model); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannel.java 
b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannel.java new file mode 100644 index 00000000000..c03e12d4735 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannel.java @@ -0,0 +1,113 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_multichannel") +// sample-metadata: +// title: Multi-Channel Audio Transcription (Local File) +// description: Transcribe a short audio file with multiple channels +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeMultichannel [--args='[--local_file_path "resources/multi.wav"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import 
org.apache.commons.cli.Options; + +public class SpeechTranscribeMultichannel { + // [START speech_transcribe_multichannel] + /* + * Please include the following imports to run this sample. + * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Transcribe a short audio file with multiple channels + * + * @param localFilePath Path to local audio file, e.g. /path/audio.wav + */ + public static void sampleRecognize(String localFilePath) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/multi.wav"; + + // The number of channels in the input audio file (optional) + int audioChannelCount = 2; + + // When set to true, each audio channel will be recognized separately. 
+ // The recognition result will contain a channel_tag field to state which + // channel that result belongs to + boolean enableSeparateRecognitionPerChannel = true; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setAudioChannelCount(audioChannelCount) + .setEnableSeparateRecognitionPerChannel(enableSeparateRecognitionPerChannel) + .setLanguageCode(languageCode) + .build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // channelTag to recognize which audio channel this result is for + System.out.printf("Channel tag: %s\n", result.getChannelTag()); + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_multichannel] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/multi.wav"); + + sampleRecognize(localFilePath); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannelGcs.java 
b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannelGcs.java new file mode 100644 index 00000000000..a3fca349733 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeMultichannelGcs.java @@ -0,0 +1,103 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_multichannel_gcs") +// sample-metadata: +// title: Multi-Channel Audio Transcription (Cloud Storage) +// description: Transcribe a short audio file from Cloud Storage with multiple channels +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeMultichannelGcs [--args='[--storage_uri "gs://cloud-samples-data/speech/multi.wav"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeMultichannelGcs { + // [START 
speech_transcribe_multichannel_gcs] + /* + * Please include the following imports to run this sample. + * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + */ + + /** + * Transcribe a short audio file from Cloud Storage with multiple channels + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + */ + public static void sampleRecognize(String storageUri) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/multi.wav"; + + // The number of channels in the input audio file (optional) + int audioChannelCount = 2; + + // When set to true, each audio channel will be recognized separately. 
+ // The recognition result will contain a channel_tag field to state which + // channel that result belongs to + boolean enableSeparateRecognitionPerChannel = true; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setAudioChannelCount(audioChannelCount) + .setEnableSeparateRecognitionPerChannel(enableSeparateRecognitionPerChannel) + .setLanguageCode(languageCode) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // channelTag to recognize which audio channel this result is for + System.out.printf("Channel tag: %s\n", result.getChannelTag()); + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_multichannel_gcs] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/multi.wav"); + + sampleRecognize(storageUri); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSync.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSync.java new file mode 100644 index 00000000000..8ee11816ced --- /dev/null +++ 
b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSync.java @@ -0,0 +1,110 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_sync") +// sample-metadata: +// title: Transcribe Audio File (Local File) +// description: Transcribe a short audio file using synchronous speech recognition +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeSync [--args='[--local_file_path "resources/brooklyn_bridge.raw"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeSync { + // [START speech_transcribe_sync] + /* + * Please include the following imports to run this sample. 
+ * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Transcribe a short audio file using synchronous speech recognition + * + * @param localFilePath Path to local audio file, e.g. /path/audio.wav + */ + public static void sampleRecognize(String localFilePath) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/brooklyn_bridge.raw"; + + // The language of the supplied audio + String languageCode = "en-US"; + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 16000; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.LINEAR16; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setLanguageCode(languageCode) + .setSampleRateHertz(sampleRateHertz) + .setEncoding(encoding) + .build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_sync] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/brooklyn_bridge.raw"); + + sampleRecognize(localFilePath); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSyncGcs.java b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSyncGcs.java new file mode 100644 index 00000000000..de8474241b8 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/SpeechTranscribeSyncGcs.java @@ -0,0 +1,100 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_sync_gcs") +// sample-metadata: +// title: Transcribe Audio File (Cloud Storage) +// description: Transcribe short audio file from Cloud Storage using synchronous speech recognition +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1.SpeechTranscribeSyncGcs [--args='[--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"]'] + +package com.google.cloud.examples.speech.v1; + +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognizeRequest; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeSyncGcs { + // [START speech_transcribe_sync_gcs] + /* + * Please include the following imports to run this sample. 
+ * + * import com.google.cloud.speech.v1.RecognitionAudio; + * import com.google.cloud.speech.v1.RecognitionConfig; + * import com.google.cloud.speech.v1.RecognizeRequest; + * import com.google.cloud.speech.v1.RecognizeResponse; + * import com.google.cloud.speech.v1.SpeechClient; + * import com.google.cloud.speech.v1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1.SpeechRecognitionResult; + */ + + /** + * Transcribe short audio file from Cloud Storage using synchronous speech recognition + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + */ + public static void sampleRecognize(String storageUri) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"; + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 16000; + + // The language of the supplied audio + String languageCode = "en-US"; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.LINEAR16; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setSampleRateHertz(sampleRateHertz) + .setLanguageCode(languageCode) + .setEncoding(encoding) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_sync_gcs] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/brooklyn_bridge.raw"); + + sampleRecognize(storageUri); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1/speech.java.20191017.083206.manifest.yaml b/speech/src/main/java/com/google/cloud/examples/speech/v1/speech.java.20191017.083206.manifest.yaml new file mode 100644 index 00000000000..9a823d3810c --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1/speech.java.20191017.083206.manifest.yaml @@ -0,0 +1,60 @@ +--- +type: manifest/samples +schema_version: 3 +java: &java + environment: java + bin: mvn exec:java + base_path: samples/src/main/java/com/google/cloud/examples/speech/v1 + package: com.google.cloud.examples.speech.v1 + invocation: {bin} 
-Dexec.mainClass={class} -Dexec.args='@args' +samples: +- <<: *java + sample: "speech_transcribe_model_selection_gcs" + path: "{base_path}/SpeechTranscribeModelSelectionGcs.java" + class: {package}.SpeechTranscribeModelSelectionGcs + region_tag: "speech_transcribe_model_selection_gcs" +- <<: *java + sample: "speech_transcribe_model_selection" + path: "{base_path}/SpeechTranscribeModelSelection.java" + class: {package}.SpeechTranscribeModelSelection + region_tag: "speech_transcribe_model_selection" +- <<: *java + sample: "speech_transcribe_multichannel_gcs" + path: "{base_path}/SpeechTranscribeMultichannelGcs.java" + class: {package}.SpeechTranscribeMultichannelGcs + region_tag: "speech_transcribe_multichannel_gcs" +- <<: *java + sample: "speech_transcribe_sync_gcs" + path: "{base_path}/SpeechTranscribeSyncGcs.java" + class: {package}.SpeechTranscribeSyncGcs + region_tag: "speech_transcribe_sync_gcs" +- <<: *java + sample: "speech_transcribe_enhanced_model" + path: "{base_path}/SpeechTranscribeEnhancedModel.java" + class: {package}.SpeechTranscribeEnhancedModel + region_tag: "speech_transcribe_enhanced_model" +- <<: *java + sample: "speech_transcribe_multichannel" + path: "{base_path}/SpeechTranscribeMultichannel.java" + class: {package}.SpeechTranscribeMultichannel + region_tag: "speech_transcribe_multichannel" +- <<: *java + sample: "speech_transcribe_sync" + path: "{base_path}/SpeechTranscribeSync.java" + class: {package}.SpeechTranscribeSync + region_tag: "speech_transcribe_sync" +- <<: *java + sample: "speech_transcribe_async" + path: "{base_path}/SpeechTranscribeAsync.java" + class: {package}.SpeechTranscribeAsync + region_tag: "speech_transcribe_async" +- <<: *java + sample: "speech_transcribe_async_gcs" + path: "{base_path}/SpeechTranscribeAsyncGcs.java" + class: {package}.SpeechTranscribeAsyncGcs + region_tag: "speech_transcribe_async_gcs" +- <<: *java + sample: "speech_transcribe_async_word_time_offsets_gcs" + path: 
"{base_path}/SpeechTranscribeAsyncWordTimeOffsetsGcs.java" + class: {package}.SpeechTranscribeAsyncWordTimeOffsetsGcs + region_tag: "speech_transcribe_async_word_time_offsets_gcs" diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechAdaptationBeta.java b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechAdaptationBeta.java new file mode 100644 index 00000000000..d137ae43cd7 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechAdaptationBeta.java @@ -0,0 +1,123 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_adaptation_beta") +// sample-metadata: +// title: Speech Adaptation (Cloud Storage) +// description: Transcribe a short audio file with speech adaptation. 
+// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechAdaptationBeta [--args='[--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"] [--phrase "Brooklyn Bridge"]'] + +package com.google.cloud.examples.speech.v1p1beta1; + +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognizeRequest; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechContext; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import java.util.Arrays; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechAdaptationBeta { + // [START speech_adaptation_beta] + /* + * Please include the following imports to run this sample. + * + * import com.google.cloud.speech.v1p1beta1.RecognitionAudio; + * import com.google.cloud.speech.v1p1beta1.RecognitionConfig; + * import com.google.cloud.speech.v1p1beta1.RecognizeRequest; + * import com.google.cloud.speech.v1p1beta1.RecognizeResponse; + * import com.google.cloud.speech.v1p1beta1.SpeechClient; + * import com.google.cloud.speech.v1p1beta1.SpeechContext; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; + * import java.util.Arrays; + * import java.util.List; + */ + + /** + * Transcribe a short audio file with speech adaptation. + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + * @param phrase Phrase "hints" help recognize the specified phrases from your audio. 
+ */ + public static void sampleRecognize(String storageUri, String phrase) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"; + // phrase = "Brooklyn Bridge"; + List phrases = Arrays.asList(phrase); + + // Hint Boost. This value increases the probability that a specific + // phrase will be recognized over other similar sounding phrases. + // The higher the boost, the higher the chance of false positive + // recognition as well. Can accept wide range of positive values. + // Most use cases are best served with values between 0 and 20. + // Using a binary search approach may help you find the optimal value. + float boost = 20.0F; + SpeechContext speechContextsElement = + SpeechContext.newBuilder().addAllPhrases(phrases).setBoost(boost).build(); + List speechContexts = Arrays.asList(speechContextsElement); + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 44100; + + // The language of the supplied audio + String languageCode = "en-US"; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.MP3; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .addAllSpeechContexts(speechContexts) + .setSampleRateHertz(sampleRateHertz) + .setLanguageCode(languageCode) + .setEncoding(encoding) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_adaptation_beta] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + options.addOption(Option.builder("").required(false).hasArg(true).longOpt("phrase").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"); + String phrase = cl.getOptionValue("phrase", "Brooklyn Bridge"); + + sampleRecognize(storageUri, phrase); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechContextsClassesBeta.java b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechContextsClassesBeta.java new file mode 100644 index 00000000000..670cfff6ca3 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechContextsClassesBeta.java @@ -0,0 +1,116 @@ +/* + * Copyright 2019 Google LLC + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_contexts_classes_beta") +// sample-metadata: +// title: Using Context Classes (Cloud Storage) +// description: Transcribe a short audio file with static context classes. +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechContextsClassesBeta [--args='[--storage_uri "gs://cloud-samples-data/speech/time.mp3"] [--phrase "$TIME"]'] + +package com.google.cloud.examples.speech.v1p1beta1; + +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognizeRequest; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechContext; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import java.util.Arrays; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechContextsClassesBeta { + // [START speech_contexts_classes_beta] + /* + * Please include the following imports to run this sample. 
+ * + * import com.google.cloud.speech.v1p1beta1.RecognitionAudio; + * import com.google.cloud.speech.v1p1beta1.RecognitionConfig; + * import com.google.cloud.speech.v1p1beta1.RecognizeRequest; + * import com.google.cloud.speech.v1p1beta1.RecognizeResponse; + * import com.google.cloud.speech.v1p1beta1.SpeechClient; + * import com.google.cloud.speech.v1p1beta1.SpeechContext; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; + * import java.util.Arrays; + * import java.util.List; + */ + + /** + * Transcribe a short audio file with static context classes. + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + * @param phrase Phrase "hints" help recognize the specified phrases from your audio. In this + * sample we are using a static class phrase ($TIME). Classes represent groups of words that + * represent common concepts that occur in natural language. + */ + public static void sampleRecognize(String storageUri, String phrase) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/time.mp3"; + // phrase = "$TIME"; + List phrases = Arrays.asList(phrase); + SpeechContext speechContextsElement = + SpeechContext.newBuilder().addAllPhrases(phrases).build(); + List speechContexts = Arrays.asList(speechContextsElement); + + // The language of the supplied audio + String languageCode = "en-US"; + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 24000; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.MP3; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .addAllSpeechContexts(speechContexts) + .setLanguageCode(languageCode) + .setSampleRateHertz(sampleRateHertz) + .setEncoding(encoding) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_contexts_classes_beta] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + options.addOption(Option.builder("").required(false).hasArg(true).longOpt("phrase").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/time.mp3"); + String phrase = cl.getOptionValue("phrase", "$TIME"); + + sampleRecognize(storageUri, phrase); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechQuickstartBeta.java b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechQuickstartBeta.java new file mode 100644 index 00000000000..6b35b0adae2 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechQuickstartBeta.java @@ -0,0 +1,100 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_quickstart_beta") +// sample-metadata: +// title: Quickstart Beta +// description: Performs synchronous speech recognition on an audio file +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechQuickstartBeta [--args='[--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"]'] + +package com.google.cloud.examples.speech.v1p1beta1; + +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognizeRequest; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechQuickstartBeta { + // [START speech_quickstart_beta] + /* + * Please include the following imports to run this sample. 
+ * + * import com.google.cloud.speech.v1p1beta1.RecognitionAudio; + * import com.google.cloud.speech.v1p1beta1.RecognitionConfig; + * import com.google.cloud.speech.v1p1beta1.RecognizeRequest; + * import com.google.cloud.speech.v1p1beta1.RecognizeResponse; + * import com.google.cloud.speech.v1p1beta1.SpeechClient; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; + */ + + /** + * Performs synchronous speech recognition on an audio file + * + * @param storageUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + */ + public static void sampleRecognize(String storageUri) { + try (SpeechClient speechClient = SpeechClient.create()) { + // storageUri = "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"; + + // The language of the supplied audio + String languageCode = "en-US"; + + // Sample rate in Hertz of the audio data sent + int sampleRateHertz = 44100; + + // Encoding of audio data sent. This sample sets this explicitly. + // This field is optional for FLAC and WAV audio formats. 
+ RecognitionConfig.AudioEncoding encoding = RecognitionConfig.AudioEncoding.MP3; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setLanguageCode(languageCode) + .setSampleRateHertz(sampleRateHertz) + .setEncoding(encoding) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(storageUri).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_quickstart_beta] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("storage_uri").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String storageUri = + cl.getOptionValue("storage_uri", "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"); + + sampleRecognize(storageUri); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeAutoPunctuationBeta.java b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeAutoPunctuationBeta.java new file mode 100644 index 00000000000..3c0cbb82b29 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeAutoPunctuationBeta.java @@ -0,0 +1,107 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_auto_punctuation_beta") +// sample-metadata: +// title: Getting punctuation in results (Local File) (Beta) +// description: Transcribe a short audio file with punctuation +// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechTranscribeAutoPunctuationBeta [--args='[--local_file_path "resources/commercial_mono.wav"]'] + +package com.google.cloud.examples.speech.v1p1beta1; + +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognizeRequest; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeAutoPunctuationBeta { + // [START speech_transcribe_auto_punctuation_beta] + /* + * Please include the following imports to run this sample. 
+ * + * import com.google.cloud.speech.v1p1beta1.RecognitionAudio; + * import com.google.cloud.speech.v1p1beta1.RecognitionConfig; + * import com.google.cloud.speech.v1p1beta1.RecognizeRequest; + * import com.google.cloud.speech.v1p1beta1.RecognizeResponse; + * import com.google.cloud.speech.v1p1beta1.SpeechClient; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Transcribe a short audio file with punctuation + * + * @param localFilePath Path to local audio file, e.g. /path/audio.wav + */ + public static void sampleRecognize(String localFilePath) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/commercial_mono.wav"; + + // When enabled, trascription results may include punctuation + // (available for select languages). + boolean enableAutomaticPunctuation = true; + + // The language of the supplied audio. Even though additional languages are + // provided by alternative_language_codes, a primary language is still required. 
+ String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEnableAutomaticPunctuation(enableAutomaticPunctuation) + .setLanguageCode(languageCode) + .build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + for (SpeechRecognitionResult result : response.getResultsList()) { + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_auto_punctuation_beta] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + + CommandLine cl = (new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/commercial_mono.wav"); + + sampleRecognize(localFilePath); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeDiarizationBeta.java b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeDiarizationBeta.java new file mode 100644 index 00000000000..efd87cf873f --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeDiarizationBeta.java @@ -0,0 +1,127 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// DO NOT EDIT! This is a generated sample ("LongRunningRequestAsync", "speech_transcribe_diarization_beta")
// sample-metadata:
//   title: Separating different speakers (Local File) (LRO) (Beta)
//   description: Print confidence level for individual words in a transcription of a short audio file
//     Separating different speakers in an audio file recording
//   usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechTranscribeDiarizationBeta [--args='[--local_file_path "resources/commercial_mono.wav"]']

package com.google.cloud.examples.speech.v1p1beta1;

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeRequest;
import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
import com.google.cloud.speech.v1p1beta1.WordInfo;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class SpeechTranscribeDiarizationBeta {
  // [START speech_transcribe_diarization_beta]
  /*
   * Please include the following imports to run this sample.
   *
   * import com.google.api.gax.longrunning.OperationFuture;
   * import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
   * import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeRequest;
   * import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
   * import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
   * import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
   * import com.google.cloud.speech.v1p1beta1.SpeechClient;
   * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
   * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
   * import com.google.cloud.speech.v1p1beta1.WordInfo;
   * import com.google.protobuf.ByteString;
   * import java.nio.file.Files;
   * import java.nio.file.Path;
   * import java.nio.file.Paths;
   */

  /**
   * Print confidence level for individual words in a transcription of a short audio file Separating
   * different speakers in an audio file recording
   *
   * @param localFilePath Path to local audio file, e.g. /path/audio.wav
   */
  public static void sampleLongRunningRecognize(String localFilePath) {
    try (SpeechClient speechClient = SpeechClient.create()) {
      // localFilePath = "resources/commercial_mono.wav";

      // If enabled, each word in the first alternative of each result will be
      // tagged with a speaker tag to identify the speaker.
      boolean enableSpeakerDiarization = true;

      // Optional. Specifies the estimated number of speakers in the conversation.
      int diarizationSpeakerCount = 2;

      // The language of the supplied audio
      String languageCode = "en-US";
      RecognitionConfig config =
          RecognitionConfig.newBuilder()
              .setEnableSpeakerDiarization(enableSpeakerDiarization)
              .setDiarizationSpeakerCount(diarizationSpeakerCount)
              .setLanguageCode(languageCode)
              .build();
      Path path = Paths.get(localFilePath);
      byte[] data = Files.readAllBytes(path);
      ByteString content = ByteString.copyFrom(data);
      RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build();
      LongRunningRecognizeRequest request =
          LongRunningRecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build();
      // Parameterized future restored: the raw OperationFuture would not compile
      // against the typed future.get() assignment below.
      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
          speechClient.longRunningRecognizeAsync(request);

      System.out.println("Waiting for operation to complete...");
      LongRunningRecognizeResponse response = future.get();
      for (SpeechRecognitionResult result : response.getResultsList()) {
        // First alternative has words tagged with speakers
        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
        System.out.printf("Transcript: %s\n", alternative.getTranscript());
        // Print the speakerTag of each word
        for (WordInfo word : alternative.getWordsList()) {
          System.out.printf("Word: %s\n", word.getWord());
          System.out.printf("Speaker tag: %s\n", word.getSpeakerTag());
        }
      }
    } catch (Exception exception) {
      System.err.println("Failed to create the client due to: " + exception);
    }
  }
  // [END speech_transcribe_diarization_beta]

  /** Command-line entry point; parses --local_file_path with a resources default. */
  public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
        Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build());

    CommandLine cl = (new DefaultParser()).parse(options, args);
    String localFilePath = cl.getOptionValue("local_file_path", "resources/commercial_mono.wav");

    sampleLongRunningRecognize(localFilePath);
  }
}
diff --git
a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeMultilanguageBeta.java b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeMultilanguageBeta.java new file mode 100644 index 00000000000..05a994de41b --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeMultilanguageBeta.java @@ -0,0 +1,116 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_multilanguage_beta")
// sample-metadata:
//   title: Detecting language spoken automatically (Local File) (Beta)
//   description: Transcribe a short audio file with language detected from a list of possible languages
//   usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechTranscribeMultilanguageBeta [--args='[--local_file_path "resources/brooklyn_bridge.flac"]']

package com.google.cloud.examples.speech.v1p1beta1;

import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.RecognizeRequest;
import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class SpeechTranscribeMultilanguageBeta {
  // [START speech_transcribe_multilanguage_beta]
  /*
   * Please include the following imports to run this sample.
   *
   * import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
   * import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
   * import com.google.cloud.speech.v1p1beta1.RecognizeRequest;
   * import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
   * import com.google.cloud.speech.v1p1beta1.SpeechClient;
   * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
   * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
   * import com.google.protobuf.ByteString;
   * import java.nio.file.Files;
   * import java.nio.file.Path;
   * import java.nio.file.Paths;
   * import java.util.Arrays;
   * import java.util.List;
   */

  /**
   * Transcribe a short audio file with language detected from a list of possible languages
   *
   * @param localFilePath Path to local audio file, e.g. /path/audio.wav
   */
  public static void sampleRecognize(String localFilePath) {
    try (SpeechClient speechClient = SpeechClient.create()) {
      // localFilePath = "resources/brooklyn_bridge.flac";

      // The language of the supplied audio. Even though additional languages are
      // provided by alternative_language_codes, a primary language is still required.
      String languageCode = "fr";

      // Specify up to 3 additional languages as possible alternative languages
      // of the supplied audio.
      String alternativeLanguageCodesElement = "es";
      String alternativeLanguageCodesElement2 = "en";
      // Parameterized type restored: the generated text used a raw List, which
      // would not compile cleanly against addAllAlternativeLanguageCodes.
      List<String> alternativeLanguageCodes =
          Arrays.asList(alternativeLanguageCodesElement, alternativeLanguageCodesElement2);
      RecognitionConfig config =
          RecognitionConfig.newBuilder()
              .setLanguageCode(languageCode)
              .addAllAlternativeLanguageCodes(alternativeLanguageCodes)
              .build();
      Path path = Paths.get(localFilePath);
      byte[] data = Files.readAllBytes(path);
      ByteString content = ByteString.copyFrom(data);
      RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build();
      RecognizeRequest request =
          RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build();
      RecognizeResponse response = speechClient.recognize(request);
      for (SpeechRecognitionResult result : response.getResultsList()) {
        // The languageCode which was detected as the most likely being spoken in the audio
        System.out.printf("Detected language: %s\n", result.getLanguageCode());
        // First alternative is the most probable result
        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
        System.out.printf("Transcript: %s\n", alternative.getTranscript());
      }
    } catch (Exception exception) {
      System.err.println("Failed to create the client due to: " + exception);
    }
  }
  // [END speech_transcribe_multilanguage_beta]

  /** Command-line entry point; parses --local_file_path with a resources default. */
  public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
        Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build());

    CommandLine cl = (new DefaultParser()).parse(options, args);
    String localFilePath = cl.getOptionValue("local_file_path", "resources/brooklyn_bridge.flac");

    sampleRecognize(localFilePath);
  }
}
diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeRecognitionMetadataBeta.java
b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeRecognitionMetadataBeta.java
new file mode 100644
index 00000000000..732f4cb9781
--- /dev/null
+++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeRecognitionMetadataBeta.java
@@ -0,0 +1,123 @@
/*
 * Copyright 2019 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_recognition_metadata_beta")
// sample-metadata:
//   title: Adding recognition metadata (Local File) (Beta)
//   description: Adds additional details short audio file included in this recognition request
//   usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechTranscribeRecognitionMetadataBeta [--args='[--local_file_path "resources/commercial_mono.wav"]']

package com.google.cloud.examples.speech.v1p1beta1;

import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.RecognitionMetadata;
import com.google.cloud.speech.v1p1beta1.RecognizeRequest;
import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;

public class SpeechTranscribeRecognitionMetadataBeta {
  // [START speech_transcribe_recognition_metadata_beta]
  /*
   * Please include the following imports to run this sample.
   *
   * import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
   * import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
   * import com.google.cloud.speech.v1p1beta1.RecognitionMetadata;
   * import com.google.cloud.speech.v1p1beta1.RecognizeRequest;
   * import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
   * import com.google.cloud.speech.v1p1beta1.SpeechClient;
   * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
   * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
   * import com.google.protobuf.ByteString;
   * import java.nio.file.Files;
   * import java.nio.file.Path;
   * import java.nio.file.Paths;
   */

  /**
   * Adds additional details short audio file included in this recognition request
   *
   * @param localFilePath Path to local audio file, e.g. /path/audio.wav
   */
  public static void sampleRecognize(String localFilePath) {
    try (SpeechClient client = SpeechClient.create()) {
      // localFilePath = "resources/commercial_mono.wav";

      // Describe how the audio was produced: its use case (e.g. PHONE_CALL,
      // DISCUSSION, PRESENTATION), the kind of capture device, and an arbitrary
      // device name such as 'Pixel XL', 'VoIP', or 'Cardioid Microphone'.
      RecognitionMetadata recognitionMetadata =
          RecognitionMetadata.newBuilder()
              .setInteractionType(RecognitionMetadata.InteractionType.VOICE_SEARCH)
              .setRecordingDeviceType(RecognitionMetadata.RecordingDeviceType.SMARTPHONE)
              .setRecordingDeviceName("Pixel 3")
              .build();

      // The recognition config carries the metadata plus the audio language.
      RecognitionConfig recognitionConfig =
          RecognitionConfig.newBuilder()
              .setMetadata(recognitionMetadata)
              .setLanguageCode("en-US")
              .build();

      // Load the local file and attach its raw bytes to the request.
      byte[] audioBytes = Files.readAllBytes(Paths.get(localFilePath));
      RecognitionAudio recognitionAudio =
          RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(audioBytes)).build();
      RecognizeRequest recognizeRequest =
          RecognizeRequest.newBuilder()
              .setConfig(recognitionConfig)
              .setAudio(recognitionAudio)
              .build();

      RecognizeResponse recognizeResponse = client.recognize(recognizeRequest);
      for (SpeechRecognitionResult speechResult : recognizeResponse.getResultsList()) {
        // The first alternative carries the highest confidence.
        SpeechRecognitionAlternative best = speechResult.getAlternativesList().get(0);
        System.out.printf("Transcript: %s\n", best.getTranscript());
      }
    } catch (Exception exception) {
      System.err.println("Failed to create the client due to: " + exception);
    }
  }
  // [END speech_transcribe_recognition_metadata_beta]

  /** Command-line entry point; parses --local_file_path with a resources default. */
  public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
        Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build());

    CommandLine cl = (new DefaultParser()).parse(options, args);
    String localFilePath = cl.getOptionValue("local_file_path", "resources/commercial_mono.wav");

    sampleRecognize(localFilePath);
  }
}
diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeWordLevelConfidenceBeta.java
b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeWordLevelConfidenceBeta.java new file mode 100644 index 00000000000..979565c5b59 --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/SpeechTranscribeWordLevelConfidenceBeta.java @@ -0,0 +1,113 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_word_level_confidence_beta") +// sample-metadata: +// title: Enabling word-level confidence (Local File) (Beta) +// description: Print confidence level for individual words in a transcription of a short audio file. 
+// usage: gradle run -PmainClass=com.google.cloud.examples.speech.v1p1beta1.SpeechTranscribeWordLevelConfidenceBeta [--args='[--local_file_path "resources/brooklyn_bridge.flac"]'] + +package com.google.cloud.examples.speech.v1p1beta1; + +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognizeRequest; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import com.google.cloud.speech.v1p1beta1.WordInfo; +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +public class SpeechTranscribeWordLevelConfidenceBeta { + // [START speech_transcribe_word_level_confidence_beta] + /* + * Please include the following imports to run this sample. + * + * import com.google.cloud.speech.v1p1beta1.RecognitionAudio; + * import com.google.cloud.speech.v1p1beta1.RecognitionConfig; + * import com.google.cloud.speech.v1p1beta1.RecognizeRequest; + * import com.google.cloud.speech.v1p1beta1.RecognizeResponse; + * import com.google.cloud.speech.v1p1beta1.SpeechClient; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; + * import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; + * import com.google.cloud.speech.v1p1beta1.WordInfo; + * import com.google.protobuf.ByteString; + * import java.nio.file.Files; + * import java.nio.file.Path; + * import java.nio.file.Paths; + */ + + /** + * Print confidence level for individual words in a transcription of a short audio file. 
+ * + * @param localFilePath Path to local audio file, e.g. /path/audio.wav + */ + public static void sampleRecognize(String localFilePath) { + try (SpeechClient speechClient = SpeechClient.create()) { + // localFilePath = "resources/brooklyn_bridge.flac"; + + // When enabled, the first result returned by the API will include a list + // of words and the confidence level for each of those words. + boolean enableWordConfidence = true; + + // The language of the supplied audio + String languageCode = "en-US"; + RecognitionConfig config = + RecognitionConfig.newBuilder() + .setEnableWordConfidence(enableWordConfidence) + .setLanguageCode(languageCode) + .build(); + Path path = Paths.get(localFilePath); + byte[] data = Files.readAllBytes(path); + ByteString content = ByteString.copyFrom(data); + RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(content).build(); + RecognizeRequest request = + RecognizeRequest.newBuilder().setConfig(config).setAudio(audio).build(); + RecognizeResponse response = speechClient.recognize(request); + // The first result includes confidence levels per word + SpeechRecognitionResult result = response.getResultsList().get(0); + // First alternative is the most probable result + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript: %s\n", alternative.getTranscript()); + // Print the confidence level of each word + for (WordInfo word : alternative.getWordsList()) { + System.out.printf("Word: %s\n", word.getWord()); + System.out.printf("Confidence: %s\n", word.getConfidence()); + } + } catch (Exception exception) { + System.err.println("Failed to create the client due to: " + exception); + } + } + // [END speech_transcribe_word_level_confidence_beta] + + public static void main(String[] args) throws Exception { + Options options = new Options(); + options.addOption( + Option.builder("").required(false).hasArg(true).longOpt("local_file_path").build()); + + CommandLine cl = 
(new DefaultParser()).parse(options, args); + String localFilePath = cl.getOptionValue("local_file_path", "resources/brooklyn_bridge.flac"); + + sampleRecognize(localFilePath); + } +} diff --git a/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/speech.java.20191017.083221.manifest.yaml b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/speech.java.20191017.083221.manifest.yaml new file mode 100644 index 00000000000..cf1d363d5cc --- /dev/null +++ b/speech/src/main/java/com/google/cloud/examples/speech/v1p1beta1/speech.java.20191017.083221.manifest.yaml @@ -0,0 +1,50 @@ +--- +type: manifest/samples +schema_version: 3 +java: &java + environment: java + bin: mvn exec:java + base_path: samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1 + package: com.google.cloud.examples.speech.v1p1beta1 + invocation: {bin} -Dexec.mainClass={class} -Dexec.args='@args' +samples: +- <<: *java + sample: "speech_contexts_classes_beta" + path: "{base_path}/SpeechContextsClassesBeta.java" + class: {package}.SpeechContextsClassesBeta + region_tag: "speech_contexts_classes_beta" +- <<: *java + sample: "speech_transcribe_auto_punctuation_beta" + path: "{base_path}/SpeechTranscribeAutoPunctuationBeta.java" + class: {package}.SpeechTranscribeAutoPunctuationBeta + region_tag: "speech_transcribe_auto_punctuation_beta" +- <<: *java + sample: "speech_quickstart_beta" + path: "{base_path}/SpeechQuickstartBeta.java" + class: {package}.SpeechQuickstartBeta + region_tag: "speech_quickstart_beta" +- <<: *java + sample: "speech_transcribe_word_level_confidence_beta" + path: "{base_path}/SpeechTranscribeWordLevelConfidenceBeta.java" + class: {package}.SpeechTranscribeWordLevelConfidenceBeta + region_tag: "speech_transcribe_word_level_confidence_beta" +- <<: *java + sample: "speech_adaptation_beta" + path: "{base_path}/SpeechAdaptationBeta.java" + class: {package}.SpeechAdaptationBeta + region_tag: "speech_adaptation_beta" +- <<: *java + sample: 
"speech_transcribe_multilanguage_beta" + path: "{base_path}/SpeechTranscribeMultilanguageBeta.java" + class: {package}.SpeechTranscribeMultilanguageBeta + region_tag: "speech_transcribe_multilanguage_beta" +- <<: *java + sample: "speech_transcribe_recognition_metadata_beta" + path: "{base_path}/SpeechTranscribeRecognitionMetadataBeta.java" + class: {package}.SpeechTranscribeRecognitionMetadataBeta + region_tag: "speech_transcribe_recognition_metadata_beta" +- <<: *java + sample: "speech_transcribe_diarization_beta" + path: "{base_path}/SpeechTranscribeDiarizationBeta.java" + class: {package}.SpeechTranscribeDiarizationBeta + region_tag: "speech_transcribe_diarization_beta"