Skip to content

Commit

Permalink
samples: Due to API backend changes, update the samples to match (#1595)
Browse files Browse the repository at this point in the history
  • Loading branch information
nnegrey authored Oct 1, 2019
1 parent a181f3a commit cbb55b6
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 34 deletions.
107 changes: 77 additions & 30 deletions speech/snippets/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,18 @@
import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance;
import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType;
import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
import com.google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig;
import com.google.cloud.speech.v1p1beta1.SpeechClient;

import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
import com.google.cloud.speech.v1p1beta1.WordInfo;
import com.google.protobuf.ByteString;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

public class Recognize {

Expand Down Expand Up @@ -154,32 +156,52 @@ public static void transcribeDiarization(String fileName) throws Exception {
RecognitionAudio recognitionAudio =
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
.setEnableSpeakerDiarization(true)
.setMinSpeakerCount(2)
.setMaxSpeakerCount(2)
.build();

// Configure request to enable Speaker diarization
RecognitionConfig config =
RecognitionConfig.newBuilder()
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(8000)
.setEnableSpeakerDiarization(true)
.setDiarizationSpeakerCount(2)
.setDiarizationConfig(speakerDiarizationConfig)
.build();

// Perform the transcription request
RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

// Print out the results
for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
// There can be several alternative transcripts for a given chunk of speech. Just
// use the first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
System.out.format("Transcript : %s\n", alternative.getTranscript());
// The words array contains the entire transcript up until that point.
// Referencing the last spoken word to get the associated Speaker tag
System.out.format(
"Speaker Tag %s: %s\n",
alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
alternative.getTranscript());
// Speaker Tags are only included in the last result object, which has only one alternative.
SpeechRecognitionAlternative alternative =
recognizeResponse.getResults(
recognizeResponse.getResultsCount() - 1).getAlternatives(0);

// The alternative is made up of WordInfo objects that contain the speaker_tag.
WordInfo wordInfo = alternative.getWords(0);
int currentSpeakerTag = wordInfo.getSpeakerTag();

// Walk the words in order, appending each one to the current speaker's line. When the speaker
// changes, start a new line labeled with the new speaker followed by their words.
StringBuilder speakerWords = new StringBuilder(
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));

for (int i = 1; i < alternative.getWordsCount(); i++) {
wordInfo = alternative.getWords(i);
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
speakerWords.append(" ");
speakerWords.append(wordInfo.getWord());
} else {
speakerWords.append(
String.format("\nSpeaker %d: %s",
wordInfo.getSpeakerTag(),
wordInfo.getWord()));
currentSpeakerTag = wordInfo.getSpeakerTag();
}
}

System.out.println(speakerWords.toString());
}
}
// [END speech_transcribe_diarization_beta]
Expand All @@ -192,14 +214,19 @@ public static void transcribeDiarization(String fileName) throws Exception {
*/
public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
try (SpeechClient speechClient = SpeechClient.create()) {
SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
.setEnableSpeakerDiarization(true)
.setMinSpeakerCount(2)
.setMaxSpeakerCount(2)
.build();

// Configure request to enable Speaker diarization
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(8000)
.setEnableSpeakerDiarization(true)
.setDiarizationSpeakerCount(2)
.setDiarizationConfig(speakerDiarizationConfig)
.build();

// Set the remote path for the audio file
Expand All @@ -214,17 +241,37 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
Thread.sleep(10000);
}

for (SpeechRecognitionResult result : response.get().getResultsList()) {
// There can be several alternative transcripts for a given chunk of speech. Just
// use the first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
// The words array contains the entire transcript up until that point.
// Referencing the last spoken word to get the associated Speaker tag
System.out.format(
"Speaker Tag %s:%s\n",
alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
alternative.getTranscript());
// Speaker Tags are only included in the last result object, which has only one alternative.
LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
SpeechRecognitionAlternative alternative =
longRunningRecognizeResponse.getResults(
longRunningRecognizeResponse.getResultsCount() - 1)
.getAlternatives(0);

// The alternative is made up of WordInfo objects that contain the speaker_tag.
WordInfo wordInfo = alternative.getWords(0);
int currentSpeakerTag = wordInfo.getSpeakerTag();

// Walk the words in order, appending each one to the current speaker's line. When the speaker
// changes, start a new line labeled with the new speaker followed by their words.
StringBuilder speakerWords = new StringBuilder(
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));

for (int i = 1; i < alternative.getWordsCount(); i++) {
wordInfo = alternative.getWords(i);
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
speakerWords.append(" ");
speakerWords.append(wordInfo.getWord());
} else {
speakerWords.append(
String.format("\nSpeaker %d: %s",
wordInfo.getSpeakerTag(),
wordInfo.getWord()));
currentSpeakerTag = wordInfo.getSpeakerTag();
}
}

System.out.println(speakerWords.toString());
}
}
// [END speech_transcribe_diarization_gcs_beta]
Expand Down Expand Up @@ -454,7 +501,7 @@ public static void transcribeWordLevelConfidenceGcs(String gcsUri) throws Except
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setSampleRateHertz(16000)
.setSampleRateHertz(44100)
.setLanguageCode("en-US")
.setEnableWordConfidence(true)
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
@RunWith(JUnit4.class)
@SuppressWarnings("checkstyle:abbreviationaswordinname")
public class RecognizeIT {
private static final String BUCKET = "cloud-samples-tests";
private static final String BUCKET = "cloud-samples-data";

private ByteArrayOutputStream bout;
private PrintStream out;
Expand All @@ -39,7 +39,7 @@ public class RecognizeIT {
private String audioFileName = "./resources/audio.raw";
private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn_bridge.flac";
private String gcsDiarizationAudioPath = "gs://" + BUCKET + "/speech/commercial_mono.wav";

// The path to the video file to transcribe
Expand Down Expand Up @@ -71,14 +71,16 @@ public void testMetadata() throws Exception {
public void testTranscribeDiarization() throws Exception {
Recognize.transcribeDiarization(recognitionAudioFile);
String got = bout.toString();
assertThat(got).contains("Speaker Tag 2:");
assertThat(got).contains("Speaker 1: I'm here");
assertThat(got).contains("Speaker 2: hi I'd like to buy a Chrome Cast");
}

@Test
public void testTranscribeDiarizationGcs() throws Exception {
Recognize.transcribeDiarizationGcs(gcsDiarizationAudioPath);
String got = bout.toString();
assertThat(got).contains("Speaker Tag 2:");
assertThat(got).contains("Speaker 1: I'm here");
assertThat(got).contains("Speaker 2: hi I'd like to buy a Chrome Cast");
}

@Test
Expand Down

0 comments on commit cbb55b6

Please sign in to comment.