Skip to content

Commit

Permalink
samples: feat: video speech transcription (#1264)
Browse files Browse the repository at this point in the history
  • Loading branch information
anguillanneuf authored and chingor13 committed Feb 24, 2021
1 parent fd74bbe commit 43eb235
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 1 deletion.
77 changes: 76 additions & 1 deletion video/src/main/java/com/example/video/Detect.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,21 @@
import com.google.cloud.videointelligence.v1.Feature;
import com.google.cloud.videointelligence.v1.LabelAnnotation;
import com.google.cloud.videointelligence.v1.LabelSegment;
import com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative;
import com.google.cloud.videointelligence.v1.SpeechTranscription;
import com.google.cloud.videointelligence.v1.SpeechTranscriptionConfig;
import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
import com.google.cloud.videointelligence.v1.VideoContext;
import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
import com.google.cloud.videointelligence.v1.VideoSegment;
import com.google.cloud.videointelligence.v1.WordInfo;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;

import org.apache.commons.codec.binary.Base64;


Expand Down Expand Up @@ -83,6 +90,9 @@ public static void argsHelper(String[] args) throws Exception {
if (command.equals("explicit-content")) {
analyzeExplicitContent(path);
}
if (command.equals("speech-transcription")) {
speechTranscription(path);
}
}

/**
Expand Down Expand Up @@ -322,4 +332,69 @@ public static void analyzeExplicitContent(String gcsUri) throws Exception {
// [END video_analyze_explicit_content]
}
}
}

/**
 * Transcribes speech from a video stored on GCS and prints the results to standard output.
 *
 * <p>For every speech transcription in the response, the first alternative's transcript and
 * confidence are printed, followed by the start and end offset (in seconds) of each word.
 * A transcription that carries no alternatives is reported as "No transcription found".
 *
 * @param gcsUri the path to the video file to analyze.
 * @throws Exception if the client cannot be created, or if the annotation operation fails or
 *     does not finish within the 600-second wait below.
 */
public static void speechTranscription(String gcsUri) throws Exception {
  // [START video_speech_transcription_gcs]
  // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
  try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
    // Set the language code
    SpeechTranscriptionConfig config = SpeechTranscriptionConfig.newBuilder()
        .setLanguageCode("en-US")
        .setEnableAutomaticPunctuation(true)
        .build();

    // Set the video context with the above configuration
    VideoContext context = VideoContext.newBuilder()
        .setSpeechTranscriptionConfig(config)
        .build();

    // Create the request
    AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
        .setInputUri(gcsUri)
        .addFeatures(Feature.SPEECH_TRANSCRIPTION)
        .setVideoContext(context)
        .build();

    // Asynchronously perform speech transcription on the video
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
        client.annotateVideoAsync(request);

    System.out.println("Waiting for operation to complete...");
    // Block for at most 600 seconds, then display the results
    for (VideoAnnotationResults results : response.get(600, TimeUnit.SECONDS)
        .getAnnotationResultsList()) {
      for (SpeechTranscription speechTranscription : results.getSpeechTranscriptionsList()) {
        // Guard before indexing: getAlternatives(0) is only valid when at least one
        // alternative exists, so no IndexOutOfBoundsException handling is needed.
        if (speechTranscription.getAlternativesCount() == 0) {
          System.out.println("No transcription found");
          continue;
        }

        // Print the transcription
        SpeechRecognitionAlternative alternative = speechTranscription.getAlternatives(0);

        System.out.printf("Transcript: %s\n", alternative.getTranscript());
        System.out.printf("Confidence: %.2f\n", alternative.getConfidence());

        System.out.println("Word level information:");
        for (WordInfo wordInfo : alternative.getWordsList()) {
          // Combine whole seconds and nanoseconds into fractional seconds
          double startTime = wordInfo.getStartTime().getSeconds()
              + wordInfo.getStartTime().getNanos() / 1e9;
          double endTime = wordInfo.getEndTime().getSeconds()
              + wordInfo.getEndTime().getNanos() / 1e9;
          System.out.printf("\t%4.2fs - %4.2fs: %s\n",
              startTime, endTime, wordInfo.getWord());
        }
      }
    }
  }
  // [END video_speech_transcription_gcs]
}
}
11 changes: 11 additions & 0 deletions video/src/test/java/com/example/video/DetectIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ public class DetectIT {
// Input video locations for the integration tests. The first is a relative local path; the
// others are gs:// URIs. NOTE(review): which test consumes each of the first three is
// presumably indicated by its name — their usages are not visible in this hunk; confirm.
static final String LABEL_FILE_LOCATION = "./resources/cat.mp4";
static final String SHOTS_FILE_LOCATION = "gs://demomaker/gbikes_dinosaur.mp4";
static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4";
// GCS video passed to the "speech-transcription" command by testSpeechTranscription.
static final String SPEECH_GCS_LOCATION =
"gs://java-docs-samples-testing/video/googlework_short.mp4";

@Before
public void setUp() {
Expand Down Expand Up @@ -84,4 +86,13 @@ public void testShots() throws Exception {
assertThat(got).contains("Shots:");
assertThat(got).contains("Location: 0");
}

@Test
public void testSpeechTranscription() throws Exception {
  // Run the speech-transcription command against the sample video on GCS.
  Detect.argsHelper(new String[] {"speech-transcription", SPEECH_GCS_LOCATION});

  // The captured output is expected to contain a word from the video's transcript.
  String transcriptOutput = bout.toString();
  assertThat(transcriptOutput).contains("cultural");
}
}

0 comments on commit 43eb235

Please sign in to comment.