From a806f803bdd699ec1c9a15ad0239d5c061bf353b Mon Sep 17 00:00:00 2001
From: Nirupa Anantha Kumar
Date: Tue, 5 Mar 2019 12:54:51 -0800
Subject: [PATCH] samples: Video Intelligence : Object tracking and Text
 detection (OCR) - GA (#1347)

* Video Intelligence : Object tracking and Text detection (OCR) - GA

* Updated config version
---
 .../main/java/com/example/video/Detect.java   |   1 -
 .../java/com/example/video/TextDetection.java | 166 +++++++++++++++++
 .../java/com/example/video/TrackObjects.java  | 176 ++++++++++++++++++
 .../test/java/com/example/video/DetectIT.java |  72 +++++++
 4 files changed, 414 insertions(+), 1 deletion(-)
 create mode 100644 video/src/main/java/com/example/video/TextDetection.java
 create mode 100644 video/src/main/java/com/example/video/TrackObjects.java

diff --git a/video/src/main/java/com/example/video/Detect.java b/video/src/main/java/com/example/video/Detect.java
index ee5dad37194..a2c5bd1c36f 100644
--- a/video/src/main/java/com/example/video/Detect.java
+++ b/video/src/main/java/com/example/video/Detect.java
@@ -40,7 +40,6 @@
 import java.nio.file.Paths;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.commons.codec.binary.Base64;
 
 public class Detect {
 
diff --git a/video/src/main/java/com/example/video/TextDetection.java b/video/src/main/java/com/example/video/TextDetection.java
new file mode 100644
index 00000000000..f21b3c9e832
--- /dev/null
+++ b/video/src/main/java/com/example/video/TextDetection.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.video;
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.videointelligence.v1.AnnotateVideoProgress;
+import com.google.cloud.videointelligence.v1.AnnotateVideoRequest;
+import com.google.cloud.videointelligence.v1.AnnotateVideoResponse;
+import com.google.cloud.videointelligence.v1.Feature;
+import com.google.cloud.videointelligence.v1.NormalizedVertex;
+import com.google.cloud.videointelligence.v1.TextAnnotation;
+import com.google.cloud.videointelligence.v1.TextFrame;
+import com.google.cloud.videointelligence.v1.TextSegment;
+import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
+import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
+import com.google.cloud.videointelligence.v1.VideoSegment;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Duration;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+public class TextDetection {
+
+  // [START video_detect_text]
+  /**
+   * Detect text in a video.
+   *
+   * @param filePath the path to the video file to analyze.
+   */
+  public static VideoAnnotationResults detectText(String filePath) throws Exception {
+    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
+      // Read the file.
+      Path path = Paths.get(filePath);
+      byte[] data = Files.readAllBytes(path);
+
+      // Create the request.
+      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
+          .setInputContent(ByteString.copyFrom(data))
+          .addFeatures(Feature.TEXT_DETECTION)
+          .build();
+
+      // Asynchronously perform text detection on the video.
+      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
+          client.annotateVideoAsync(request);
+
+      System.out.println("Waiting for operation to complete...");
+      // The first result is retrieved because a single video was processed.
+      AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
+      VideoAnnotationResults results = response.getAnnotationResults(0);
+
+      // Get only the first annotation for demo purposes.
+      TextAnnotation annotation = results.getTextAnnotations(0);
+      System.out.println("Text: " + annotation.getText());
+
+      // Get the first text segment.
+      TextSegment textSegment = annotation.getSegments(0);
+      System.out.println("Confidence: " + textSegment.getConfidence());
+      // For the text segment, display its time offset.
+      VideoSegment videoSegment = textSegment.getSegment();
+      Duration startTimeOffset = videoSegment.getStartTimeOffset();
+      Duration endTimeOffset = videoSegment.getEndTimeOffset();
+      // Display the offset times in seconds; 1e9 converts nanos to seconds.
+      System.out.println(String.format("Start time: %.2f",
+          startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9));
+      System.out.println(String.format("End time: %.2f",
+          endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
+
+      // Show the first result for the first frame in the segment.
+      TextFrame textFrame = textSegment.getFrames(0);
+      Duration timeOffset = textFrame.getTimeOffset();
+      System.out.println(String.format("Time offset for the first frame: %.2f",
+          timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
+
+      // Display the rotated bounding box for where the text is on the frame.
+      System.out.println("Rotated Bounding Box Vertices:");
+      List<NormalizedVertex> vertices = textFrame.getRotatedBoundingBox().getVerticesList();
+      for (NormalizedVertex normalizedVertex : vertices) {
+        System.out.println(String.format(
+            "\tVertex.x: %.2f, Vertex.y: %.2f",
+            normalizedVertex.getX(),
+            normalizedVertex.getY()));
+      }
+      return results;
+    }
+  }
+  // [END video_detect_text]
+
+  // [START video_detect_text_gcs]
+  /**
+   * Detect text in a video stored in Cloud Storage.
+   *
+   * @param gcsUri the Cloud Storage URI of the video file to analyze.
+   */
+  public static VideoAnnotationResults detectTextGcs(String gcsUri) throws Exception {
+    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
+      // Create the request.
+      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
+          .setInputUri(gcsUri)
+          .addFeatures(Feature.TEXT_DETECTION)
+          .build();
+
+      // Asynchronously perform text detection on the video.
+      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
+          client.annotateVideoAsync(request);
+
+      System.out.println("Waiting for operation to complete...");
+      // The first result is retrieved because a single video was processed.
+      AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
+      VideoAnnotationResults results = response.getAnnotationResults(0);
+
+      // Get only the first annotation for demo purposes.
+      TextAnnotation annotation = results.getTextAnnotations(0);
+      System.out.println("Text: " + annotation.getText());
+
+      // Get the first text segment.
+      TextSegment textSegment = annotation.getSegments(0);
+      System.out.println("Confidence: " + textSegment.getConfidence());
+      // For the text segment, display its time offset.
+      VideoSegment videoSegment = textSegment.getSegment();
+      Duration startTimeOffset = videoSegment.getStartTimeOffset();
+      Duration endTimeOffset = videoSegment.getEndTimeOffset();
+      // Display the offset times in seconds; 1e9 converts nanos to seconds.
+      System.out.println(String.format("Start time: %.2f",
+          startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9));
+      System.out.println(String.format("End time: %.2f",
+          endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
+
+      // Show the first result for the first frame in the segment.
+      TextFrame textFrame = textSegment.getFrames(0);
+      Duration timeOffset = textFrame.getTimeOffset();
+      System.out.println(String.format("Time offset for the first frame: %.2f",
+          timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
+
+      // Display the rotated bounding box for where the text is on the frame.
+      System.out.println("Rotated Bounding Box Vertices:");
+      List<NormalizedVertex> vertices = textFrame.getRotatedBoundingBox().getVerticesList();
+      for (NormalizedVertex normalizedVertex : vertices) {
+        System.out.println(String.format(
+            "\tVertex.x: %.2f, Vertex.y: %.2f",
+            normalizedVertex.getX(),
+            normalizedVertex.getY()));
+      }
+      return results;
+    }
+  }
+  // [END video_detect_text_gcs]
+}
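Note: to try the new sample outside the test suite, a minimal driver like the sketch below works. TextDetectionDemo is hypothetical (not part of this patch), and the two paths shown are the ones the integration tests use, so substitute your own video as needed.

package com.example.video;

public class TextDetectionDemo {

  public static void main(String[] args) throws Exception {
    // Analyze a local file; detectText prints its findings and returns the raw results.
    TextDetection.detectText("resources/googlework_short.mp4");

    // Analyze a file that is already in Cloud Storage.
    TextDetection.detectTextGcs("gs://java-docs-samples-testing/video/googlework_short.mp4");
  }
}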
diff --git a/video/src/main/java/com/example/video/TrackObjects.java b/video/src/main/java/com/example/video/TrackObjects.java
new file mode 100644
index 00000000000..61fc6434c7f
--- /dev/null
+++ b/video/src/main/java/com/example/video/TrackObjects.java
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.video;
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.videointelligence.v1.AnnotateVideoProgress;
+import com.google.cloud.videointelligence.v1.AnnotateVideoRequest;
+import com.google.cloud.videointelligence.v1.AnnotateVideoResponse;
+import com.google.cloud.videointelligence.v1.Entity;
+import com.google.cloud.videointelligence.v1.Feature;
+import com.google.cloud.videointelligence.v1.NormalizedBoundingBox;
+import com.google.cloud.videointelligence.v1.ObjectTrackingAnnotation;
+import com.google.cloud.videointelligence.v1.ObjectTrackingFrame;
+import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
+import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
+import com.google.cloud.videointelligence.v1.VideoSegment;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Duration;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.TimeUnit;
+
+public class TrackObjects {
+
+  // [START video_object_tracking]
+  /**
+   * Track objects in a video.
+   *
+   * @param filePath the path to the video file to analyze.
+   */
+  public static VideoAnnotationResults trackObjects(String filePath) throws Exception {
+    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
+      // Read the file.
+      Path path = Paths.get(filePath);
+      byte[] data = Files.readAllBytes(path);
+
+      // Create the request.
+      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
+          .setInputContent(ByteString.copyFrom(data))
+          .addFeatures(Feature.OBJECT_TRACKING)
+          .setLocationId("us-east1")
+          .build();
+
+      // Asynchronously perform object tracking on the video.
+      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
+          client.annotateVideoAsync(request);
+
+      System.out.println("Waiting for operation to complete...");
+      // The first result is retrieved because a single video was processed.
+      AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
+      VideoAnnotationResults results = response.getAnnotationResults(0);
+
+      // Get only the first annotation for demo purposes.
+      ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
+      System.out.println("Confidence: " + annotation.getConfidence());
+
+      if (annotation.hasEntity()) {
+        Entity entity = annotation.getEntity();
+        System.out.println("Entity description: " + entity.getDescription());
+        System.out.println("Entity id: " + entity.getEntityId());
+      }
+
+      if (annotation.hasSegment()) {
+        VideoSegment videoSegment = annotation.getSegment();
+        Duration startTimeOffset = videoSegment.getStartTimeOffset();
+        Duration endTimeOffset = videoSegment.getEndTimeOffset();
+        // Display the segment time in seconds; 1e9 converts nanos to seconds.
+        System.out.println(String.format(
+            "Segment: %.2fs to %.2fs",
+            startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
+            endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
+      }
+
+      // Here we print only the bounding box of the first frame in this segment.
+      ObjectTrackingFrame frame = annotation.getFrames(0);
+      // Display the offset time in seconds; 1e9 converts nanos to seconds.
+      Duration timeOffset = frame.getTimeOffset();
+      System.out.println(String.format(
+          "Time offset of the first frame: %.2fs",
+          timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
+
+      // Display the bounding box of the detected object.
+      NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
+      System.out.println("Bounding box position:");
+      System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
+      System.out.println("\ttop: " + normalizedBoundingBox.getTop());
+      System.out.println("\tright: " + normalizedBoundingBox.getRight());
+      System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());
+      return results;
+    }
+  }
+  // [END video_object_tracking]
+
+  // [START video_object_tracking_gcs]
+  /**
+   * Track objects in a video stored in Cloud Storage.
+   *
+   * @param gcsUri the Cloud Storage URI of the video file to analyze.
+   */
+  public static VideoAnnotationResults trackObjectsGcs(String gcsUri) throws Exception {
+    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
+      // Create the request.
+      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
+          .setInputUri(gcsUri)
+          .addFeatures(Feature.OBJECT_TRACKING)
+          .setLocationId("us-east1")
+          .build();
+
+      // Asynchronously perform object tracking on the video.
+      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
+          client.annotateVideoAsync(request);
+
+      System.out.println("Waiting for operation to complete...");
+      // The first result is retrieved because a single video was processed.
+      AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
+      VideoAnnotationResults results = response.getAnnotationResults(0);
+
+      // Get only the first annotation for demo purposes.
+      ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
+      System.out.println("Confidence: " + annotation.getConfidence());
+
+      if (annotation.hasEntity()) {
+        Entity entity = annotation.getEntity();
+        System.out.println("Entity description: " + entity.getDescription());
+        System.out.println("Entity id: " + entity.getEntityId());
+      }
+
+      if (annotation.hasSegment()) {
+        VideoSegment videoSegment = annotation.getSegment();
+        Duration startTimeOffset = videoSegment.getStartTimeOffset();
+        Duration endTimeOffset = videoSegment.getEndTimeOffset();
+        // Display the segment time in seconds; 1e9 converts nanos to seconds.
+        System.out.println(String.format(
+            "Segment: %.2fs to %.2fs",
+            startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
+            endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
+      }
+
+      // Here we print only the bounding box of the first frame in this segment.
+      ObjectTrackingFrame frame = annotation.getFrames(0);
+      // Display the offset time in seconds; 1e9 converts nanos to seconds.
+      Duration timeOffset = frame.getTimeOffset();
+      System.out.println(String.format(
+          "Time offset of the first frame: %.2fs",
+          timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
+
+      // Display the bounding box of the detected object.
+      NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
+      System.out.println("Bounding box position:");
+      System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
+      System.out.println("\ttop: " + normalizedBoundingBox.getTop());
+      System.out.println("\tright: " + normalizedBoundingBox.getRight());
+      System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());
+      return results;
+    }
+  }
+  // [END video_object_tracking_gcs]
+}
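Note: trackObjects and trackObjectsGcs read only the first annotation for demo purposes; a caller that wants every tracked object can walk the full list on the returned VideoAnnotationResults. A minimal sketch follows (TrackObjectsDemo is hypothetical and not part of this patch; resources/cat.mp4 is the clip the integration test uses):

package com.example.video;

import com.google.cloud.videointelligence.v1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1.VideoAnnotationResults;

public class TrackObjectsDemo {

  public static void main(String[] args) throws Exception {
    VideoAnnotationResults results = TrackObjects.trackObjects("resources/cat.mp4");

    // Print every tracked object, not just the first annotation the sample shows.
    for (ObjectTrackingAnnotation annotation : results.getObjectAnnotationsList()) {
      System.out.println(String.format("%s (confidence: %.2f)",
          annotation.getEntity().getDescription(), annotation.getConfidence()));
    }
  }
}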
diff --git a/video/src/test/java/com/example/video/DetectIT.java b/video/src/test/java/com/example/video/DetectIT.java
index 1404ec66888..37cc580845d 100644
--- a/video/src/test/java/com/example/video/DetectIT.java
+++ b/video/src/test/java/com/example/video/DetectIT.java
@@ -18,8 +18,13 @@
 
 import static com.google.common.truth.Truth.assertThat;
 
+import com.google.cloud.videointelligence.v1.ObjectTrackingAnnotation;
+import com.google.cloud.videointelligence.v1.TextAnnotation;
+import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.List;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -39,6 +44,9 @@ public class DetectIT {
   static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4";
   static final String SPEECH_GCS_LOCATION =
       "gs://java-docs-samples-testing/video/googlework_short.mp4";
+  private static final List<String> POSSIBLE_TEXTS = Arrays.asList(
+      "Google", "SUR", "ROTO", "Vice President", "58oo9", "LONDRES", "OMAR", "PARIS",
+      "METRO", "RUE", "CARLO");
 
   @Before
   public void setUp() {
@@ -95,4 +103,68 @@ public void testSpeechTranscription() throws Exception {
 
     assertThat(got).contains("cultural");
   }
+
+  @Test
+  public void testTrackObjects() throws Exception {
+    VideoAnnotationResults result = TrackObjects.trackObjects("resources/cat.mp4");
+
+    boolean objectExists = false;
+    for (ObjectTrackingAnnotation objectTrackingAnnotation : result.getObjectAnnotationsList()) {
+      if (objectTrackingAnnotation.getEntity().getDescription().toUpperCase().contains("CAT")) {
+        objectExists = true;
+        break;
+      }
+    }
+
+    assertThat(objectExists).isTrue();
+  }
+
+  @Test
+  public void testTrackObjectsGcs() throws Exception {
+    VideoAnnotationResults result = TrackObjects.trackObjectsGcs("gs://demomaker/cat.mp4");
+
+    boolean objectExists = false;
+    for (ObjectTrackingAnnotation objectTrackingAnnotation : result.getObjectAnnotationsList()) {
+      if (objectTrackingAnnotation.getEntity().getDescription().toUpperCase().contains("CAT")) {
+        objectExists = true;
+        break;
+      }
+    }
+
+    assertThat(objectExists).isTrue();
+  }
+
+  @Test
+  public void testTextDetection() throws Exception {
+    VideoAnnotationResults result = TextDetection.detectText("resources/googlework_short.mp4");
+
+    boolean textExists = false;
+    for (TextAnnotation textAnnotation : result.getTextAnnotationsList()) {
+      for (String possibleText : POSSIBLE_TEXTS) {
+        if (textAnnotation.getText().toUpperCase().contains(possibleText.toUpperCase())) {
+          textExists = true;
+          break;
+        }
+      }
+    }
+
+    assertThat(textExists).isTrue();
+  }
+
+  @Test
+  public void testTextDetectionGcs() throws Exception {
+    VideoAnnotationResults result = TextDetection.detectTextGcs(SPEECH_GCS_LOCATION);
+
+    boolean textExists = false;
+    for (TextAnnotation textAnnotation : result.getTextAnnotationsList()) {
+      for (String possibleText : POSSIBLE_TEXTS) {
+        if (textAnnotation.getText().toUpperCase().contains(possibleText.toUpperCase())) {
+          textExists = true;
+          break;
+        }
+      }
+    }
+
+    assertThat(textExists).isTrue();
+  }
 }
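Note: both new samples repeat the getSeconds() + getNanos() / 1e9 conversion when printing offsets. If that duplication grows, a small package-private helper could factor it out; the sketch below is hypothetical and not part of this patch.

package com.example.video;

import com.google.protobuf.Duration;

class TimeOffsets {

  // Convert a protobuf Duration to fractional seconds (1e9 nanos per second).
  static double toSeconds(Duration duration) {
    return duration.getSeconds() + duration.getNanos() / 1e9;
  }
}

With it, the print statements reduce to String.format("Start time: %.2f", TimeOffsets.toSeconds(startTimeOffset)).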