opencv · bluehatch · Jun 27, 2024 · Jun 28, 2024 · Jun 28, 2024 · opencv-alalek
diff --git a/.gitignore b/.gitignore
@@ -3,7 +3,12 @@
 **/__pycache__/**
 
 .vscode
+.idea
 
 build/
 **/build
 **/build/**
+
+target/
+**/target
+**/target/**
diff --git a/models/object_tracking_vittrack/README.md b/models/object_tracking_vittrack/README.md
@@ -40,6 +40,21 @@ cmake --build build
 ./build/opencv_zoo_object_tracking_vittrack -h
 ```
 
+## Java
+
+Install Maven to get started.
+
+```shell
+# tracking on camera input
+mvn compile exec:java -q
+
+# tracking on video
+mvn compile exec:java -q -Dexec.args="-i /path/to/video"
+
+# get help messages
+mvn compile exec:java -q -Dexec.args="-h"
+```
+
 # Example outputs
 
 <img src="example_outputs/vittrack_demo.gif" style="zoom:200%;" />

diff --git a/models/object_tracking_vittrack/demo.java b/models/object_tracking_vittrack/demo.java
@@ -0,0 +1,257 @@
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.UnixStyleUsageFormatter;
+import org.bytedeco.javacpp.BytePointer;
+import org.bytedeco.opencv.global.opencv_dnn;
+import org.bytedeco.opencv.opencv_core.*;
+import org.bytedeco.opencv.opencv_video.TrackerVit;
+import org.bytedeco.opencv.opencv_videoio.VideoCapture;
+import org.bytedeco.opencv.opencv_videoio.VideoWriter;
+
+import static org.bytedeco.opencv.global.opencv_highgui.*;
+import static org.bytedeco.opencv.global.opencv_imgproc.*;
+import static org.bytedeco.opencv.global.opencv_videoio.CAP_PROP_FPS;
+
+/**
+ * Command-line demo that tracks a user-selected region with {@code TrackerVit}.
+ *
+ * <p>The first frame of the input (a video file or camera 0) is shown so that a region of
+ * interest can be drawn. Every following frame is passed to the tracker, and the annotated
+ * result is optionally displayed and/or written to {@code output.mp4}.
+ */
+public class demo {
+
+    /** Title of the window used for ROI selection and for showing results. */
+    private static final String WINDOW_NAME = "VitTrack Demo";
+
+    /** Scores below this value are drawn as a lost target. */
+    private static final double SCORE_THRESHOLD = 0.3;
+
+    // Valid combinations of backends and targets, indexed by the --backend_target option
+    static final int[][] backendTargetPairs = {
+            {opencv_dnn.DNN_BACKEND_OPENCV, opencv_dnn.DNN_TARGET_CPU},
+            {opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA},
+            {opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA_FP16},
+            {opencv_dnn.DNN_BACKEND_TIMVX, opencv_dnn.DNN_TARGET_NPU},
+            {opencv_dnn.DNN_BACKEND_CANN, opencv_dnn.DNN_TARGET_NPU}
+    };
+
+    /** Command-line options, populated by JCommander. */
+    static class Args {
+        @Parameter(names = {"--help", "-h"}, order = 0, help = true,
+                description = "Print help message.")
+        boolean help;
+        @Parameter(names = {"--input", "-i"}, order = 1,
+                description = "Set path to the input video. Omit for using default camera.")
+        String input;
+        @Parameter(names = {"--model_path", "-m"}, order = 2,
+                description = "Set model path.")
+        String modelPath = "object_tracking_vittrack_2023sep.onnx";
+        @Parameter(names = {"--backend_target", "-bt"}, order = 3,
+                description = "Choose one of the backend-target pair to run this demo:" +
+                        " 0: OpenCV implementation + CPU," +
+                        " 1: CUDA + GPU (CUDA), " +
+                        " 2: CUDA + GPU (CUDA FP16)," +
+                        " 3: TIM-VX + NPU," +
+                        " 4: CANN + NPU")
+        int backendTarget = 0;
+        @Parameter(names = {"--save", "-s"}, order = 4,
+                description = "Specify to save a file with results.")
+        boolean save;
+        @Parameter(names = {"--vis", "-v"}, order = 5, arity = 1,
+                description = "Specify to open a new window to show results.")
+        boolean vis = true;
+    }
+
+    /** Outcome of tracking a single frame. */
+    static class TrackingResult {
+        boolean isLocated;
+        Rect bbox;
+        float score;
+    }
+
+    /** Thin wrapper around {@code TrackerVit} built from a model path, backend id and target id. */
+    static class VitTrack {
+        private final TrackerVit model;
+
+        VitTrack(String modelPath, int backendId, int targetId) {
+            final TrackerVit.Params params = new TrackerVit.Params();
+            params.net(new BytePointer(modelPath))
+                    .backend(backendId)
+                    .target(targetId);
+            model = TrackerVit.create(params);
+        }
+
+        /** Initializes the tracker with the target region {@code roi} of {@code image}. */
+        void init(Mat image, Rect roi) {
+            model.init(image, roi);
+        }
+
+        /** Updates the tracker with {@code image} and returns the box, located flag and score. */
+        TrackingResult infer(Mat image) {
+            final TrackingResult result = new TrackingResult();
+            result.bbox = new Rect();
+            result.isLocated = model.update(image, result.bbox);
+            result.score = model.getTrackingScore();
+            return result;
+        }
+    }
+
+    /**
+     * Draws the tracking outcome on a copy of {@code image}; the input is not modified.
+     *
+     * @param image     frame to annotate
+     * @param bbox      bounding box reported by the tracker
+     * @param score     tracking score reported by the tracker
+     * @param isLocated whether the tracker reported the target as located
+     * @param fps       value printed near the top-left corner; negative to omit it
+     * @param boxColor  color of the bounding box
+     * @param textColor color of the FPS and score text
+     * @param fontScale font scale passed to {@code putText}
+     * @param fontSize  text thickness passed to {@code putText}
+     * @return a new {@code Mat} holding the annotated frame
+     */
+    static Mat visualize(Mat image, Rect bbox, float score, boolean isLocated, double fps, Scalar boxColor,
+                         Scalar textColor, double fontScale, int fontSize) {
+        final Mat output = image.clone();
+        final int h = output.rows();
+        final int w = output.cols();
+        if (fps >= 0) {
+            // Pass fontSize here too so every label honors the caller's thickness.
+            putText(output, String.format("FPS: %.2f", fps), new Point(0, 30), FONT_HERSHEY_DUPLEX, fontScale,
+                    textColor, fontSize, LINE_8, false);
+        }
+
+        if (isLocated && score >= SCORE_THRESHOLD) {
+            rectangle(output, bbox, boxColor, 2, LINE_8, 0);
+            putText(output, String.format("%.2f", score), new Point(bbox.x(), bbox.y() + 25),
+                    FONT_HERSHEY_DUPLEX, fontScale, textColor, fontSize, LINE_8, false);
+        } else {
+            final Size textSize = getTextSize("Target lost!", FONT_HERSHEY_DUPLEX, fontScale, fontSize, new int[]{0});
+            final int textX = (w - textSize.width()) / 2;
+            final int textY = (h - textSize.height()) / 2;
+            putText(output, "Target lost!", new Point(textX, textY), FONT_HERSHEY_DUPLEX,
+                    fontScale, new Scalar(0, 0, 255, 0), fontSize, LINE_8, false);
+        }
+
+        return output;
+    }
+
+    /**
+     * Entry point. Execute: mvn compile exec:java -q -Dexec.args=""
+     *
+     * @param argv command-line arguments, see {@link Args}
+     */
+    public static void main(String[] argv) {
+        final Args args = new Args();
+        final JCommander jc = JCommander.newBuilder()
+                .addObject(args)
+                .build();
+        jc.setUsageFormatter(new UnixStyleUsageFormatter(jc));
+        jc.parse(argv);
+        if (args.help) {
+            jc.usage();
+            return;
+        }
+        // Reject an out-of-range index here instead of failing with
+        // ArrayIndexOutOfBoundsException on the table lookup below.
+        if (args.backendTarget < 0 || args.backendTarget >= backendTargetPairs.length) {
+            System.err.printf("Error: --backend_target must be between 0 and %d, got %d%n",
+                    backendTargetPairs.length - 1, args.backendTarget);
+            return;
+        }
+        final int backendId = backendTargetPairs[args.backendTarget][0];
+        final int targetId = backendTargetPairs[args.backendTarget][1];
+        final VitTrack tracker = new VitTrack(args.modelPath, backendId, targetId);
+
+        final VideoCapture video = new VideoCapture();
+        final VideoWriter outputVideo = new VideoWriter();
+        try {
+            run(args, tracker, video, outputVideo);
+        } finally {
+            // Release the capture and writer on every exit path, including early error returns.
+            if (outputVideo.isOpened()) {
+                outputVideo.release();
+            }
+            video.release();
+        }
+    }
+
+    /**
+     * Opens the input, asks for a ROI on the first frame and tracks it frame by frame until a
+     * key is pressed or no more frames can be read. Does not release {@code video} or
+     * {@code outputVideo}; the caller does.
+     */
+    private static void run(Args args, VitTrack tracker, VideoCapture video, VideoWriter outputVideo) {
+        if (args.input == null) {
+            video.open(0);
+        } else {
+            video.open(args.input);
+        }
+        if (!video.isOpened()) {
+            System.err.println("Error: Could not open video source");
+            return;
+        }
+
+        final Mat frame = new Mat();
+        video.read(frame);
+        if (frame.empty()) {
+            System.err.println("No frames grabbed!");
+            return;
+        }
+
+        // Draw the instructions on a copy so the tracker is initialized with the clean frame.
+        final Scalar green = new Scalar(0, 255, 0, 0);
+        final Mat prompt = frame.clone();
+        putText(prompt, "1. Drag a bounding box to track.", new Point(0, 25), FONT_HERSHEY_SIMPLEX, 1, green);
+        putText(prompt, "2. Press ENTER to confirm", new Point(0, 50), FONT_HERSHEY_SIMPLEX, 1, green);
+        final Rect roi = selectROI(WINDOW_NAME, prompt);
+        if (roi.area() == 0) {
+            System.err.println("No ROI is selected! Exiting...");
+            return;
+        }
+        System.out.printf("Selected ROI: (x: %d, y: %d, width: %d, height: %d)%n", roi.x(), roi.y(), roi.width(),
+                roi.height());
+
+        // Create VideoWriter if save option is specified
+        if (args.save) {
+            outputVideo.open("output.mp4", VideoWriter.fourcc((byte) 'm', (byte) 'p', (byte) '4', (byte) 'v'),
+                    video.get(CAP_PROP_FPS), frame.size());
+            if (!outputVideo.isOpened()) {
+                System.err.println("Error: Could not create output video stream");
+                return;
+            }
+        }
+
+        // Initialize tracker with ROI
+        tracker.init(frame, roi);
+
+        // Track frame by frame
+        final TickMeter tm = new TickMeter();
+        while (waitKey(1) < 0) {
+            video.read(frame);
+            if (frame.empty()) {
+                System.out.println("End of video");
+                break;
+            }
+
+            // Inference
+            tm.start();
+            final TrackingResult result = tracker.infer(frame);
+            tm.stop();
+
+            // Visualize; visualize() draws on its own copy, so no extra clone is needed here.
+            final Mat annotated = visualize(frame, result.bbox, result.score, result.isLocated, tm.getFPS(),
+                    green, green, 1.0, 1);
+
+            if (args.save) {
+                outputVideo.write(annotated);
+            }
+            if (args.vis) {
+                imshow(WINDOW_NAME, annotated);
+            }
+            tm.reset();
+        }
+    }
+
+}
diff --git a/models/object_tracking_vittrack/pom.xml b/models/object_tracking_vittrack/pom.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>opencv_zoo</groupId>
+        <artifactId>demo</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>object_tracking_vittrack</artifactId>
+
+    <build>
+        <sourceDirectory>${project.basedir}</sourceDirectory>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <executable>java</executable>
+                    <mainClass>demo</mainClass>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
diff --git a/models/pom.xml b/models/pom.xml
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>opencv_zoo</groupId>
+    <artifactId>demo</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <name>OpenCV Zoo demo application</name>
+    <packaging>pom</packaging>
+
+    <build>
+        <sourceDirectory>${project.basedir}</sourceDirectory>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <executable>java</executable>
+                    <mainClass>demo</mainClass>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <modules>
+        <module>object_tracking_vittrack</module>
+        <module>text_detection_ppocr</module>
+    </modules>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.bytedeco</groupId>
+            <artifactId>javacv-platform</artifactId>
+            <version>1.5.10</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>flycapture-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>libdc1394-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>libfreenect-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>libfreenect2-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>librealsense-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>librealsense2-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>videoinput-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>artoolkitplus-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>leptonica-platform</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.bytedeco</groupId>
+                    <artifactId>tesseract-platform</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.bytedeco</groupId>
+            <artifactId>opencv-platform-gpu</artifactId>
+            <version>4.9.0-1.5.10</version>
+        </dependency>
+        <dependency>
+            <groupId>org.bytedeco</groupId>
+            <artifactId>cuda-platform-redist</artifactId>
+            <version>12.3-8.9-1.5.10</version>
+        </dependency>
+        <dependency>
+            <groupId>com.beust</groupId>
+            <artifactId>jcommander</artifactId>
+            <version>1.82</version>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/models/text_detection_ppocr/README.md b/models/text_detection_ppocr/README.md
@@ -43,6 +43,19 @@ cmake --build build
 ./build/opencv_zoo_text_detection_ppocr -h
 ```
 
+### Java
+
+Install Maven to get started:
+
+```shell
+# detect on camera input
+mvn compile exec:java -q
+# detect on an image
+mvn compile exec:java -q -Dexec.args="--input /path/to/image -v"
+# get help messages
+mvn compile exec:java -q -Dexec.args="--help"
+```
+
 ### Example outputs
 
 ![mask](./example_outputs/mask.jpg)