Merged commit includes the following changes:

-- 435451085 by Waymo Research: Add depth in the camera projection. -- 435546971 by Waymo Research: Internal change -- 436651324 by Waymo Research: Add Localization Error Tolerant (LET) Metrics, including LET-3D-AP and LET-3D-APL. -- 437126042 by Waymo Research: Remove lateral error constraint in LET metrics. --
DavidsShi · Apr 1, 2022 · 7fd7da0 · 7fd7da0
1 parent acd2caf
commit 7fd7da0
Show file tree

Hide file tree

Showing 18 changed files with 1,060 additions and 83 deletions.
diff --git a/third_party/camera/camera_model.cc b/third_party/camera/camera_model.cc
@@ -339,12 +339,13 @@ void CameraModel::PrepareProjection(const CameraImage& camera_image) {
 // coordinate space instead of going to the distortion space. The testing
 // results show that we get sufficiently good results already in normalized
 // coordinate space.
-bool CameraModel::WorldToImage(double x, double y, double z,
-                               bool check_image_bounds, double* u_d,
-                               double* v_d) const {
+bool CameraModel::WorldToImageWithDepth(double x, double y, double z,
+                                        bool check_image_bounds, double* u_d,
+                                        double* v_d, double* depth) const {
   if (calibration_.rolling_shutter_direction() ==
       CameraCalibration::GLOBAL_SHUTTER) {
-    return WorldToImageGlobalShutter(x, y, z, check_image_bounds, u_d, v_d);
+    return WorldToImageWithDepthGlobalShutter(x, y, z, check_image_bounds, u_d,
+                                              v_d, depth);
   }
 
   // The initial guess is the center of the image.
@@ -361,10 +362,16 @@ bool CameraModel::WorldToImage(double x, double y, double z,
   Eigen::Vector2d normalized_coord;
   double residual = 2 * kThreshold;
   double jacobian = 0.;
+  double point_depth = -1;
 
   while (std::fabs(residual) > kThreshold && iter_num < kMaxIterNum) {
-    if (!ComputeResidualAndJacobian(n_pos_f, t_h, &normalized_coord, &residual,
-                                    &jacobian)) {
+    if (!ComputeDepthResidualAndJacobian(n_pos_f, t_h, &normalized_coord,
+                                         &point_depth, &residual, &jacobian)) {
+      // The point is behind camera or the radial distortion is too large.
+      *u_d = -1;
+      *v_d = -1;
+      if (depth)
+        *depth = -1;
       return false;
     }
 
@@ -375,11 +382,20 @@ bool CameraModel::WorldToImage(double x, double y, double z,
   }
 
   // Get normalized coordinate.
-  if (!ComputeResidualAndJacobian(n_pos_f, t_h, &normalized_coord, &residual,
-                                  /*jacobian=*/nullptr)) {
+  if (!ComputeDepthResidualAndJacobian(n_pos_f, t_h, &normalized_coord,
+                                       &point_depth, &residual,
+                                       /*jacobian=*/nullptr)) {
+    // The point is behind camera or the radial distortion is too large.
+    *u_d = -1;
+    *v_d = -1;
+    if (depth)
+      *depth = -1;
     return false;
   }
 
+  if (depth)
+    *depth = point_depth;
+
   if (!DirectionToImage(normalized_coord(0), normalized_coord(1), u_d, v_d)) {
     return false;
   }
@@ -392,6 +408,13 @@ bool CameraModel::WorldToImage(double x, double y, double z,
   return true;
 }
 
+bool CameraModel::WorldToImage(double x, double y, double z,
+                               bool check_image_bounds, double* u_d,
+                               double* v_d) const {
+  return WorldToImageWithDepth(
+      x, y, z, check_image_bounds, u_d, v_d, nullptr);
+}
+
 void CameraModel::ImageToWorld(double u_d, double v_d, double depth, double* x,
                                double* y, double* z) const {
   if (calibration_.rolling_shutter_direction() ==
@@ -440,40 +463,60 @@ void CameraModel::ImageToWorldGlobalShutter(double u_d, double v_d,
   *z = wp(2);
 }
 
-bool CameraModel::CameraToImage(double x, double y, double z,
-                                bool check_image_bounds, double* u_d,
-                                double* v_d) const {
+bool CameraModel::CameraToImageWithDepth(double x, double y, double z,
+                                         bool check_image_bounds, double* u_d,
+                                         double* v_d, double* depth) const {
   // Return if the 3D point is behind the camera.
   if (x <= 0.0) {
     *u_d = -1.0;
     *v_d = -1.0;
+    if (depth)
+      *depth = -1.0;
     return false;
   }
 
   // Convert the 3D point to a direction vector. If the distortion is out of
   // the limits, still compute u_d and v_d but return false.
   const double u = -y / x;
   const double v = -z / x;
+  if (depth)
+    *depth = x;
   if (!DirectionToImage(u, v, u_d, v_d)) return false;
 
   // If requested, check if the projected pixel is inside the image.
   return check_image_bounds ? InImage(*u_d, *v_d) : true;
 }
 
+bool CameraModel::CameraToImage(double x, double y, double z,
+                                bool check_image_bounds, double* u_d,
+                                double* v_d) const {
+  return CameraToImageWithDepth(x, y, z, check_image_bounds, u_d, v_d, nullptr);
+}
+
 bool CameraModel::InImage(double u, double v) const {
   const double max_u = static_cast<double>(calibration_.width());
   const double max_v = static_cast<double>(calibration_.height());
   return u >= 0.0 && u < max_u && v >= 0.0 && v < max_v;
 }
 
-bool CameraModel::WorldToImageGlobalShutter(double x, double y, double z,
-                                            bool check_image_bounds,
-                                            double* u_d, double* v_d) const {
+bool CameraModel::WorldToImageWithDepthGlobalShutter(double x, double y,
+                                                     double z,
+                                                     bool check_image_bounds,
+                                                     double* u_d, double* v_d,
+                                                     double* depth) const {
   CHECK(u_d);
   CHECK(v_d);
   const Eigen::Vector3d cp =
       global_shutter_state_->cam_tfm_n * Eigen::Vector3d(x, y, z);
-  return CameraToImage(cp(0), cp(1), cp(2), check_image_bounds, u_d, v_d);
+  return CameraToImageWithDepth(cp(0), cp(1), cp(2), check_image_bounds, u_d,
+                                v_d, depth);
+}
+
+bool CameraModel::WorldToImageGlobalShutter(double x, double y, double z,
+                                            bool check_image_bounds,
+                                            double* u_d, double* v_d) const {
+  return WorldToImageWithDepthGlobalShutter(x, y, z, check_image_bounds, u_d,
+                                            v_d, nullptr);
 }
 
 void CameraModel::ImageToDirection(double u_d, double v_d, double* u_n,
@@ -537,12 +580,12 @@ bool CameraModel::DirectionToImage(double u_n, double v_n, double* u_d,
   return true;
 }
 
-bool CameraModel::ComputeResidualAndJacobian(const Eigen::Vector3d& n_pos_f,
-                                             double t_h,
-                                             Eigen::Vector2d* normalized_coord,
-                                             double* residual,
-                                             double* jacobian) const {
+bool CameraModel::ComputeDepthResidualAndJacobian(
+    const Eigen::Vector3d& n_pos_f, double t_h,
+    Eigen::Vector2d* normalized_coord, double* depth, double* residual,
+    double* jacobian) const {
   // The jacobian is allowed to be a nullptr.
+  // The depth is allowed to be a nullptr.
   CHECK(normalized_coord);
   CHECK(residual);
   CHECK(rolling_shutter_state_);
@@ -562,6 +605,8 @@ bool CameraModel::ComputeResidualAndJacobian(const Eigen::Vector3d& n_pos_f,
 
   (*normalized_coord)(0) = -cam_pos_f(1) / cam_pos_f(0);
   (*normalized_coord)(1) = -cam_pos_f(2) / cam_pos_f(0);
+  if (depth)
+    *depth = cam_pos_f(0);
 
   const double normalized_spacing =
       rolling_shutter_state.readout_horizontal_direction
@@ -592,5 +637,14 @@ bool CameraModel::ComputeResidualAndJacobian(const Eigen::Vector3d& n_pos_f,
   return true;
 }
 
+bool CameraModel::ComputeResidualAndJacobian(const Eigen::Vector3d& n_pos_f,
+                                             double t_h,
+                                             Eigen::Vector2d* normalized_coord,
+                                             double* residual,
+                                             double* jacobian) const {
+  return ComputeDepthResidualAndJacobian(n_pos_f, t_h, normalized_coord,
+                                         nullptr, residual, jacobian);
+}
+
 }  // namespace open_dataset
 }  // namespace waymo
diff --git a/third_party/camera/camera_model.h b/third_party/camera/camera_model.h
@@ -74,6 +74,13 @@ class CameraModel {
   bool WorldToImage(double x, double y, double z, bool check_image_bounds,
                     double* u_d, double* v_d) const;
 
+  // Projects a 3D point in global coordinates into the lens distorted image
+  // coordinates (u_d, v_d, depth). These projections are in original image
+  // frame (x: image width, y: image height).
+  bool WorldToImageWithDepth(double x, double y, double z,
+                             bool check_image_bounds, double* u_d, double* v_d,
+                             double* depth) const;
+
   // Converts a point in the image with a known depth into world coordinates.
   // Similar as `WorldToImage`. This method also compensates for rolling shutter
   // effect if applicable.
@@ -89,16 +96,22 @@ class CameraModel {
 
  private:
   // Projects a point in the 3D camera frame into the lens distorted image
-  // coordinates (u_d, v_d).
+  // coordinates (u_d, v_d) and the corresponding depth.
   //
   // Returns false if the point is behind the camera or if the coordinates
   // cannot be trusted because the radial distortion is too large. When the
-  // point is not within the field of view of the camera, u_d, v_d are still
-  // assigned meaningful values. If the point is in front of the camera image
-  // plane, actual u_d and v_d values are calculated.
+  // point is not within the field of view of the camera, u_d, v_d and depth
+  // are still assigned meaningful values. If the point is in front of the
+  // camera image plane, actual u_d and v_d values are calculated.
   //
   // If the flag check_image_bounds is true, also returns false if the point is
   // not within the field of view of the camera.
+  bool CameraToImageWithDepth(double x, double y, double z,
+                              bool check_image_bounds, double* u_d, double* v_d,
+                              double* depth) const;
+
+  // Projects a point in the 3D camera frame into the lens distorted image
+  // coordinates (u_d, v_d).
   bool CameraToImage(double x, double y, double z, bool check_image_bounds,
                      double* u_d, double* v_d) const;
 
@@ -107,6 +120,13 @@ class CameraModel {
   bool WorldToImageGlobalShutter(double x, double y, double z,
                                  bool check_image_bounds, double* u_d,
                                  double* v_d) const;
+
+  // Similar as `WorldToImageWithDepth` but only for global shutter.
+  // Requires: `PrepareProjection` is called.
+  bool WorldToImageWithDepthGlobalShutter(double x, double y, double z,
+                                          bool check_image_bounds, double* u_d,
+                                          double* v_d, double* depth) const;
+
   // Similar as `ImageToWorld` but only for global shutter.
   // Requires: `PrepareProjection` is called.
   void ImageToWorldGlobalShutter(double u_d, double v_d, double depth,
@@ -125,12 +145,19 @@ class CameraModel {
   // This is a helper function for rolling shutter projection.
   // It takes the rolling shutter state variable, position of landmark in ENU
   // frame, estimated time t_h, and computes projected feature in normalized
-  // coordinate frame, the residual and the jacobian.
+  // coordinate frame, the corresponding depth, the residual and the jacobian.
   // If the jacobian is given as nullptr we will skip its computation.
   bool ComputeResidualAndJacobian(const Eigen::Vector3d& n_pos_f, double t_h,
                                   Eigen::Vector2d* normalized_coord,
                                   double* residual, double* jacobian) const;
 
+  // Similar as `ComputeResidualAndJacobian` but returns depth.
+  bool ComputeDepthResidualAndJacobian(const Eigen::Vector3d& n_pos_f,
+                                       double t_h,
+                                       Eigen::Vector2d* normalized_coord,
+                                       double* depth, double* residual,
+                                       double* jacobian) const;
+
   // Forward declaration of an internal state used for global shutter projection
   // computation.
   struct GlobalShutterState;

diff --git a/third_party/camera/camera_model_test.cc b/third_party/camera/camera_model_test.cc
@@ -156,6 +156,121 @@ TEST_F(CameraModelTest, GlobalShutter) {
   EXPECT_NEAR(z, 84.46586, 0.1);
 }
 
+TEST_F(CameraModelTest, WorldToImageBehindCameraRollingShutter) {
+  CameraModel camera_model(calibration_);
+  camera_model.PrepareProjection(camera_image_);
+
+  double x, y, z;
+  camera_model.ImageToWorld(100, 1000, 20, &x, &y, &z);
+  EXPECT_NEAR(x, -4091.97180, 0.1);
+  EXPECT_NEAR(y, 11527.48092, 0.1);
+  EXPECT_NEAR(z, 84.46586, 0.1);
+
+  double x_shift = x + 200;
+  double u_d, v_d;
+  EXPECT_FALSE(camera_model.WorldToImage(x_shift, y, z,
+                                         /*check_image_bounds=*/true, &u_d,
+                                         &v_d));
+  EXPECT_NEAR(u_d, -1, 1e-3);
+  EXPECT_NEAR(v_d, -1, 1e-3);
+}
+
+TEST_F(CameraModelTest, WorldToImageBehindCameraGlobalShutter) {
+  calibration_.set_rolling_shutter_direction(CameraCalibration::GLOBAL_SHUTTER);
+  CameraModel camera_model(calibration_);
+  camera_model.PrepareProjection(camera_image_);
+
+  double x, y, z;
+  camera_model.ImageToWorld(100, 1000, 20, &x, &y, &z);
+  EXPECT_NEAR(x, -4091.97180, 0.1);
+  EXPECT_NEAR(y, 11527.48092, 0.1);
+  EXPECT_NEAR(z, 84.46586, 0.1);
+
+  double x_shift = x + 200;
+  double u_d, v_d;
+  EXPECT_FALSE(camera_model.WorldToImage(x_shift, y, z,
+                                         /*check_image_bounds=*/true, &u_d,
+                                         &v_d));
+  EXPECT_NEAR(u_d, -1, 1e-3);
+  EXPECT_NEAR(v_d, -1, 1e-3);
+}
+
+TEST_F(CameraModelTest, RollingShutterWithDepth) {
+  CameraModel camera_model(calibration_);
+  camera_model.PrepareProjection(camera_image_);
+
+  double x, y, z;
+  camera_model.ImageToWorld(100, 1000, 20, &x, &y, &z);
+
+  double u_d, v_d, depth;
+  EXPECT_TRUE(camera_model.WorldToImageWithDepth(
+      x, y, z, /*check_image_bounds=*/true, &u_d, &v_d, &depth));
+  EXPECT_NEAR(u_d, 100, 0.1);
+  EXPECT_NEAR(v_d, 1000, 0.1);
+  EXPECT_NEAR(depth, 20, 1e-3);
+  EXPECT_NEAR(x, -4091.88016, 0.1);
+  EXPECT_NEAR(y, 11527.42299, 0.1);
+  EXPECT_NEAR(z, 84.46667, 0.1);
+}
+
+TEST_F(CameraModelTest, GlobalShutterWithDepth) {
+  calibration_.set_rolling_shutter_direction(CameraCalibration::GLOBAL_SHUTTER);
+  CameraModel camera_model(calibration_);
+  camera_model.PrepareProjection(camera_image_);
+
+  double x, y, z;
+  camera_model.ImageToWorld(100, 1000, 20, &x, &y, &z);
+
+  double u_d, v_d, depth;
+  EXPECT_TRUE(camera_model.WorldToImageWithDepth(
+      x, y, z, /*check_image_bounds=*/true, &u_d, &v_d, &depth));
+  EXPECT_NEAR(u_d, 100, 0.1);
+  EXPECT_NEAR(v_d, 1000, 0.1);
+  EXPECT_NEAR(depth, 20, 1e-3);
+  EXPECT_NEAR(x, -4091.97180, 0.1);
+  EXPECT_NEAR(y, 11527.48092, 0.1);
+  EXPECT_NEAR(z, 84.46586, 0.1);
+}
+
+TEST_F(CameraModelTest, WorldToImageWithDepthBehindCameraRollingShutter) {
+  CameraModel camera_model(calibration_);
+  camera_model.PrepareProjection(camera_image_);
+
+  double x, y, z;
+  camera_model.ImageToWorld(100, 1000, 20, &x, &y, &z);
+  EXPECT_NEAR(x, -4091.97180, 0.1);
+  EXPECT_NEAR(y, 11527.48092, 0.1);
+  EXPECT_NEAR(z, 84.46586, 0.1);
+
+  double x_shift = x + 200;
+  double u_d, v_d, depth;
+  EXPECT_FALSE(camera_model.WorldToImageWithDepth(
+      x_shift, y, z, /*check_image_bounds=*/true, &u_d, &v_d, &depth));
+  EXPECT_NEAR(u_d, -1, 1e-3);
+  EXPECT_NEAR(v_d, -1, 1e-3);
+  EXPECT_NEAR(depth, -1, 1e-3);
+}
+
+TEST_F(CameraModelTest, WorldToImageWithDepthBehindCameraGlobalShutter) {
+  calibration_.set_rolling_shutter_direction(CameraCalibration::GLOBAL_SHUTTER);
+  CameraModel camera_model(calibration_);
+  camera_model.PrepareProjection(camera_image_);
+
+  double x, y, z;
+  camera_model.ImageToWorld(100, 1000, 20, &x, &y, &z);
+  EXPECT_NEAR(x, -4091.97180, 0.1);
+  EXPECT_NEAR(y, 11527.48092, 0.1);
+  EXPECT_NEAR(z, 84.46586, 0.1);
+
+  double x_shift = x + 200;
+  double u_d, v_d, depth;
+  EXPECT_FALSE(camera_model.WorldToImageWithDepth(
+      x_shift, y, z, /*check_image_bounds=*/true, &u_d, &v_d, &depth));
+  EXPECT_NEAR(u_d, -1, 1e-3);
+  EXPECT_NEAR(v_d, -1, 1e-3);
+  EXPECT_NEAR(depth, -1, 1e-3);
+}
+
 TEST_F(CameraModelTest, SubPixelChangeInPrinciplePointChangesPoseTimeOffset) {
   int center_x = static_cast<int>(calibration_.intrinsic(2));
   int center_y = static_cast<int>(calibration_.intrinsic(3));