# utils.py

# -- coding: utf-8 --

"""DNPC/utils.py: Utility code for DNPC Method."""

import math
from dataclasses import dataclass

import numpy as np
import torch
from torch.amp import custom_fwd, custom_bwd

from Cameras.Base import BaseCamera
from Cameras.Perspective import PerspectiveCamera
import Framework
from Datasets.Base import BaseDataset
from Methods.Base.Model import BaseModel


def div_round_up(a: float, b: float) -> int:
    """Round up division.

    Args:
        a (float): Dvidend
        b (float): Divisor

    Returns:
        int: Quotient, rounded up to the nearest integer
    """
    return (a + b - 1) // b


# Helper functions for weight decay on grid features
def next_multiple(a, b):
    """Round a up to the next multiple of b.

    Args:
        a: Value to round up.
        b: Step size whose multiple is returned.

    Returns:
        The smallest multiple of b that is not smaller than a (as computed via div_round_up).
    """
    n_steps = div_round_up(a, b)
    return n_steps * b


def grid_scale(level, log2_per_level_scale, base_resolution):
    """Compute the scale of a grid level.

    Evaluates 2^(level * log2_per_level_scale) * base_resolution - 1.

    Args:
        level: Index of the grid level.
        log2_per_level_scale: Base-2 logarithm of the growth factor between consecutive levels.
        base_resolution: Resolution factor applied at level 0.

    Returns:
        Scale of the requested level.
    """
    exponent = level * log2_per_level_scale
    growth = 2 ** exponent
    return growth * base_resolution - 1


def grid_resolution(grid_scale):
    """Compute the grid resolution for a given grid scale.

    Args:
        grid_scale: Scale of the grid level.

    Returns:
        int: The scale rounded up to the next integer, plus one.
    """
    rounded_scale = math.ceil(grid_scale)
    return 1 + rounded_scale


def normalize(v: torch.Tensor, eps: float = 1.0e-6) -> torch.Tensor:
    """Scale the vectors stored along the last dimension of a tensor to unit L2 norm.

    Args:
        v (torch.Tensor): Input Tensor.
        eps (float, optional): Lower bound applied to the norm to avoid division by zero. Defaults to 1.0e-6.

    Returns:
        torch.Tensor: Tensor of the same shape as the input, normalized along the last dimension.
    """
    unit_vectors = torch.nn.functional.normalize(v, p=2, dim=-1, eps=eps)
    return unit_vectors


def blendBackgroundColor(camera: BaseCamera, rgb: torch.Tensor, alpha: torch.Tensor, background_color: torch.Tensor | None = None,
                         lerp: bool = False) -> torch.Tensor:
    """Composite an RGB image over a background color using the provided alpha mask.
    Assumes the current background color of the image is black (0, 0, 0).
    If no background color is provided, the camera's background color is used.

    Args:
        camera (BaseCamera): Camera object used for rendering; only accessed if no background color is given.
        rgb (torch.Tensor): Input RGB image.
        alpha (torch.Tensor): Input alpha mask.
        background_color (torch.Tensor | None, optional): Optional background color. Defaults to None.
        lerp (bool, optional): Additionally weight the image by the alpha mask (in case alpha was not accounted for during rendering). Defaults to False.

    Returns:
        torch.Tensor: Image with the background color added, weighted by (1 - alpha).
    """
    color = background_color if background_color is not None else camera.background_color
    foreground = rgb
    if lerp:
        foreground = rgb * alpha
    # the 1D color gets two trailing axes so that it broadcasts against the image
    background = (1.0 - alpha) * color[:, None, None]
    return foreground + background


def projectPointWithTolerance(camera: PerspectiveCamera, points: torch.Tensor, tolerance: float = 0.0) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Project 3D points to 2D image plane and flag the points that are visible up to a tolerance.

    A point is considered valid if its depth lies within the camera's near/far range extended by
    the tolerance, and its projection lies within the image extended by the tolerance converted
    to screen space (tolerance * focal / depth).

    Args:
        camera (PerspectiveCamera): Camera object used for rendering.
        points (torch.Tensor): 3D points to project. Shape: (N, 3).
        tolerance (float, optional): Tolerance value, expressed in the same units as the depths. Defaults to 0.0.

    Returns:
        tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
            - Projected 2D points as returned by the camera (not clamped to the image). Shape: (N, 2).
            - Boolean validity mask. Shape: (N,).
            - Depths of the projected points; depths of valid points are clamped to the camera's
              near/far range. Shape: (N,).
    """
    points, _, depths = camera.projectPoints(points)
    screen_size = torch.tensor((camera.properties.width, camera.properties.height), device=points.device, dtype=points.dtype)
    focal = torch.tensor((camera.properties.focal_x, camera.properties.focal_y), device=points.device, dtype=points.dtype)
    # per-point, per-axis margin in screen space corresponding to the given tolerance
    tolerance_screen_space = tolerance * focal / depths[:, None]
    # depth test against the near/far range widened by the tolerance
    valid_mask = ((depths > (camera.near_plane - tolerance)) & (depths < (camera.far_plane + tolerance)))
    # screen-space test: both coordinates must fall inside the image widened by the margin
    valid_mask &= ((points + tolerance_screen_space >= 0) & (points - tolerance_screen_space < screen_size - 1)).all(dim=-1)
    # points accepted only because of the tolerance get their depth pulled back into the near/far range (in-place)
    depths[valid_mask] = depths[valid_mask].clamp(camera.near_plane, camera.far_plane)
    return points, valid_mask, depths


@dataclass(frozen=True)
class LRDecayPolicy:
    """Learning rate scheduler for flexible definition of decay policy.

    Interpolates log-linearly from lr_init to lr_final over max_steps and optionally
    scales the result by a sine-shaped delay factor during the first lr_delay_steps.
    Adapted from https://github.com/sxyu/svox2/blob/master/opt/util/util.py#L78
    """

    lr_init: float = 1.0
    lr_final: float = 1.0
    lr_delay_steps: int = 0
    lr_delay_mult: float = 1.0
    max_steps: int = 1000000
    decay_stride: int = 1

    def set_decay_stride(self, stride: int) -> None:
        """Set decay stride for learning rate scheduler.

        Also rescales max_steps and lr_delay_steps by the new stride (rounded up).

        Args:
            stride (int): stride between decay steps.
        """
        # the dataclass is frozen, so fields have to be written through object.__setattr__
        object.__setattr__(self, 'decay_stride', stride)
        for step_field in ('max_steps', 'lr_delay_steps'):
            object.__setattr__(self, step_field, div_round_up(getattr(self, step_field), self.decay_stride))

    def __call__(self, iteration: int) -> float:
        """Calculates learning rate for the given iteration.

        Args:
            iteration (int): Current iteration.

        Returns:
            float: Target learning rate for the given iteration.
        """
        # convert the iteration into a decay step
        step = iteration // self.decay_stride
        # undefined configurations yield a learning rate of zero
        both_rates_zero = self.lr_init == 0.0 and self.lr_final == 0.0
        if step < 0 or both_rates_zero:
            return 0.0
        # delay factor follows a sine (reverse cosine decay) while inside the delay phase
        delay_rate = 1.0
        if 0 < self.lr_delay_steps and step < self.lr_delay_steps:
            delay_progress = np.clip(step / self.lr_delay_steps, 0, 1)
            delay_rate = self.lr_delay_mult + (1 - self.lr_delay_mult) * np.sin(0.5 * np.pi * delay_progress)
        # interpolate linearly in log space between initial and final learning rates
        t = np.clip(step / self.max_steps, 0, 1)
        log_lr = (1 - t) * np.log(self.lr_init) + t * np.log(self.lr_final)
        # final lr is the product of delay factor and interpolated lr
        return delay_rate * np.exp(log_lr)


class TruncExp(torch.autograd.Function):
    """Autograd function for truncated exponential activation, cutting gradients for numerical stability with half precision."""

    @staticmethod
    @custom_fwd(cast_inputs=torch.float32, device_type='cuda')
    def forward(ctx, x: torch.Tensor) -> torch.Tensor:
        """Forward pass for truncated exponential activation.

        Args:
            ctx: Internal autograd context.
            x (torch.Tensor): input tensor

        Returns:
            torch.Tensor: activated output tensor
        """
        ctx.save_for_backward(x)
        return torch.exp(x)

    @staticmethod
    @custom_bwd(device_type='cuda')
    def backward(ctx, dL_dout: torch.Tensor) -> torch.Tensor:
        """Backward pass for truncated exponential activation.

        Args:
            ctx: Internal autograd context.
            dL_dout (torch.Tensor): Backward gradient.

        Returns:
            torch.Tensor: Truncated gradient.
        """
        x = ctx.saved_tensors[0]
        return dL_dout * torch.exp(x.clamp(-15, 15))


@torch.no_grad()
def logScene(model: BaseModel, iteration: int, dataset: BaseDataset, timestamp: float | None = None, log_string: str = 'scene') -> None:
    """Visualize the scene representation using wandb.

    Logs a 3D object containing the (subsampled) point cloud colored by static/dynamic state,
    the bounding boxes of the scene and both hash grids, and one vector per training camera.
    Note that the dataset is switched to train mode and left in that mode.

    Args:
        model (BaseModel): DNPC Model.
        iteration (int): Current iteration.
        dataset (BaseDataset): Dataset object.
        timestamp (float | None, optional): Timeslice in [0, 1] to be visualized. Defaults to None.
        log_string (str, optional): String identifier for wandb. Defaults to 'scene'.
    """
    # set dataset to train mode
    dataset.train()
    # one vector per camera: from the camera position to a point 0.1 units along the view direction
    camera_vectors = []
    for view_idx in range(len(dataset)):
        dataset.camera.setProperties(dataset[view_idx])
        position_and_viewdir = dataset.camera.getPositionAndViewdir().cpu().numpy()
        origin, direction = position_and_viewdir[0], position_and_viewdir[1]
        camera_vectors.append({"start": origin.tolist(), "end": (origin + (0.1 * direction)).tolist()})
    camera_vectors = np.array(camera_vectors)
    # corners of the unit cube, transformed below into the boxes of scene (-> sampling grid) and hash grids
    unit_cube_corners = torch.tensor([
        [0, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [1, 0, 0],
        [1, 1, 0],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1]
    ])
    probability_field = model.probability_field
    static_corners = unit_cube_corners * model.static_grid.bounding_box_size + model.static_grid.bounding_box_min
    dynamic_corners = unit_cube_corners * model.dynamic_grid.bounding_box_size + model.dynamic_grid.bounding_box_min
    scene_corners = unit_cube_corners * probability_field.bounding_box_size + probability_field.bounding_box_min
    # colors are defined in [0, 255]; boxes receive them rescaled to [0, 1]
    scene_color = torch.tensor([255, 0, 0], dtype=torch.float)
    static_color = torch.tensor([240, 120, 46], dtype=torch.float)
    dynamic_color = torch.tensor([50, 140, 34], dtype=torch.float)
    box_definitions = (
        (scene_corners, "Bounding Box", scene_color),
        (static_corners, "Static Hash Grid", static_color),
        (dynamic_corners, "Dynamic Hash Grid", dynamic_color),
    )
    boxes = np.array([
        {
            "corners": corners.cpu().numpy().tolist(),
            "label": label,
            "color": (color / 255.0).cpu().numpy().tolist(),
        }
        for corners, label, color in box_definitions
    ])
    # point cloud rows are (x, y, z, r, g, b); the scene box corners are appended as extra points
    corner_cloud = torch.cat([scene_corners, scene_color.expand(scene_corners.shape[0], 3)], dim=-1)
    cloud = probability_field.centers.repeat(1, 2)
    cloud[..., 3:] = static_color
    if timestamp is None:
        dynamic_mask = probability_field.global_dynamic_mask
    else:
        # map the timestamp in [0, 1] to the index of the closest local mask
        n_timesteps = probability_field.local_dynamic_masks.shape[0]
        time_idx = round(timestamp * (n_timesteps - 1))
        # the local mask only covers globally dynamic points, so scatter it into a copy of the global mask
        dynamic_mask = probability_field.global_dynamic_mask.clone()
        dynamic_mask[probability_field.global_dynamic_mask] = probability_field.local_dynamic_masks[time_idx]
    cloud[dynamic_mask, 3:] = dynamic_color
    # reduce point cloud size by keeping every stride-th point
    max_points = 50000
    stride = cloud.shape[0] // max_points
    if stride > 1:
        cloud = cloud[::stride]
    cloud = torch.cat((cloud, corner_cloud), dim=0)
    # log in wandb
    scene = Framework.wandb.Object3D({
        "type": "lidar/beta",
        "points": cloud.cpu().numpy(),
        "boxes": boxes,
        "vectors": camera_vectors
    })
    Framework.wandb.log(data={log_string: scene}, step=iteration)


def eval_sh(view_dirs: torch.Tensor, n_bases: int) -> torch.Tensor:
    """Evaluate spherical harmonics bases at unit directions, without taking linear combination.

    adapted from multiple sources:
    1. https://www.ppsloan.org/publications/StupidSH36.pdf
    2. https://github.com/sxyu/svox2/blob/59984d6c4fd3d713353bafdcb011646e64647cc7/svox2/utils.py#L115
    3. https://github.com/NVlabs/tiny-cuda-nn/blob/212104156403bd87616c1a4f73a1c5f2c2e172a9/include/tiny-cuda-nn/common_device.h#L340

    Args:
        view_dirs (torch.Tensor): Normalized input view directions. Shape: (..., 3).
        n_bases (int): number of SH bases to evaluate. Must be a square number between 1 and 64,
            i.e., one of 1, 4, 9, 16, 25, 36, 49, 64.

    Returns:
        torch.Tensor: Evaluated SH bases at input view directions. Shape: (..., n_bases).

    Raises:
        ValueError: If n_bases is not one of the supported values.
    """
    # bases are only written in complete bands; any other count would either index out of
    # bounds or leave part of the (uninitialized) output buffer untouched
    if n_bases not in (1, 4, 9, 16, 25, 36, 49, 64):
        raise ValueError(f'n_bases must be one of 1, 4, 9, 16, 25, 36, 49, 64, got {n_bases}')
    result = torch.empty((*view_dirs.shape[:-1], n_bases), dtype=view_dirs.dtype, device=view_dirs.device)
    result[..., 0] = 0.28209479177387814
    if n_bases == 1:
        return result
    x, y, z = view_dirs.unbind(-1)
    result[..., 1] = -0.48860251190291987 * y
    result[..., 2] = 0.48860251190291987 * z
    result[..., 3] = -0.48860251190291987 * x
    if n_bases == 4:
        return result
    x2, y2, z2 = x * x, y * y, z * z
    xy, yz, xz = x * y, y * z, x * z
    result[..., 4] = 1.0925484305920792 * xy
    result[..., 5] = -1.0925484305920792 * yz
    result[..., 6] = 0.94617469575755997 * z2 - 0.31539156525251999
    result[..., 7] = -1.0925484305920792 * xz
    result[..., 8] = 0.54627421529603959 * x2 - 0.54627421529603959 * y2
    if n_bases == 9:
        return result
    result[..., 9] = 0.59004358992664352 * y * (-3 * x2 + y2)
    result[..., 10] = 2.8906114426405538 * xy * z
    result[..., 11] = 0.45704579946446572 * y * (1.0 - 5.0 * z2)
    result[..., 12] = 0.3731763325901154 * z * (5.0 * z2 - 3.0)
    result[..., 13] = 0.45704579946446572 * x * (1.0 - 5.0 * z2)
    result[..., 14] = 1.4453057213202769 * z * (x2 - y2)
    result[..., 15] = 0.59004358992664352 * x * (-x2 + 3.0 * y2)
    if n_bases == 16:
        return result
    x4, y4, z4 = x2 * x2, y2 * y2, z2 * z2
    result[..., 16] = 2.5033429417967046 * xy * (x2 - y2)
    result[..., 17] = 1.7701307697799304 * yz * (-3.0 * x2 + y2)
    result[..., 18] = 0.94617469575756008 * xy * (7.0 * z2 - 1.0)
    result[..., 19] = 0.66904654355728921 * yz * (3.0 - 7.0 * z2)
    result[..., 20] = -3.1735664074561294 * z2 + 3.7024941420321507 * z4 + 0.31735664074561293
    result[..., 21] = 0.66904654355728921 * xz * (3.0 - 7.0 * z2)
    result[..., 22] = 0.47308734787878004 * (x2 - y2) * (7.0 * z2 - 1.0)
    result[..., 23] = 1.7701307697799304 * xz * (-x2 + 3.0 * y2)
    result[..., 24] = -3.7550144126950569 * x2 * y2 + 0.62583573544917614 * x4 + 0.62583573544917614 * y4
    if n_bases == 25:
        return result
    result[..., 25] = 0.65638205684017015 * y * (10.0 * x2 * y2 - 5.0 * x4 - y4)
    result[..., 26] = 8.3026492595241645 * xy * z * (x2 - y2)
    result[..., 27] = -0.48923829943525038 * y * (3.0 * x2 - y2) * (9.0 * z2 - 1.0)
    result[..., 28] = 4.7935367849733241 * xy * z * (3.0 * z2 - 1.0)
    result[..., 29] = 0.45294665119569694 * y * (14.0 * z2 - 21.0 * z4 - 1.0)
    result[..., 30] = 0.1169503224534236 * z * (-70.0 * z2 + 63.0 * z4 + 15.0)
    result[..., 31] = 0.45294665119569694 * x * (14.0 * z2 - 21.0 * z4 - 1.0)
    result[..., 32] = 2.3967683924866621 * z * (x2 - y2) * (3.0 * z2 - 1.0)
    result[..., 33] = -0.48923829943525038 * x * (x2 - 3.0 * y2) * (9.0 * z2 - 1.0)
    result[..., 34] = 2.0756623148810411 * z * (-6.0 * x2 * y2 + x4 + y4)
    result[..., 35] = 0.65638205684017015 * x * (10.0 * x2 * y2 - x4 - 5.0 * y4)
    if n_bases == 36:
        return result
    x6, y6, z6 = x2 * x4, y2 * y4, z2 * z4
    result[..., 36] = 1.3663682103838286 * xy * (-10.0 * x2 * y2 + 3.0 * x4 + 3.0 * y4)
    result[..., 37] = 2.3666191622317521 * yz * (10.0 * x2 * y2 - 5.0 * x4 - y4)
    result[..., 38] = 2.0182596029148963 * xy * (x2 - y2) * (11.0 * z2 - 1.0)
    result[..., 39] = -0.92120525951492349 * yz * (3.0 * x2 - y2) * (11.0 * z2 - 3.0)
    result[..., 40] = 0.92120525951492349 * xy * (-18.0 * z2 + 33.0 * z4 + 1.0)
    result[..., 41] = 0.58262136251873131 * yz * (30.0 * z2 - 33.0 * z4 - 5.0)
    result[..., 42] = 6.6747662381009842 * z2 - 20.024298714302954 * z4 + 14.684485723822165 * z6 - 0.31784601133814211
    result[..., 43] = 0.58262136251873131 * xz * (30.0 * z2 - 33.0 * z4 - 5.0)
    result[..., 44] = 0.46060262975746175 * (x2 - y2) * (11.0 * z2 * (3.0 * z2 - 1.0) - 7.0 * z2 + 1.0)
    result[..., 45] = -0.92120525951492349 * xz * (x2 - 3.0 * y2) * (11.0 * z2 - 3.0)
    result[..., 46] = 0.50456490072872406 * (11.0 * z2 - 1.0) * (-6.0 * x2 * y2 + x4 + y4)
    result[..., 47] = 2.3666191622317521 * xz * (10.0 * x2 * y2 - x4 - 5.0 * y4)
    result[..., 48] = 10.247761577878714 * x2 * y4 - 10.247761577878714 * x4 * y2 + 0.6831841051919143 * x6 - 0.6831841051919143 * y6
    if n_bases == 49:
        return result
    result[..., 49] = 0.70716273252459627 * y * (-21.0 * x2 * y4 + 35.0 * x4 * y2 - 7.0 * x6 + y6)
    result[..., 50] = 5.2919213236038001 * xy * z * (-10.0 * x2 * y2 + 3.0 * x4 + 3.0 * y4)
    result[..., 51] = -0.51891557872026028 * y * (13.0 * z2 - 1.0) * (-10.0 * x2 * y2 + 5.0 * x4 + y4)
    result[..., 52] = 4.1513246297620823 * xy * z * (x2 - y2) * (13.0 * z2 - 3.0)
    result[..., 53] = -0.15645893386229404 * y * (3.0 * x2 - y2) * (13.0 * z2 * (11.0 * z2 - 3.0) - 27.0 * z2 + 3.0)
    result[..., 54] = 0.44253269244498261 * xy * z * (-110.0 * z2 + 143.0 * z4 + 15.0)
    result[..., 55] = 0.090331607582517306 * y * (-135.0 * z2 + 495.0 * z4 - 429.0 * z6 + 5.0)
    result[..., 56] = 0.068284276912004949 * z * (315.0 * z2 - 693.0 * z4 + 429.0 * z6 - 35.0)
    result[..., 57] = 0.090331607582517306 * x * (-135.0 * z2 + 495.0 * z4 - 429.0 * z6 + 5.0)
    result[..., 58] = 0.07375544874083044 * z * (x2 - y2) * (143.0 * z2 * (3.0 * z2 - 1.0) - 187.0 * z2 + 45.0)
    result[..., 59] = -0.15645893386229404 * x * (x2 - 3.0 * y2) * (13.0 * z2 * (11.0 * z2 - 3.0) - 27.0 * z2 + 3.0)
    result[..., 60] = 1.0378311574405206 * z * (13.0 * z2 - 3.0) * (-6.0 * x2 * y2 + x4 + y4)
    result[..., 61] = -0.51891557872026028 * x * (13.0 * z2 - 1.0) * (-10.0 * x2 * y2 + x4 + 5.0 * y4)
    result[..., 62] = 2.6459606618019 * z * (15.0 * x2 * y4 - 15.0 * x4 * y2 + x6 - y6)
    result[..., 63] = 0.70716273252459627 * x * (-35.0 * x2 * y4 + 21.0 * x4 * y2 - x6 + 7.0 * y6)
    return result


def generateHaltonSequence(n: int, d: int) -> torch.Tensor:
    """ Generate Halton sequence of n points in d dimensions.

    Dimension j uses the (j+1)-th prime number as its base (2, 3, 5, 7, ...).
    Sequence indices start at 1, so the all-zero point is not part of the output.

    Args:
        n (int): Number of points to generate.
        d (int): Number of dimensions.

    Returns:
        torch.Tensor: Output Halton sequence. Shape: (n, d)

    Raises:
        ValueError: If n or d is negative.
    """
    if n < 0 or d < 0:
        raise ValueError(f'n and d must be non-negative, got n={n}, d={d}')

    def firstPrimes(count: int) -> list[int]:
        # trial division against the primes found so far; only divisors up to sqrt(candidate) matter
        primes: list[int] = []
        candidate = 2
        while len(primes) < count:
            if all(candidate % p for p in primes if p * p <= candidate):
                primes.append(candidate)
            candidate += 1
        return primes

    def haltonBase(index: int, base: int) -> float:
        # radical inverse: mirror the base-`base` digits of index at the radix point
        result = 0.0
        f = 1 / base
        i = index
        while i > 0:
            result += f * (i % base)
            i //= base
            f /= base
        return result

    bases = firstPrimes(d)
    values = [[haltonBase(index, base) for base in bases] for index in range(1, n + 1)]
    # reshape keeps the (n, d) shape even if n or d is zero
    return torch.tensor(values, dtype=torch.get_default_dtype()).reshape(n, d)