Source code for ct.raycast

"""
Functions for raycasting meshes and generating rays.
"""

import numpy as np
import open3d as o3d
from typing import Tuple
from jaxtyping import Float

from . import sanity
from . import convert



[docs]
def gen_rays(
    K: Float[np.ndarray, "3 3"],
    T: Float[np.ndarray, "4 4"],
    pixels: Float[np.ndarray, "n 2"],
) -> Tuple[Float[np.ndarray, "n 3"], Float[np.ndarray, "n 3"]]:
    """
    Build one world-space ray per pixel for a pinhole camera.

    Each pixel is lifted to a homogeneous image point, back-projected through
    the inverse intrinsics, scaled to unit length, and finally rotated into the
    world frame using the inverse of the rotation part of ``T``.

    Args:
        K: (3, 3) camera intrinsic matrix.
        T: (4, 4) camera extrinsic matrix (world-to-camera transformation).
        pixels: (N, 2) array of pixel coordinates in (col, row) order.

    Returns:
        A tuple ``(centers, dirs)``:

        - centers: the camera center repeated once per pixel; every row is the
          same because all rays leave the same camera.
        - dirs: the ray directions in world coordinates, normalized to unit
          length.

    Examples:
        .. code-block:: python

            # Generate rays for all pixels in a 640x480 image
            height, width = 480, 640
            pixels = np.array([[x, y] for y in range(height) for x in range(width)])
            centers, dirs = ct.raycast.gen_rays(K, T, pixels)
    """
    sanity.assert_K(K)
    sanity.assert_T(T)
    sanity.assert_shape_nx2(pixels, name="pixels")

    # Append a column of ones: (col, row) -> (col, row, 1).
    ones_column = np.ones_like(pixels[:, :1])
    pixels_homo = np.concatenate((pixels, ones_column), axis=1)

    # Back-project the homogeneous pixels into the camera frame.
    K_inv = np.linalg.inv(K)
    cam_dirs = (K_inv @ pixels_homo.T).T

    # Scale every camera-frame direction to unit length.
    cam_norms = np.linalg.norm(cam_dirs, axis=1, keepdims=True)
    cam_dirs = cam_dirs / cam_norms

    # Rotate the directions from the camera frame into the world frame.
    R, _ = convert.T_to_R_t(T)
    C = convert.T_to_C(T)
    R_inv = np.linalg.inv(R)
    dirs = (R_inv @ cam_dirs.T).T

    # Every ray starts at the same camera center, so repeat it per ray.
    num_rays = dirs.shape[0]
    centers = np.tile(C, (num_rays, 1))

    return centers, dirs




[docs]
def mesh_to_im_distance(
    mesh: o3d.geometry.TriangleMesh,
    K: Float[np.ndarray, "3 3"],
    T: Float[np.ndarray, "4 4"],
    height: int,
    width: int,
) -> Float[np.ndarray, "h w"]:
    """
    Ray cast a mesh from a single camera and return its distance image.

    Every pixel of the result holds the Euclidean distance from the camera
    center to the mesh surface along that pixel's ray. This is a single-view
    convenience wrapper: it forwards to mesh_to_im_distances with one-element
    lists and returns the first (only) image.

    Args:
        mesh: Open3D TriangleMesh to be ray casted.
        K: (3, 3) camera intrinsic matrix.
        T: (4, 4) camera extrinsic matrix (world-to-camera transformation).
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        (height, width) float32 array representing the distance image. Pixels
        whose ray does not intersect the mesh are set to np.inf.

    Note: When the same mesh is cast from several cameras, prefer
    mesh_to_im_distances, which sets up the ray casting scene only once.

    Examples:
        .. code-block:: python

            # Create distance image for a 640x480 view
            distance_image = ct.raycast.mesh_to_im_distance(mesh, K, T, 480, 640)
            plt.imshow(distance_image)
            plt.colorbar()
    """
    # Wrap the single view in lists, then unwrap the single result.
    return mesh_to_im_distances(
        mesh=mesh,
        Ks=[K],
        Ts=[T],
        height=height,
        width=width,
    )[0]




[docs]
def mesh_to_im_distances(
    mesh: o3d.geometry.TriangleMesh,
    Ks: Float[np.ndarray, "n 3 3"],
    Ts: Float[np.ndarray, "n 4 4"],
    height: int,
    width: int,
) -> Float[np.ndarray, "n h w"]:
    """
    Generate multiple distance images by ray casting a mesh from different views.

    For each camera view, generates a distance image containing the Euclidean
    distance from the camera center to the mesh surface.

    Args:
        mesh: Open3D TriangleMesh to be ray casted.
        Ks: (N, 3, 3) array (or length-N sequence) of camera intrinsic matrices
            for N views.
        Ts: (N, 4, 4) array (or length-N sequence) of camera extrinsic matrices
            (world-to-camera transformations) for N views. Must have the same
            length as ``Ks``.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        (N, height, width) float32 array representing the distance images. Each
        image contains the distances from the corresponding camera center to the
        mesh surface. Invalid distances (no intersection) are set to np.inf.
        When N is 0, an empty (0, height, width) array is returned.

    Raises:
        ValueError: If ``Ks`` and ``Ts`` do not contain the same number of
            views.

    Note: This function is more efficient than calling mesh_to_im_distance
    multiple times as it only sets up the ray casting scene once.

    Examples:
        .. code-block:: python

            # Create distance images for 3 different views
            Ks = [K0, K1, K2]  # 3 intrinsic matrices
            Ts = [T0, T1, T2]  # 3 extrinsic matrices
            distances = ct.raycast.mesh_to_im_distances(mesh, Ks, Ts, 480, 640)
            plt.imshow(distances[0])
            plt.colorbar()
    """
    # zip() below would silently drop the unmatched views, so reject a
    # mismatch up front instead of returning fewer images than requested.
    if len(Ks) != len(Ts):
        raise ValueError(
            f"Ks and Ts must have the same length, "
            f"but got {len(Ks)} and {len(Ts)}."
        )
    for K in Ks:
        sanity.assert_K(K)
    for T in Ts:
        sanity.assert_T(T)

    # np.stack() cannot stack an empty list; return the documented shape with
    # zero views rather than failing with an unrelated error message.
    if len(Ks) == 0:
        return np.empty((0, height, width), dtype=np.float32)

    # Build the ray casting scene once and reuse it for every view.
    t_mesh = o3d.t.geometry.TriangleMesh(
        vertex_positions=np.asarray(mesh.vertices).astype(np.float32),
        triangle_indices=np.asarray(mesh.triangles),
    )
    scene = o3d.t.geometry.RaycastingScene()
    scene.add_triangles(t_mesh)

    im_distances = []
    for K, T in zip(Ks, Ts):
        rays = o3d.t.geometry.RaycastingScene.create_rays_pinhole(
            intrinsic_matrix=K,
            extrinsic_matrix=T,
            width_px=width,
            height_px=height,
        )
        # The last three channels of each ray are its direction. t_hit is
        # measured in multiples of the direction's length, so scale it by that
        # length to obtain a Euclidean distance.
        ray_lengths = np.linalg.norm(rays[:, :, 3:].numpy(), axis=2)
        ans = scene.cast_rays(rays)
        im_distance = ans["t_hit"].numpy() * ray_lengths
        im_distances.append(im_distance)

    return np.stack(im_distances, axis=0)




[docs]
def mesh_to_im_depth(
    mesh: o3d.geometry.TriangleMesh,
    K: Float[np.ndarray, "3 3"],
    T: Float[np.ndarray, "4 4"],
    height: int,
    width: int,
) -> Float[np.ndarray, "h w"]:
    """
    Ray cast a mesh from a single camera and return its depth image.

    Each pixel of the result holds the z-coordinate, in the camera coordinate
    system, of the mesh surface seen through that pixel.

    Args:
        mesh: Open3D TriangleMesh to be ray casted.
        K: (3, 3) camera intrinsic matrix.
        T: (4, 4) camera extrinsic matrix (world-to-camera transformation).
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        (height, width) float32 array representing the depth image. Pixels
        whose ray does not intersect the mesh are set to np.inf.

    Note: The depth image is derived from the distance image produced by
    mesh_to_im_distance, converted with the camera intrinsic parameters.

    Examples:
        .. code-block:: python

            # Create depth image for a 640x480 view
            im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640)
            plt.imshow(im_depth)
            plt.colorbar()
    """
    # Distance along the ray first, then convert it to depth with K.
    return convert.im_distance_to_im_depth(
        mesh_to_im_distance(mesh, K, T, height, width),
        K,
    )




[docs]
def mesh_to_im_depths(
    mesh: o3d.geometry.TriangleMesh,
    Ks: Float[np.ndarray, "n 3 3"],
    Ts: Float[np.ndarray, "n 4 4"],
    height: int,
    width: int,
) -> Float[np.ndarray, "n h w"]:
    """
    Ray cast a mesh from several cameras and return one depth image per view.

    Each depth image holds, per pixel, the z-coordinate of the mesh surface in
    the coordinate system of the corresponding camera.

    Args:
        mesh: Open3D TriangleMesh to be ray casted.
        Ks: (N, 3, 3) array of camera intrinsic matrices for N views.
        Ts: (N, 4, 4) array of camera extrinsic matrices (world-to-camera
            transformations) for N views.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        (N, height, width) float32 array representing the depth images. Pixels
        whose ray does not intersect the mesh are set to np.inf.

    Note: The depth images are derived from the distance images produced by
    mesh_to_im_distances, each converted with its own camera intrinsic matrix.

    Examples:
        .. code-block:: python

            # Create depth images for 3 different views
            Ks = [K0, K1, K2]  # 3 intrinsic matrices
            Ts = [T0, T1, T2]  # 3 extrinsic matrices
            im_depths = ct.raycast.mesh_to_im_depths(mesh, Ks, Ts, 480, 640)
            plt.imshow(im_depths[0])
            plt.colorbar()
    """
    per_view_distances = mesh_to_im_distances(mesh, Ks, Ts, height, width)

    # Convert each view's distance image with that view's intrinsics.
    per_view_depths = []
    for view_distance, view_K in zip(per_view_distances, Ks):
        view_depth = convert.im_distance_to_im_depth(view_distance, view_K)
        per_view_depths.append(view_depth)

    return np.stack(per_view_depths, axis=0)




[docs]
def mesh_to_im_mask(
    mesh: o3d.geometry.TriangleMesh,
    K: Float[np.ndarray, "3 3"],
    T: Float[np.ndarray, "4 4"],
    height: int,
    width: int,
) -> Float[np.ndarray, "h w"]:
    """
    Ray cast a mesh from a single camera and return its foreground mask.

    A pixel is foreground (1.0) when its distance, as computed by
    mesh_to_im_distance, is anything other than np.inf; otherwise it is
    background (0.0).

    Args:
        mesh: Open3D TriangleMesh to be ray casted.
        K: (3, 3) camera intrinsic matrix.
        T: (4, 4) camera extrinsic matrix (world-to-camera transformation).
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        (height, width) float32 array representing the binary mask image.
        Foreground pixels (mesh visible) are 1.0 and background pixels
        (no mesh) are 0.0.

    Note: This function is not optimized for repeated use with the same mesh.
    For multiple ray casts with the same mesh, create the ray casting scene
    manually for better performance.

    Examples:
        .. code-block:: python

            # Create mask image for a 640x480 view
            mask = ct.raycast.mesh_to_im_mask(mesh, K, T, 480, 640)
            plt.imshow(mask, cmap='gray')
    """
    distances = mesh_to_im_distance(mesh, K, T, height, width)

    # Rays that miss the mesh carry np.inf; everything else is foreground.
    is_foreground = np.not_equal(distances, np.inf)

    return is_foreground.astype(np.float32)