Source code for ct.project

"""
Functions for projecting 2D->3D or 3D->2D.
"""

import cv2
import numpy as np
from typing import Optional, Tuple, Union
from jaxtyping import Float
from . import convert, image, sanity



[docs]
def points_to_pixels(
    points: Float[np.ndarray, "n 3"],
    K: Float[np.ndarray, "3 3"],
    T: Float[np.ndarray, "4 4"],
) -> Float[np.ndarray, "n 2"]:
    """
    Project 3D points in world coordinates to 2D pixel coordinates using the
    camera intrinsic and extrinsic parameters.

    Args:
        points: (N, 3) array of 3D points in world coordinates.
        K: (3, 3) camera intrinsic matrix.
        T: (4, 4) camera extrinsic matrix (world-to-camera transformation).

    Returns:
        (N, 2) array of pixel coordinates, where each row contains [x, y]
        coordinates. The x-coordinate corresponds to the image width (columns)
        and the y-coordinate corresponds to the image height (rows).

    Examples:
        .. code-block:: python

            pixels = ct.project.points_to_pixels(points, K, T)

            # Extract and round pixel coordinates
            cols = pixels[:, 0]  # x-coordinates (width dimension)
            rows = pixels[:, 1]  # y-coordinates (height dimension)
            cols = np.round(cols).astype(np.int32)
            rows = np.round(rows).astype(np.int32)

            # Clamp to image boundaries
            cols[cols >= width] = width - 1
            cols[cols < 0] = 0
            rows[rows >= height] = height - 1
            rows[rows < 0] = 0
    """
    sanity.assert_K(K)
    sanity.assert_T(T)
    sanity.assert_shape_nx3(points, name="points")

    W2P = convert.K_T_to_W2P(K, T)

    # (N, 3) -> (N, 4)
    points = convert.to_homo(points)
    # (N, 4)
    pixels = (W2P @ points.T).T
    # (N, 4) -> (N, 3), discard the last column
    pixels = pixels[:, :3]
    # (N, 3) -> (N, 2)
    pixels = convert.from_homo(pixels)

    return pixels




[docs]
def im_depth_to_point_cloud(
    im_depth: Float[np.ndarray, "h w"],
    K: Float[np.ndarray, "3 3"],
    T: Float[np.ndarray, "4 4"],
    im_color: Optional[Float[np.ndarray, "h w 3"]] = None,
    to_image: bool = False,
    ignore_invalid: bool = True,
    scale_factor: float = 1.0,
) -> Union[
    Float[np.ndarray, "n 3"],
    Float[np.ndarray, "h w 3"],
    Tuple[Float[np.ndarray, "n 3"], Float[np.ndarray, "n 3"]],
    Tuple[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"]],
]:
    """
    Convert a depth image to a 3D point cloud in world coordinates, optionally
    including color information. The point cloud can be returned in either a
    sparse format (N, 3) or a dense format matching the input image dimensions
    (H, W, 3).

    Args:
        im_depth: (H, W) depth image in world scale, float32 or float64.
        K: (3, 3) camera intrinsic matrix.
        T: (4, 4) camera extrinsic matrix (world-to-camera transformation).
        im_color: Optional (H, W, 3) color image in range [0, 1], float32/float64.
        to_image: If True, returns a dense point cloud with shape (H, W, 3).
            If False, returns a sparse point cloud with shape (N, 3).
        ignore_invalid: If True, filters out points with invalid depths
            (<= 0 or >= inf).
        scale_factor: Scaling factor for the input images. When scale_factor < 1,
            the images are downsampled and the intrinsic matrix is adjusted
            accordingly.

    Returns:
        Single array or a tuple of two arrays:
            - ``im_color == None``, ``to_image == False``:
              returns (N, 3) array of 3D points
            - ``im_color == None``, ``to_image == True``:
              returns (H, W, 3) array of 3D points
            - ``im_color != None``, ``to_image == False``:
              returns (N, 3) array of 3D points and (N, 3) array of colors
            - ``im_color != None``, ``to_image == True``:
              returns (H, W, 3) array of 3D points and (H, W, 3) array of colors
    """
    # Sanity checks
    sanity.assert_K(K)
    sanity.assert_T(T)
    if not isinstance(im_depth, np.ndarray):
        raise TypeError("im_depth must be a numpy array")
    if im_depth.dtype not in [np.float32, np.float64]:
        raise TypeError("im_depth must be of type float32 or float64")
    if im_depth.ndim != 2:
        raise ValueError("im_depth must be a 2D array")
    if im_color is not None:
        if not isinstance(im_color, np.ndarray):
            raise TypeError("im_color must be a numpy array")
        if im_color.shape[:2] != im_depth.shape or im_color.ndim != 3:
            raise ValueError(
                f"im_color must be (H, W, 3), and have the same "
                f"shape as im_depth, but got {im_color.shape}."
            )
        if im_color.dtype not in [np.float32, np.float64]:
            raise TypeError("im_color must be of type float32 or float64")
        if im_color.max() > 1.0 or im_color.min() < 0.0:
            raise ValueError("im_color values must be in the range [0, 1]")
    if to_image and ignore_invalid:
        print("Warning: ignore_invalid is ignored when to_image is True.")
        ignore_invalid = False

    # Make copies as K may be modified inplace
    K = np.copy(K)
    T = np.copy(T)

    if scale_factor != 1.0:
        # Calculate new dimensions
        new_width = int(im_depth.shape[1] * scale_factor)
        new_height = int(im_depth.shape[0] * scale_factor)

        # Resize images
        im_depth = image.resize(
            im_depth,
            shape_wh=(new_width, new_height),
            interpolation=cv2.INTER_NEAREST,
        )
        if im_color is not None:
            im_color = image.resize(
                im_color,
                shape_wh=(new_width, new_height),
                interpolation=cv2.INTER_LINEAR,
            )

        # Adjust the intrinsic matrix K for the new image dimensions
        K[0, 0] *= scale_factor
        K[1, 1] *= scale_factor
        K[0, 2] *= scale_factor
        K[1, 2] *= scale_factor

    height, width = im_depth.shape
    pose = convert.T_to_pose(T)

    # pixels.shape == (height, width, 2)
    # pixels[r, c] == [c, r], since x-axis goes from top-left to top-right.
    pixels = np.transpose(np.indices((width, height)), (2, 1, 0))
    # (height * width, 2)
    pixels = pixels.reshape((-1, 2))
    # (height * width, 3)
    pixels_homo = convert.to_homo(pixels)
    # (height * width, )
    depths = im_depth.flatten()

    if ignore_invalid:
        valid_mask = (depths > 0) & (depths < np.inf)
        depths = depths[valid_mask]
        pixels_homo = pixels_homo[valid_mask]
        if im_color is not None:
            colors = im_color.reshape((-1, 3))[valid_mask]

    # Transform pixel coordinates to world coordinates.
    # (height * width, 1)
    depths = depths.reshape((-1, 1))

    # (N, 3)
    points_camera = depths * (np.linalg.inv(K) @ pixels_homo.T).T
    # (N, 4)
    points_world = (pose @ (convert.to_homo(points_camera).T)).T
    # (N, 3)
    points_world = convert.from_homo(points_world)

    if to_image:
        assert (
            ignore_invalid == False
        ), "ignore_invalid is ignored when to_image is True."
        points_world = points_world.reshape((height, width, 3))
        if im_color is None:
            return points_world
        else:
            return points_world, im_color
    else:
        if im_color is None:
            return points_world
        else:
            return points_world, colors