Source code for ct.image

"""
Functions for manipulating images.
"""

import numpy as np
import cv2
from . import sanity
from . import colormap
from typing import Tuple, List, Optional, Union, Literal
from jaxtyping import Float, UInt8, UInt16, Int



[docs]
def crop_white_borders(
    im: Float[np.ndarray, "h w 3"], padding: Tuple[int, int, int, int] = (0, 0, 0, 0)
) -> Float[np.ndarray, "h_cropped w_cropped 3"]:
    """
    Remove the white margins around an image, then add back optional padding.

    The border sizes are measured with ``compute_cropping`` and the crop and
    padding are applied with ``apply_cropping_padding``.

    Args:
        im: Input float image in range [0.0, 1.0].
        padding: Number of pixels to add on each side after cropping, given as
            (top, bottom, left, right). Defaults to (0, 0, 0, 0).

    Returns:
        Cropped and padded image.
    """
    return apply_cropping_padding(im, compute_cropping(im), padding)




[docs]
def compute_cropping(im: Float[np.ndarray, "h w 3"]) -> Tuple[int, int, int, int]:
    """
    Compute white border sizes in pixels for 3-channel RGB images.

    This function calculates the number of fully white rows/columns on each
    edge of a 3-channel RGB image. White pixels are defined as having values
    of exactly (1.0, 1.0, 1.0).

    Args:
        im: Input float32 image in range [0.0, 1.0].

    Returns:
        Tuple[int, int, int, int]: Plain Python ints, in this order:
            - crop_t: Number of white rows on the top edge
            - crop_b: Number of white rows on the bottom edge
            - crop_l: Number of white columns on the left edge
            - crop_r: Number of white columns on the right edge

        If the image contains no non-white pixel at all, (0, 0, 0, 0) is
        returned, i.e. nothing is cropped.

    Raises:
        ValueError: If input image has invalid dtype or dimensions.
    """
    if im.dtype != np.float32:
        raise ValueError(f"Expected im.dtype to be np.float32, but got {im.dtype}")
    if im.ndim != 3 or im.shape[2] != 3:
        raise ValueError(f"Expected im to be of shape (H, W, 3), but got {im.shape}")

    # A pixel is white only if all three channels are exactly 1.0.
    white_mask = np.all(im == 1.0, axis=-1)

    # Indices of rows / columns that contain at least one non-white pixel.
    rows_with_color = np.where(~white_mask.all(axis=1))[0]
    cols_with_color = np.where(~white_mask.all(axis=0))[0]

    # Indexing a numpy array yields numpy integer scalars; convert them so the
    # result is always a tuple of Python ints, matching the annotation and the
    # all-white fallback of 0.
    if len(rows_with_color):
        crop_t = int(rows_with_color[0])
        crop_b = int(im.shape[0] - rows_with_color[-1] - 1)
    else:
        crop_t, crop_b = 0, 0

    if len(cols_with_color):
        crop_l = int(cols_with_color[0])
        crop_r = int(im.shape[1] - cols_with_color[-1] - 1)
    else:
        crop_l, crop_r = 0, 0

    return crop_t, crop_b, crop_l, crop_r




[docs]
def apply_cropping_padding(
    im_src: Float[np.ndarray, "h w 3"],
    cropping: Tuple[int, int, int, int],
    padding: Tuple[int, int, int, int],
) -> Float[np.ndarray, "h_cropped w_cropped 3"]:
    """
    Crop an RGB image and then surround the result with white padding.

    Args:
        im_src: Source float32 image in range [0.0, 1.0].
        cropping: Pixels to remove from each side, given as
            (crop_top, crop_bottom, crop_left, crop_right).
        padding: Pixels to add to each side after cropping, given as
            (pad_top, pad_bottom, pad_left, pad_right). Padded pixels are
            filled with 1.0.

    Returns:
        Cropped and padded image.

    Raises:
        ValueError: If input image has invalid dtype or dimensions.
    """
    if im_src.dtype != np.float32:
        raise ValueError(f"im_src.dtype == {im_src.dtype} != np.float32")
    if im_src.ndim != 3:
        raise ValueError(f"im_src must be (H, W, 3), but got {im_src.shape}")

    height, width = im_src.shape[:2]

    # Crop: keep the region left after trimming each side.
    top, bottom, left, right = cropping
    cropped = im_src[top : height - bottom, left : width - right]

    # Pad: only the two spatial axes grow; the channel axis is left alone.
    pad_t, pad_b, pad_l, pad_r = padding
    pad_widths = ((pad_t, pad_b), (pad_l, pad_r), (0, 0))
    return np.pad(cropped, pad_widths, mode="constant", constant_values=1.0)




[docs]
def apply_croppings_paddings(
    src_ims: List[Float[np.ndarray, "h w 3"]],
    croppings: List[Tuple[int, int, int, int]],
    paddings: List[Tuple[int, int, int, int]],
) -> List[Float[np.ndarray, "h_cropped w_cropped 3"]]:
    """
    Crop and pad every image of a list, each with its own cropping and padding.

    Args:
        src_ims: List of source images.
        croppings: One 4-tuple per image: [(crop_t, crop_b, crop_l, crop_r), ...]
        paddings: One 4-tuple per image: [(pad_t, pad_b, pad_l, pad_r), ...]

    Returns:
        List of cropped and padded images, in the same order as ``src_ims``.

    Raises:
        ValueError: If the number of croppings or paddings doesn't match the
        number of images, or if any cropping tuple has invalid length.
    """
    num_ims = len(src_ims)

    # All three lists must describe the same number of images.
    if len(croppings) != num_ims:
        raise ValueError(f"len(croppings) == {len(croppings)} != {num_ims}")
    if len(paddings) != num_ims:
        raise ValueError(f"len(paddings) == {len(paddings)} != {num_ims}")

    # Every cropping must be a (top, bottom, left, right) 4-tuple.
    for cropping in croppings:
        if len(cropping) != 4:
            raise ValueError(f"len(cropping) == {len(cropping)} != 4")

    return [
        apply_cropping_padding(im_src, cropping, padding)
        for im_src, cropping, padding in zip(src_ims, croppings, paddings)
    ]




[docs]
def get_post_croppings_paddings_shapes(
    src_shapes: List[Tuple[int, int, int]],
    croppings: List[Tuple[int, int, int, int]],
    paddings: List[Tuple[int, int, int, int]],
) -> List[Tuple[int, int, int]]:
    """
    Predict image shapes after cropping and padding, without touching pixels.

    The three lists are walked in lockstep; iteration stops at the shortest one.

    Args:
        src_shapes: List of source image shapes.
        croppings: List of 4-tuples: [(crop_t, crop_b, crop_l, crop_r), ...]
        paddings: List of 4-tuples: [(pad_t, pad_b, pad_l, pad_r), ...]

    Returns:
        List of resulting image shapes in format (height_cropped, width_cropped, channels).
    """

    def _shape_after(shape, cropping, padding):
        # Height/width shrink by the crop and grow by the pad; channels are kept.
        crop_t, crop_b, crop_l, crop_r = cropping
        pad_t, pad_b, pad_l, pad_r = padding
        new_h = shape[0] - crop_t - crop_b + pad_t + pad_b
        new_w = shape[1] - crop_l - crop_r + pad_l + pad_r
        return (new_h, new_w, shape[2])

    return [
        _shape_after(shape, cropping, padding)
        for shape, cropping, padding in zip(src_shapes, croppings, paddings)
    ]




[docs]
def overlay_mask_on_rgb(
    im_rgb: Float[np.ndarray, "h w 3"],
    im_mask: Float[np.ndarray, "h w"],
    overlay_alpha: float = 0.4,
    overlay_color: Float[np.ndarray, "3"] = np.array([0, 0, 1]),
) -> Float[np.ndarray, "h w 3"]:
    """
    Tint the masked region of an RGB image with a semi-transparent color.

    The mask first selects, per pixel, between the original color and
    ``overlay_color``; that result is then blended with the original image
    using ``overlay_alpha``.

    Args:
        im_rgb: RGB image in range [0.0, 1.0].
        im_mask: Mask image in range [0.0, 1.0].
        overlay_alpha: Transparency level for the overlay, in range [0.0, 1.0].
            Defaults to 0.4.
        overlay_color: Color for the overlay as a 3-channel array in range [0.0, 1.0].
            Defaults to blue.

    Returns:
        Resulting image with mask overlay applied.

    Raises:
        AssertionError: If input images have invalid shapes, dtypes, or value ranges.
    """
    float_dtypes = (np.float32, np.float64)

    # Checks on the RGB image (and that the mask matches its height/width).
    assert im_rgb.shape[:2] == im_mask.shape
    assert im_rgb.dtype in float_dtypes
    assert im_rgb.max() <= 1.0 and im_rgb.min() >= 0.0

    # Checks on the mask.
    assert im_mask.dtype in float_dtypes
    assert im_mask.max() <= 1.0 and im_mask.min() >= 0.0

    # Checks on the blend factor.
    assert 0.0 <= overlay_alpha <= 1.0

    # Checks on the color; np.array makes a copy, so the argument is untouched.
    color = np.array(overlay_color)
    assert color.shape == (3,)
    assert color.max() <= 1.0 and color.min() >= 0.0

    # Replicate the mask across the three channels.
    mask_3c = np.dstack((im_mask,) * 3)

    # Fully opaque overlay: mask decides between original pixel and color.
    hard_blend = im_rgb * (1.0 - mask_3c) + color * mask_3c

    # Soften the opaque overlay by mixing it with the original image.
    return im_rgb * (1.0 - overlay_alpha) + hard_blend * overlay_alpha




[docs]
def ndc_coords_to_pixels(
    ndc_coords: Float[np.ndarray, "n 2"],
    im_size_wh: Tuple[int, int],
    align_corners: bool = False,
) -> Float[np.ndarray, "n 2"]:
    """
    Map Normalized Device Coordinates (NDC) to pixel coordinates.

    The mapping is a per-axis linear interpolation that sends the NDC square
    [-1, 1] x [-1, 1] onto the pixel extent selected by ``align_corners``.

    Args:
        ndc_coords: NDC coordinates. Each row represents (x, y) or (c, r).
            Most values shall be in [-1, 1], where (-1, -1) is the top left
            corner and (1, 1) is the bottom right corner.
        im_size_wh: Image size (width, height).
        align_corners: Determines how NDC coordinates map to pixel coordinates.
            If True: -1 and 1 are aligned to the center of the corner pixels.
            If False: -1 and 1 are aligned to the corner of the corner pixels.

    Returns:
        Pixel coordinates as an array with the same dtype as ``ndc_coords``
        inputs are converted to.

    Notes:
        This function is commonly used in computer graphics to map normalized
        coordinates to specific pixel locations in an image.

        When align_corners is True, src and dst images are aligned by the center
        point of their corner pixels; when align_corners is False, src and dst
        images are aligned by the corner points of the corner pixels.

        The NDC space does not have a "pixels size", so we precisely align the
        extrema -1 and 1 to either the center or corner of the corner pixels.
    """
    sanity.assert_shape(ndc_coords, (None, 2), name="ndc_coords")
    w, h = im_size_wh[:2]
    dtype = ndc_coords.dtype

    if align_corners:
        # Pixel centers of the corner pixels:
        #   (-1, -1) -> (0, 0)      (1, 1) -> (w - 1, h - 1)
        pixel_lo, pixel_hi = (0, 0), (w - 1, h - 1)
    else:
        # Outer corners of the corner pixels:
        #   (-1, -1) -> (-0.5, -0.5)      (1, 1) -> (w - 0.5, h - 0.5)
        pixel_lo, pixel_hi = (-0.5, -0.5), (w - 0.5, h - 0.5)

    # All anchors share the input dtype so the arithmetic stays in that dtype.
    ndc_min = np.array([-1.0, -1.0], dtype=dtype)
    ndc_max = np.array([1.0, 1.0], dtype=dtype)
    pix_min = np.array(pixel_lo, dtype=dtype)
    pix_max = np.array(pixel_hi, dtype=dtype)

    return (ndc_coords - ndc_min) / (ndc_max - ndc_min) * (pix_max - pix_min) + pix_min




[docs]
def rotate(
    im: Float[np.ndarray, "h w c"], ccw_degrees: int
) -> Float[np.ndarray, "h_rotated w_rotated c"]:
    """
    Rotate an image counter-clockwise by a multiple of 90 degrees.

    Args:
        im: Input image.
        ccw_degrees: Counter-clockwise rotation angle in degrees. Must be one
            of: 0, 90, 180, or 270.

    Returns:
        Rotated image. For 0 degrees a copy of the input is returned. The shape
        will depend on the rotation angle:
            - 0 or 180 degrees: (height, width, channels)
            - 90 or 270 degrees: (width, height, channels)

    Raises:
        ValueError: If ccw_degrees is not one of the allowed values.
    """
    # No rotation: still hand back a new array rather than the input itself.
    if ccw_degrees == 0:
        return np.copy(im)

    # Counter-clockwise angle -> OpenCV rotation flag. A 270-degree
    # counter-clockwise turn is the same as a 90-degree clockwise one.
    angle_to_flag = (
        (90, cv2.ROTATE_90_COUNTERCLOCKWISE),
        (180, cv2.ROTATE_180),
        (270, cv2.ROTATE_90_CLOCKWISE),
    )
    for angle, rotate_flag in angle_to_flag:
        if ccw_degrees == angle:
            return cv2.rotate(im, rotate_flag)

    raise ValueError(f"Invalid rotation angle: {ccw_degrees}.")




[docs]
def recover_rotated_pixels(dst_pixels, src_wh, ccw_degrees):
    """
    Map pixel coordinates of a rotated image back onto the original image.

    Args:
        dst_pixels: Pixel coordinates in the rotated image. Each row is (col, row).
        src_wh: Width and height of the original image.
        ccw_degrees: Counter-clockwise rotation angle in degrees that was applied
            to create the rotated image. Must be one of: 0, 90, 180, or 270.

    Returns:
        Pixel coordinates in the original image space.

    Raises:
        ValueError: If ccw_degrees is not one of the allowed values.

    Notes:
        This function is the inverse operation of image rotation. It maps
        coordinates from the rotated image back to the original image space.

        Before returning, the recovered coordinates are rotated forward again
        and compared with the input via ``np.testing.assert_allclose``.
    """
    # Forward mapping, with src_wh = (w, h) and src pixel = (c, r):
    #   rotate  90: dst pixel = (r        , w - 1 - c)
    #   rotate 180: dst pixel = (w - 1 - c, h - 1 - r)
    #   rotate 270: dst pixel = (h - 1 - r, c        )
    sanity.assert_shape(dst_pixels, (None, 2), name="dst_pixels")
    w, h = src_wh

    def _as_pixels(cols, rows):
        # Pack separate column/row vectors into an (n, 2) array of (col, row).
        return np.stack([cols, rows], axis=1)

    # Invert the forward mapping: rotated -> original.
    rot_c, rot_r = dst_pixels[:, 0], dst_pixels[:, 1]
    if ccw_degrees == 0:
        src_pixels = np.copy(dst_pixels)
    elif ccw_degrees == 90:
        src_pixels = _as_pixels(w - 1 - rot_r, rot_c)
    elif ccw_degrees == 180:
        src_pixels = _as_pixels(w - 1 - rot_c, h - 1 - rot_r)
    elif ccw_degrees == 270:
        src_pixels = _as_pixels(rot_r, h - 1 - rot_c)
    else:
        raise ValueError(f"Invalid rotation angle: {ccw_degrees}.")

    # Round-trip check: apply the forward mapping to the recovered pixels and
    # make sure we land on the input again. The angle was validated above, so
    # the final branch can only be 270 degrees.
    orig_c, orig_r = src_pixels[:, 0], src_pixels[:, 1]
    if ccw_degrees == 0:
        round_trip = np.copy(src_pixels)
    elif ccw_degrees == 90:
        round_trip = _as_pixels(orig_r, w - 1 - orig_c)
    elif ccw_degrees == 180:
        round_trip = _as_pixels(w - 1 - orig_c, h - 1 - orig_r)
    else:
        round_trip = _as_pixels(h - 1 - orig_r, orig_c)
    np.testing.assert_allclose(dst_pixels, round_trip, rtol=1e-5, atol=1e-5)

    return src_pixels




[docs]
def resize(
    im: Union[
        Float[np.ndarray, "h_src w_src"],
        Float[np.ndarray, "h_src w_src 3"],
        UInt8[np.ndarray, "h_src w_src"],
        UInt8[np.ndarray, "h_src w_src 3"],
        UInt16[np.ndarray, "h_src w_src"],
        UInt16[np.ndarray, "h_src w_src 3"],
    ],
    shape_wh: Tuple[int, int],
    aspect_ratio_fill: Optional[
        Union[float, Tuple[float, float, float], np.ndarray]
    ] = None,
    interpolation: int = cv2.INTER_LINEAR,
) -> Union[
    Float[np.ndarray, "h_dst w_dst"],
    Float[np.ndarray, "h_dst w_dst 3"],
    UInt8[np.ndarray, "h_dst w_dst"],
    UInt8[np.ndarray, "h_dst w_dst 3"],
    UInt16[np.ndarray, "h_dst w_dst"],
    UInt16[np.ndarray, "h_dst w_dst 3"],
]:
    """
    Resize an image to a target (width, height), optionally letterboxing it.

    Args:
        im: Input image: 2D, or 3D with 3 channels; dtype uint8, uint16,
            float32 or float64.
        shape_wh: Target size as (width, height) in pixels.
        aspect_ratio_fill: Value(s) to use for padding when maintaining aspect ratio.
            If None, image is directly resized without maintaining aspect ratio.
            If provided, must match the number of channels in the input image.
            For float images the values must lie in [0.0, 1.0].
        interpolation: OpenCV interpolation method (e.g., cv2.INTER_LINEAR).

    Returns:
        Resized image.

    Raises:
        AssertionError: If the dtype, the input shape, the target size or
            ``aspect_ratio_fill`` fail the checks described above.

    Notes:
        - When maintaining aspect ratio, the image is resized to fit within the target
          dimensions and padded with aspect_ratio_fill values as needed. The resized
          content sits in the top-left corner; padding goes to the bottom or right.
        - OpenCV uses (width, height) for image size while numpy uses (height, width).
    """
    # Check the pixel format.
    dtype = im.dtype
    assert dtype in (np.uint8, np.uint16, np.float32, np.float64)

    # Check the input layout: (H, W) or (H, W, 3).
    ndim = im.ndim
    assert ndim in {2, 3}, "ndim must be 2 or 3"
    if ndim == 3:
        assert im.shape[2] == 3, "im.shape[2] must be 3"

    # Check the target size and derive the numpy-ordered output shape.
    dst_w, dst_h = shape_wh
    assert dst_w > 0 and dst_h > 0
    dst_numpy_shape = (dst_h, dst_w) if ndim == 2 else (dst_h, dst_w, 3)

    fill = None
    if aspect_ratio_fill is None:
        # Plain stretch to the target size; the aspect ratio is not preserved.
        scaled_w, scaled_h = dst_w, dst_h
    else:
        # Normalise and check the fill value: one entry per channel.
        fill = np.array(aspect_ratio_fill).flatten()
        if ndim == 2:
            assert len(fill) == 1
        else:
            assert len(fill) == 3
        if dtype == np.float32 or dtype == np.float64:
            assert fill.max() <= 1.0
            assert fill.min() >= 0.0
        fill = fill.astype(dtype)

        # Largest size with the source aspect ratio that fits in the target.
        src_h, src_w = im.shape[:2]
        src_wh_ratio = src_w / float(src_h)
        dst_wh_ratio = dst_w / float(dst_h)
        if src_wh_ratio >= dst_wh_ratio:
            # Source is relatively wider: width is the limit, pad the height.
            scaled_w = dst_w
            scaled_h = int(round(scaled_w / src_wh_ratio))
        else:
            # Source is relatively taller: height is the limit, pad the width.
            scaled_h = dst_h
            scaled_w = int(round(scaled_h * src_wh_ratio))
        assert scaled_w <= dst_w and scaled_h <= dst_h

    im_scaled = cv2.resize(im, dsize=(scaled_w, scaled_h), interpolation=interpolation)

    if scaled_w == dst_w and scaled_h == dst_h:
        # Already at the target size; nothing to pad.
        im_resize = im_scaled
    else:
        # Paste the scaled image at the top-left of a fill-colored canvas.
        im_resize = np.full(dst_numpy_shape, fill_value=fill, dtype=dtype)
        im_resize[:scaled_h, :scaled_w] = im_scaled

    # The result must have exactly the requested shape.
    assert im_resize.shape == dst_numpy_shape

    return im_resize




[docs]
def recover_resized_pixels(
    dst_pixels: Float[np.ndarray, "n 2"],
    src_wh: Tuple[int, int],
    dst_wh: Tuple[int, int],
    keep_aspect_ratio: bool = True,
) -> Float[np.ndarray, "n 2"]:
    """
    Map pixel coordinates of a resized image back onto the original image.

    Args:
        dst_pixels: Pixel coordinates in the resized image. Each row is (col, row).
        src_wh: Width and height of the original image.
        dst_wh: Width and height of the resized image.
        keep_aspect_ratio: Whether aspect ratio was maintained during resizing.
            If True, accounts for any padding that was added to maintain aspect ratio.

    Returns:
        Pixel coordinates in the original image space.

    Notes:
        1. This function is paired with OpenCV's cv2.resize() function, where
           the *center* of the top-left pixel is considered to be (0, 0).

           - Top-left     corner: ``(-0.5, -0.5)``
           - Bottom-right corner: ``(w - 0.5, h - 0.5)``

           However, most other implementations in computer graphics treat the
           *corner* of the top-left pixel to be (0, 0). For more discussions, see:
           https://www.realtimerendering.com/blog/the-center-of-the-pixel-is-0-50-5/
        2. OpenCV's image size is (width, height), while numpy's array shape is
           (height, width) or (height, width, 3). Be careful with the order.
        3. This function is the inverse operation of image resizing.
        4. Coordinates are not rounded to integers and out-of-bound values are not corrected.
    """
    sanity.assert_shape_nx2(dst_pixels)
    src_w, src_h = src_wh[:2]
    dst_w, dst_h = dst_wh[:2]

    # Size of the region of the resized image that actually holds image
    # content (as opposed to aspect-ratio padding).
    if keep_aspect_ratio:
        src_wh_ratio = src_w / float(src_h)
        dst_wh_ratio = dst_w / float(dst_h)
        if src_wh_ratio >= dst_wh_ratio:
            # Source is relatively wider: full width used, height was padded.
            content_w = dst_w
            content_h = int(round(content_w / src_wh_ratio))
        else:
            # Source is relatively taller: full height used, width was padded.
            content_h = dst_h
            content_w = int(round(content_h * src_wh_ratio))
        assert content_w <= dst_w and content_h <= dst_h
    else:
        # Direct stretch: the content fills the whole resized image.
        content_w, content_h = dst_w, dst_h

    # Linear interpolation between the outer pixel corners of both images:
    #   original (-0.5, -0.5)               <-> resized (-0.5, -0.5)
    #   original (src_w - 0.5, src_h - 0.5) <-> resized (content_w - 0.5, content_h - 0.5)
    #
    #   dst_pixels - resized_tl     src_pixels - original_tl
    #   ------------------------ == --------------------------
    #   resized_br - resized_tl     original_br - original_tl
    original_tl = np.array([-0.5, -0.5])
    original_br = np.array([src_w - 0.5, src_h - 0.5])
    resized_tl = np.array([-0.5, -0.5])
    resized_br = np.array([content_w - 0.5, content_h - 0.5])

    return (dst_pixels - resized_tl) / (resized_br - resized_tl) * (
        original_br - original_tl
    ) + original_tl




[docs]
def make_corres_image(
    im_src: Float[np.ndarray, "h w 3"],
    im_dst: Float[np.ndarray, "h w 3"],
    src_pixels: Int[np.ndarray, "n 2"],
    dst_pixels: Int[np.ndarray, "n 2"],
    confidences: Optional[Float[np.ndarray, "n"]] = None,
    texts: Optional[List[str]] = None,
    point_color: Optional[Tuple[float, ...]] = (0, 1, 0, 1.0),
    line_color: Optional[Tuple[float, ...]] = (0, 0, 1, 0.75),
    text_color: Tuple[float, float, float] = (1, 1, 1),
    point_size: int = 1,
    line_width: int = 1,
    sample_ratio: Optional[float] = None,
) -> Float[np.ndarray, "h 2*w 3"]:
    """
    Make correspondence image.

    The source image is placed on the left and the destination image on the
    right. Corresponding pixels are optionally marked with points and joined
    with lines, and optional text lines are drawn starting near the top-left.

    Args:
        im_src: Source image in range [0, 1].
        im_dst: Destination image in range [0, 1].
        src_pixels: Source pixel coordinates. Each row represents (x, y) or (c, r).
        dst_pixels: Destination pixel coordinates. Each row represents (x, y) or (c, r).
        confidences: Confidence values for each correspondence in range [0, 1].
        texts: List of texts to draw on the top-left of the image.
        point_color: RGB or RGBA color of the point in range [0, 1].

            - If point_color is None: points will never be drawn.
            - If point_color is not None and confidences is None:
              point color will be determined by point_color.
            - If point_color is not None and confidences is not None:
              point color will be determined by the "viridis" colormap
              (the alpha of point_color is not used in this case).
        line_color: RGB or RGBA color of the line in range [0, 1]. If None,
            lines are not drawn.
        text_color: RGB color of the text in range [0, 1].
        point_size: Size of the point (used as the circle radius).
        line_width: Width of the line.
        sample_ratio: Float value from 0-1. A random subset of
            ``round(n * sample_ratio)`` correspondences is drawn, selected
            without replacement with ``np.random``. If None, all points are drawn.

    Returns:
        Correspondence image.

    Raises:
        AssertionError: If images, pixel coordinates, confidences or colors
            have invalid shapes, dtypes, or value ranges.
        ValueError: If sample_ratio is outside [0.0, 1.0].
    """
    # Sanity: images.
    assert im_src.shape == im_dst.shape
    assert im_src.ndim == 3 and im_src.shape[2] == 3
    assert im_src.dtype == np.float32 or im_src.dtype == np.float64
    assert im_dst.dtype == np.float32 or im_dst.dtype == np.float64
    assert im_src.min() >= 0.0 and im_src.max() <= 1.0
    assert im_dst.min() >= 0.0 and im_dst.max() <= 1.0

    # Sanity: pixel coordinates.
    assert src_pixels.shape == dst_pixels.shape
    assert src_pixels.ndim == 2 and src_pixels.shape[1] == 2
    assert src_pixels.dtype == np.int32 or src_pixels.dtype == np.int64
    assert dst_pixels.dtype == np.int32 or dst_pixels.dtype == np.int64
    assert len(src_pixels) == len(dst_pixels)

    # Sanity: confidences.
    if confidences is not None:
        assert len(confidences) == len(src_pixels)
        assert confidences.dtype == np.float32 or confidences.dtype == np.float64
        if confidences.size > 0:
            assert confidences.min() >= 0.0 and confidences.max() <= 1.0
        assert confidences.ndim == 1

    # Get shape.
    h, w, _ = im_src.shape

    # Sample correspondences. This is done exactly once, and the same indices
    # are applied to the confidences (when given) so that every confidence
    # stays paired with its correspondence.
    sample_ratio = 1.0 if sample_ratio is None else sample_ratio
    if sample_ratio > 1.0 or sample_ratio < 0.0:
        raise ValueError("sample_ratio should be in [0.0, 1.0]")
    num_corres = len(src_pixels)
    if sample_ratio != 1.0 and num_corres > 0:
        num_samples = int(round(num_corres * sample_ratio))
        sample_indices = np.random.choice(num_corres, num_samples, replace=False)
        src_pixels = src_pixels[sample_indices]
        dst_pixels = dst_pixels[sample_indices]
        if confidences is not None:
            confidences = confidences[sample_indices]

    # Source on the left, destination on the right. If there is no
    # correspondence, this side-by-side image is returned without drawings.
    im_corres = np.concatenate((im_src, im_dst), axis=1)

    if len(src_pixels) > 0:
        # Sanity: all coordinates must lie inside the images.
        assert src_pixels[:, 0].min() >= 0 and src_pixels[:, 0].max() < w
        assert src_pixels[:, 1].min() >= 0 and src_pixels[:, 1].max() < h
        assert dst_pixels[:, 0].min() >= 0 and dst_pixels[:, 0].max() < w
        assert dst_pixels[:, 1].min() >= 0 and dst_pixels[:, 1].max() < h

        # Sanity: point_color and line_color.
        if point_color is not None:
            assert len(point_color) in {3, 4}
            assert np.min(point_color) >= 0.0 and np.max(point_color) <= 1.0
        if line_color is not None:
            assert len(line_color) in {3, 4}
            assert np.min(line_color) >= 0.0 and np.max(line_color) <= 1.0

        # Hand OpenCV plain Python ints rather than numpy scalars. Destination
        # points are shifted by w because im_dst sits in the right half.
        src_points = [(int(c), int(r)) for c, r in src_pixels]
        dst_points = [(int(c) + w, int(r)) for c, r in dst_pixels]

        # Draw points.
        if point_color is not None:
            if confidences is None:
                # Draw filled circles into a mask, then overlay the mask with
                # the requested color and alpha.
                im_point_mask = np.zeros(im_corres.shape[:2], dtype=im_corres.dtype)
                for src_point, dst_point in zip(src_points, dst_points):
                    cv2.circle(im_point_mask, src_point, point_size, (1,), -1)
                    cv2.circle(im_point_mask, dst_point, point_size, (1,), -1)
                point_alpha = point_color[3] if len(point_color) == 4 else 1.0
                im_corres = overlay_mask_on_rgb(
                    im_corres,
                    im_point_mask,
                    overlay_alpha=point_alpha,
                    overlay_color=point_color[:3],
                )
            else:
                # Query color map for colors, given confidences from 0-1, and
                # draw each filled circle directly in its own color.
                colors = colormap.query(confidences, colormap="viridis")
                for src_point, dst_point, color in zip(
                    src_points, dst_points, colors
                ):
                    circle_color = tuple(color.tolist())
                    cv2.circle(im_corres, src_point, point_size, circle_color, -1)
                    cv2.circle(im_corres, dst_point, point_size, circle_color, -1)

        # Draw lines.
        if line_color is not None:
            # Draw lines into a mask, then overlay the mask with the requested
            # color and alpha.
            im_line_mask = np.zeros(im_corres.shape[:2], dtype=im_corres.dtype)
            for src_point, dst_point in zip(src_points, dst_points):
                cv2.line(im_line_mask, src_point, dst_point, (1,), line_width)

            line_alpha = line_color[3] if len(line_color) == 4 else 1.0
            im_corres = overlay_mask_on_rgb(
                im_corres,
                im_line_mask,
                overlay_alpha=line_alpha,
                overlay_color=line_color[:3],
            )

    # Draw texts.
    if texts:

        def get_scales(im_height, max_lines, font, line_text_h_ratio):
            # Pick a font scale so that roughly max_lines lines of text fit in
            # the image height, then measure the resulting text/line heights.
            (_, text_h), _ = cv2.getTextSize("ABCDE", font, 1, 1)
            text_h = text_h * line_text_h_ratio
            expected_text_h = im_height / max_lines

            # NOTE(review): the scale is rounded to an integer, so it can
            # become 0 for short images - confirm this is intended.
            font_scale = int(round(expected_text_h / text_h))
            (_, text_h), _ = cv2.getTextSize("ABCDE", font, font_scale, 1)
            line_h = text_h * line_text_h_ratio
            line_h = int(round(line_h))
            text_h = int(round(text_h))

            return font_scale, line_h, text_h

        font = cv2.FONT_HERSHEY_DUPLEX
        line_text_h_ratio = 1.2
        max_lines = 20
        font_scale, line_h, text_h = get_scales(
            im_corres.shape[0], max_lines, font, line_text_h_ratio
        )
        font_thickness = 2
        org = (line_h, line_h * 2)

        # One text per line, each line_h pixels below the previous one.
        for text in texts:
            im_corres = cv2.putText(
                im_corres,
                text,
                org,
                font,
                font_scale,
                text_color,
                font_thickness,
                cv2.LINE_AA,
            )
            org = (org[0], org[1] + line_h)

    assert im_corres.min() >= 0.0 and im_corres.max() <= 1.0

    return im_corres




[docs]
def vstack_images(
    ims: List[Float[np.ndarray, "h w 3"]],
    alignment: Literal["left", "center", "right"] = "left",
    background_color: Tuple[float, float, float] = (1.0, 1.0, 1.0),
) -> Float[np.ndarray, "h_stacked w_stacked 3"]:
    """
    Stack images on top of each other on a canvas as wide as the widest image.

    Args:
        ims: List of RGB images in range [0.0, 1.0].
        alignment: Horizontal alignment of images in the stack.
            Must be one of:

            - "left": Align images to the left
            - "center": Center align images
            - "right": Align images to the right

            Defaults to "left".
        background_color: Background color for the stacked image as (R, G, B) values
            in range [0.0, 1.0]. Defaults to white (1.0, 1.0, 1.0).

    Returns:
        Stacked image. Areas not covered by any image keep the background color.

    Raises:
        ValueError: If images have invalid shapes, dtypes, or value ranges, or if
            alignment is not one of the allowed values.
    """
    # Validate every image before building anything.
    for im in ims:
        if not (im.ndim == 3 and im.shape[2] == 3):
            raise ValueError("Each image must be 3D with 3 channels.")
        if im.dtype not in [np.float32, np.float64]:
            raise ValueError("Image dtype must be float32/float64.")
        if im.min() < 0.0 or im.max() > 1.0:
            raise ValueError("Pixels must be in [0.0, 1.0].")

    # Validate the remaining arguments.
    if any(not (0 <= c <= 1) for c in background_color):
        raise ValueError(
            f"background_color must be 3 floats in the range [0, 1], "
            f"but got {background_color}."
        )
    valid_alignments = ["left", "center", "right"]
    if alignment not in valid_alignments:
        raise ValueError(
            f"Invalid alignment: '{alignment}', must be one of {valid_alignments}."
        )

    # Canvas: widest image wide, sum of all heights tall, filled with background.
    max_width = max(im.shape[1] for im in ims)
    total_height = sum(im.shape[0] for im in ims)
    canvas = np.ones((total_height, max_width, 3), dtype=np.float32)
    canvas = canvas * np.array(background_color).reshape(1, 1, 3)

    # Paste the images one below the other.
    top = 0
    for im in ims:
        im_h, im_w = im.shape[0], im.shape[1]

        # Horizontal start column according to the alignment.
        if alignment == "center":
            left = (max_width - im_w) // 2
        elif alignment == "right":
            left = max_width - im_w
        else:
            left = 0

        canvas[top : top + im_h, left : left + im_w] = im
        top += im_h

    return canvas