Source code for nv_ingest_api.util.image_processing.clustering

import logging
from typing import List
from typing import Optional


logger = logging.getLogger(__name__)


[docs] def boxes_are_close_or_overlap(b1: List[int], b2: List[int], threshold: float = 10.0) -> bool: """ Determine if two bounding boxes either overlap or are within a certain distance threshold. The function expands each bounding box by `threshold` in all directions and checks if the expanded regions overlap on both the x-axis and y-axis. Parameters ---------- b1 (tuple): The first bounding box, in the format (xmin, ymin, xmax, ymax). b2 (tuple): The second bounding box, in the same format. threshold (float, optional): The distance (in pixels or points) by which to expand each bounding box before checking for overlap. Defaults to 10.0. Returns ------- bool: True if the two bounding boxes overlap or are within the specified threshold distance of each other, False otherwise. Example ------- >>> box1 = (100, 100, 150, 150) >>> box2 = (160, 110, 200, 140) >>> boxes_are_close_or_overlap(box1, box2, threshold=10) True # Because box2 is within 10 pixels of box1 along the x-axis """ (xmin1, ymin1, xmax1, ymax1) = b1 (xmin2, ymin2, xmax2, ymax2) = b2 # Expand each box by 'threshold' in all directions and see if they overlap expanded_b1 = (xmin1 - threshold, ymin1 - threshold, xmax1 + threshold, ymax1 + threshold) expanded_b2 = (xmin2 - threshold, ymin2 - threshold, xmax2 + threshold, ymax2 + threshold) # Check overlap on expanded boxes (exmin1, eymin1, exmax1, eymax1) = expanded_b1 (exmin2, eymin2, exmax2, eymax2) = expanded_b2 overlap_x_expanded = not (exmax1 < exmin2 or exmax2 < exmin1) overlap_y_expanded = not (eymax1 < eymin2 or eymax2 < eymin1) return overlap_x_expanded and overlap_y_expanded
[docs] def group_bounding_boxes( boxes: List[List[int]], threshold: float = 10.0, max_num_boxes: int = 1_000, max_depth: Optional[int] = None ) -> List[List[int]]: """ Group bounding boxes that either overlap or lie within a given proximity threshold. This function first checks whether the number of bounding boxes exceeds `max_num_boxes`, returning an empty list if it does (to avoid excessive computation). Then, it builds an adjacency list by comparing each pair of bounding boxes (using `boxes_are_close_or_overlap`). Any bounding boxes determined to be within `threshold` distance (or overlapping) are treated as connected. Using a Depth-First Search (DFS), we traverse these connections to form groups (connected components). Each group is a list of indices referencing bounding boxes in the original `boxes` list. Parameters ---------- boxes (list of tuple): A list of bounding boxes in the format (xmin, ymin, xmax, ymax). threshold (float, optional): The distance threshold used to determine if two boxes are considered "close enough" to be in the same group. Defaults to 10.0. max_num_boxes (int, optional): The maximum number of bounding boxes to process. If the length of `boxes` exceeds this, a warning is logged and the function returns an empty list. Defaults to 1,000. max_depth (int, optional): The maximum depth for the DFS. If None, there is no limit to how many layers deep the search may go when forming connected components. If set, bounding boxes beyond that depth in the adjacency graph will not be included in the group. Defaults to None. Returns ------- list of list of int: Each element is a list (group) containing the indices of bounding boxes that are connected (overlapping or within `threshold` distance of each other). """ n = len(boxes) if n > max_num_boxes: logger.warning( "Number of bounding boxes (%d) exceeds the maximum allowed (%d). " "Skipping image grouping to avoid high computational overhead.", n, max_num_boxes, ) return [] visited = [False] * n adjacency_list = [[] for _ in range(n)] # Build adjacency by checking closeness/overlap for i in range(n): for j in range(i + 1, n): if boxes_are_close_or_overlap(boxes[i], boxes[j], threshold): adjacency_list[i].append(j) adjacency_list[j].append(i) # DFS to get connected components def dfs(start): stack = [(start, 0)] # (node, depth) component = [] while stack: node, depth = stack.pop() if not visited[node]: visited[node] = True component.append(node) # If we haven't reached max_depth (if max_depth is set) if max_depth is None or depth < max_depth: for neighbor in adjacency_list[node]: if not visited[neighbor]: stack.append((neighbor, depth + 1)) return component groups = [] for i in range(n): if not visited[i]: comp = dfs(i) groups.append(comp) return groups
[docs] def combine_groups_into_bboxes( boxes: List[List[int]], groups: List[List[int]], min_num_components: int = 1 ) -> List[List[int]]: """ Merge bounding boxes based on grouped indices. Given: - A list of bounding boxes (`boxes`), each in the form (xmin, ymin, xmax, ymax). - A list of groups (`groups`), where each group is a list of indices referring to bounding boxes in `boxes`. For each group, this function: 1. Collects all bounding boxes in that group. 2. Computes a single bounding box that tightly encompasses all of those bounding boxes by taking the minimum of all xmins and ymins, and the maximum of all xmaxs and ymaxs. 3. If the group has fewer than `min_num_components` bounding boxes, it is skipped. Parameters ---------- boxes (list of tuple): The original bounding boxes, each in (xmin, ymin, xmax, ymax) format. groups (list of list of int): A list of groups, where each group is a list of indices into `boxes`. min_num_components (int, optional): The minimum number of bounding boxes a group must have to produce a merged bounding box. Defaults to 1. Returns ------- list of list of int: A list of merged bounding boxes, one for each group that meets or exceeds `min_num_components`. Each bounding box is in the format (xmin, ymin, xmax, ymax). """ combined = [] for group in groups: if len(group) < min_num_components: continue xmins = [] ymins = [] xmaxs = [] ymaxs = [] for idx in group: (xmin, ymin, xmax, ymax) = boxes[idx] xmins.append(xmin) ymins.append(ymin) xmaxs.append(xmax) ymaxs.append(ymax) group_xmin = min(xmins) group_ymin = min(ymins) group_xmax = max(xmaxs) group_ymax = max(ymaxs) combined.append([group_xmin, group_ymin, group_xmax, group_ymax]) return combined
[docs] def remove_superset_bboxes(bboxes: List[List[int]]) -> List[List[int]]: """ Remove any bounding box that strictly contains another bounding box. Specifically, for each bounding box `box_a`, if it fully encloses another bounding box `box_b` in all dimensions (with at least one edge strictly larger rather than exactly equal), then `box_a` is excluded from the results. Parameters ---------- bboxes (List[List[int]]): A list of bounding boxes, where each bounding box is a list or tuple of four integers in the format: [x_min, y_min, x_max, y_max]. Returns ------- List[List[int]]: A new list of bounding boxes, excluding those that are strict supersets of any other bounding box in `bboxes`. Example ------- >>> bboxes = [ ... [0, 0, 5, 5], # box A ... [1, 1, 2, 2], # box B ... [3, 3, 4, 4] # box C ... ] >>> # Box A strictly encloses B and C, so it is removed >>> remove_superset_bboxes(bboxes) [[1, 1, 2, 2], [3, 3, 4, 4]] """ results = [] for i, box_a in enumerate(bboxes): xA_min, yA_min, xA_max, yA_max = box_a # Flag to mark if we should exclude this box exclude_a = False for j, box_b in enumerate(bboxes): if i == j: continue xB_min, yB_min, xB_max, yB_max = box_b # Check if box_a strictly encloses box_b: # 1) xA_min <= xB_min, yA_min <= yB_min, xA_max >= xB_max, yA_max >= yB_max # 2) At least one of those inequalities is strict, meaning they're not equal on all edges if xA_min <= xB_min and yA_min <= yB_min and xA_max >= xB_max and yA_max >= yB_max: # box_a is a strict superset => remove it exclude_a = True break if not exclude_a: results.append(box_a) return results