Source code for deepforest.IoU

"""
IoU Module, with help from https://github.com/SpaceNetChallenge/utilities/blob/spacenetV3/spacenetutilities/evalTools.py
"""

import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from scipy.optimize import linear_sum_assignment
from shapely import STRtree


def _overlap_all(test_polys: "gpd.GeoDataFrame", truth_polys: "gpd.GeoDataFrame"):
    """Computes intersection and union areas for all polygons in the test/truth
    dataframes.

    For efficient querying, truth polygons are stored in a spatial R-Tree
    and we only compute intersections/unions for matching pairs. The output from the
    function are Numpy arrays containing the all-to-all intersection and union areas and
    the indices of intersecting ground truth and prediction polygons.

    This method works with any Shapely polygon, but may have
    reduced performance for the polygon case where bounding box intersection does
    not necessarily mean the vertices intersect. For rectangles, it's efficient
    as the an r-tree hit is usually a true intersection, depending on how touching
    edge cases are handled.

    Returns:
      intersections  : (n_truth, n_pred) intersection areas
      unions : (n_truth, n_pred) union areas
      truth_ids : (n_truth,) truth index values (order matches rows of areas/unions)
      pred_ids  : (n_pred,) prediction index values (order matches cols of areas/unions)
    """
    # geometry arrays
    pred_geoms = np.asarray(test_polys.geometry.values, dtype=object)
    truth_geoms = np.asarray(truth_polys.geometry.values, dtype=object)

    pred_ids = test_polys.index.to_numpy()
    truth_ids = truth_polys.index.to_numpy()

    n_pred = pred_geoms.size
    n_truth = truth_geoms.size

    # empty cases
    if n_pred == 0 or n_truth == 0:
        return (
            np.zeros((n_truth, n_pred), dtype=float),
            np.zeros((n_truth, n_pred), dtype=float),
            truth_ids,
            pred_ids,
        )

    # spatial index on truth
    tree = STRtree(truth_geoms)
    p_idx, t_idx = tree.query(pred_geoms, predicate="intersects")  # shape (2, M)

    intersections = np.zeros((n_truth, n_pred), dtype=float)
    unions = np.zeros((n_truth, n_pred), dtype=float)

    if p_idx.size:
        inter = shapely.intersection(truth_geoms[t_idx], pred_geoms[p_idx])
        uni = shapely.union(truth_geoms[t_idx], pred_geoms[p_idx])
        intersections[t_idx, p_idx] = shapely.area(inter)
        unions[t_idx, p_idx] = shapely.area(uni)

    return intersections, unions, truth_ids, pred_ids


[docs]def compute_IoU(ground_truth: "gpd.GeoDataFrame", submission: "gpd.GeoDataFrame"):
    """Find area of overlap among all sets of ground truth and prediction.

    This function performs matching between a ground truth dataset and a
    submission or prediction dataset, typically the output from a validation or
    test run. In order to compute IoU, we must know which boxes correspond
    between the datasets. This is performed by Hungarian matching, or linear
    sum assignment.

    For each ground truth polygon, we compute the IoUs of all
    overlapping polygons. Intersection areas are used as the input cost matrix for the assignment and the
    algorithm is such that at most one prediction is assigned to each ground truth,
    and each prediction is only used at most once, with the solver aiming to
    maximise the total area of intersection. The matching indices are then returned,
    along with their IoUs and scores, to be used in downstream metrics like recall
    and precision.

    No filtering on IoU or score is performed.

    Args:
        ground_truth: a projected geopandas dataframe with geoemtry
        submission: a projected geopandas dataframe with geometry
    Returns:
        iou_df: dataframe of IoU scores
    """
    # Compute truth <> prediction overlaps
    intersections, unions, truth_ids, pred_ids = _overlap_all(
        test_polys=submission, truth_polys=ground_truth
    )

    # Cost matrix is the intersection area
    matrix = intersections

    if matrix.size == 0:
        # No matches, early exit
        return pd.DataFrame(
            {
                "prediction_id": pd.Series(dtype="float64"),
                "truth_id": pd.Series(dtype=truth_ids.dtype),
                "IoU": pd.Series(dtype="float64"),
                "score": pd.Series(dtype="float64"),
                "geometry": pd.Series(dtype=object),
            }
        )

    # Linear sum assignment + match lookup
    row_ind, col_ind = linear_sum_assignment(matrix, maximize=True)
    match_for_truth = dict(zip(row_ind, col_ind, strict=False))

    # Score lookup
    pred_scores = submission["score"].to_dict() if "score" in submission.columns else {}

    # IoU matrix
    with np.errstate(divide="ignore", invalid="ignore"):
        iou_mat = np.divide(
            intersections,
            unions,
            out=np.zeros_like(intersections, dtype=float),
            where=unions > 0,
        )

    # build rows for every truth element (unmatched => None, IoU 0)
    records = []
    for t_idx, truth_id in enumerate(truth_ids):
        # If we matched this truth box
        if t_idx in match_for_truth:
            # Look up matching prediction and corresponding IoU and score
            p_idx = match_for_truth[t_idx]
            matched_id = pred_ids[p_idx]
            iou = float(iou_mat[t_idx, p_idx])
            score = pred_scores.get(matched_id, None)
        else:
            matched_id = None
            iou = 0.0
            score = None
        records.append(
            {
                "prediction_id": matched_id,
                "truth_id": truth_id,
                "IoU": iou,
                "score": score,
            }
        )

    # Output dataframe
    iou_df = pd.DataFrame.from_records(records)
    iou_df = iou_df.merge(
        ground_truth.assign(truth_id=truth_ids)[["truth_id", "geometry"]],
        on="truth_id",
        how="left",
    )
    return iou_df