# Source code for deepforest.evaluate

"""Evaluation module."""

import geopandas as gpd
import numpy as np
import pandas as pd

from deepforest import IoU
from deepforest.utilities import __pandas_to_geodataframe__


def _empty_result_dataframe_(group, image_path, task="box"):
    """Build the result rows for an image that has ground truth but no predictions.

    Every ground truth row in ``group`` is reported as unmatched: no
    prediction id, no score, no predicted label and ``match`` set to False.

    Args:
        group: ground truth rows for a single image. Must expose ``geometry``
            and ``label`` columns; its index values become ``truth_id``.
        image_path: value stored in the ``image_path`` column of every row.
        task: geometry type being evaluated. For "box" and "polygon" an
            ``IoU`` column filled with 0.0 is added; any other value adds no
            extra column.

    Returns:
        A pandas DataFrame with one row per row of ``group``, indexed like
        ``group``.
    """
    # Every column is built on the index of ``group``. Mixing default-indexed
    # Series (0..n-1) with the group's own columns makes pandas align on the
    # union of both indexes, which fails for any group whose index has not
    # been reset beforehand.
    row_index = group.index
    n_rows = len(group)

    def _constant_column(value, dtype):
        return pd.Series([value] * n_rows, index=row_index, dtype=dtype)

    result_dict = {
        "truth_id": row_index.values,
        "prediction_id": _constant_column(None, "object"),
        "geometry": group.geometry,
        "image_path": image_path,
        "match": _constant_column(False, "bool"),
        # None is stored as NaN in a float64 column.
        "score": _constant_column(None, "float64"),
        "predicted_label": _constant_column(None, "object"),
        "true_label": group.label,
    }

    if task in ("box", "polygon"):
        result_dict["IoU"] = _constant_column(0.0, "float64")

    return pd.DataFrame(result_dict)


def match_predictions(predictions, ground_df, task="box"):
    """Match predictions to ground truth geometries for a single image.

    The geometric matching is delegated to ``IoU.match_polygons`` (for "box"
    and "polygon") or ``IoU.match_points`` with the L2 norm (for "point").
    No quality filtering (IoU or distance threshold) is applied here.
    NOTE: the at-most-one-to-one property of the matches is a contract of
    the ``IoU`` helpers, which are defined outside this module.

    Args:
        predictions: a geopandas dataframe with geometry, ``image_path`` and
            ``label`` columns. Must contain a single unique ``image_path``.
        ground_df: a geopandas dataframe with geometry and ``label`` columns
        task: 'box', 'polygon' or 'point'

    Returns:
        result: the dataframe returned by the matching helper, with
            ``predicted_label`` and ``true_label`` columns added.

    Raises:
        ValueError: if ``predictions`` covers more than one image.
        NotImplementedError: if ``task`` is not a supported geometry type.
    """
    image_names = predictions["image_path"].unique()
    if len(image_names) > 1:
        raise ValueError(f"More than one plot passed to image crown: {image_names}")

    if task == "point":
        matched = IoU.match_points(ground_df, predictions, norm="l2")
    elif task in ("box", "polygon"):
        matched = IoU.match_polygons(ground_df, predictions)
    else:
        raise NotImplementedError(f"Geometry type {task} not implemented")

    # The ids in the match table are index values of the input frames, so the
    # labels can be recovered with index -> label lookups.
    label_of_prediction = predictions["label"].to_dict()
    label_of_truth = ground_df["label"].to_dict()
    matched["predicted_label"] = matched["prediction_id"].map(label_of_prediction)
    matched["true_label"] = matched["truth_id"].map(label_of_truth)

    return matched


def compute_class_recall(results):
    """Compute per-class recall, precision and ground truth count.

    Only rows with a non-null ``predicted_label`` are considered, so true
    boxes which are not matched to predictions do not count against accuracy.

    Args:
        results: a dataframe with ``predicted_label`` and ``true_label``
            columns, one row per evaluated pair.

    Returns:
        A dataframe sorted by ``label`` with columns ``label``, ``recall``,
        ``precision`` and ``size``, or None (after printing a message) when
        no row has a predicted label.
    """
    labelled = results[results.predicted_label.notna()]
    if labelled.empty:
        print("No predictions made")
        return None

    # Every label seen on either side gets a row in the output.
    seen_in_predictions = set(labelled["predicted_label"].dropna())
    seen_in_truth = set(labelled["true_label"].dropna())
    labels = seen_in_predictions.union(seen_in_truth)

    # Row-wise correctness, computed once and sliced per label below.
    is_correct = labelled["true_label"] == labelled["predicted_label"]

    recall_by_label = {}
    precision_by_label = {}
    size_by_label = {}
    for label in labels:
        # Recall: of all ground truth boxes with this label, how many were
        # given the right label?
        truth_mask = labelled["true_label"] == label
        n_truth = int(truth_mask.sum())
        if n_truth > 0:
            recall_by_label[label] = is_correct[truth_mask].sum() / n_truth

        # Precision: of all predictions with this label, how many were right?
        prediction_mask = labelled["predicted_label"] == label
        n_predicted = int(prediction_mask.sum())
        if n_predicted > 0:
            precision_by_label[label] = is_correct[prediction_mask].sum() / n_predicted

        size_by_label[label] = n_truth

    summary = pd.DataFrame(
        {
            "recall": pd.Series(recall_by_label),
            "precision": pd.Series(precision_by_label),
            "size": pd.Series(size_by_label),
        }
    )
    summary = summary.reset_index(names="label")
    # Labels with no ground truth get recall 0; labels never predicted get
    # precision 0.
    summary = summary.fillna(0)
    return summary.sort_values("label")


def __evaluate_wrapper__(
    predictions: pd.DataFrame | gpd.GeoDataFrame,
    ground_df: pd.DataFrame | gpd.GeoDataFrame,
    numeric_to_label_dict: dict,
    iou_threshold: float = 0.4,
    l2_threshold: float = 10.0,
    geometry_type: str | None = "box",
) -> dict:
    """Harmonise label types, then evaluate predictions against ground truth.

    Labels found as keys in ``numeric_to_label_dict`` are replaced by the
    mapped value in both frames; other labels and nulls are left untouched.
    The evaluation itself is done by ``evaluate_geometry``.

    Args:
        predictions: a pandas dataframe of predictions. The labels in ground
            truth and predictions must match once the mapping is applied.
        ground_df: a pandas dataframe of ground truth annotations
        numeric_to_label_dict: mapping from numeric class codes to string labels
        iou_threshold: intersection-over-union threshold, see deepforest.evaluate
        l2_threshold: forwarded to ``evaluate_geometry`` as ``distance_threshold``
        geometry_type: 'box', 'polygon' or 'point'

    Returns:
        results: the dictionary returned by ``evaluate_geometry``. When its
            "results" entry is not None, the "predictions" entry is replaced
            by the label-mapped predictions built here.
    """

    def _to_label(value):
        # Nulls pass through unchanged; unknown codes are kept as they are.
        if not pd.notnull(value):
            return value
        return numeric_to_label_dict.get(value, value)

    # Shallow copies let the label column be replaced without touching the
    # caller's frames and without duplicating the underlying data arrays.
    predictions = predictions.copy(deep=False)
    ground_df = ground_df.copy(deep=False)

    for frame in (predictions, ground_df):
        # Empty frames, or frames without labels, need no conversion.
        if frame.empty or "label" not in frame.columns:
            continue
        frame["label"] = frame["label"].map(_to_label)

    results = evaluate_geometry(
        predictions=predictions,
        ground_df=ground_df,
        iou_threshold=iou_threshold,
        distance_threshold=l2_threshold,
        geometry_type=geometry_type,
    )

    # Keep the converted predictions alongside the results for reference.
    if results["results"] is not None:
        results["predictions"] = predictions

    return results


def evaluate_boxes(
    predictions: pd.DataFrame | gpd.GeoDataFrame,
    ground_df: pd.DataFrame | gpd.GeoDataFrame,
    iou_threshold: float = 0.4,
) -> dict:
    """Evaluate bounding box predictions against ground truth.

    Convenience entry point: forwards to ``evaluate_geometry`` with
    ``geometry_type="box"``.

    Args:
        predictions: a pandas dataframe with geometry columns. The labels in
            ground truth and predictions must match. If one is numeric, the
            other must be numeric.
        ground_df: a pandas dataframe with geometry columns
        iou_threshold: intersection-over-union threshold, see deepforest.evaluate

    Returns:
        results: the dictionary returned by ``evaluate_geometry``, with keys
            results, box_recall, box_precision, class_recall, predictions
            and ground_df.
    """
    box_results = evaluate_geometry(
        predictions,
        ground_df,
        iou_threshold=iou_threshold,
        geometry_type="box",
    )
    return box_results


def evaluate_geometry(
    predictions: pd.DataFrame | gpd.GeoDataFrame,
    ground_df: pd.DataFrame | gpd.GeoDataFrame,
    iou_threshold: float = 0.4,
    distance_threshold: float = 10.0,
    geometry_type: str = "box",
) -> dict:
    """Evaluate predictions against ground truth, image by image.

    Ground truth is grouped by ``image_path``; each group is matched against
    the predictions for the same image and per-image recall and precision
    are averaged. Images with ground truth but no predictions contribute a
    recall of 0 and no precision value.

    Args:
        predictions: a pandas dataframe with geometry columns. The labels in
            ground truth and predictions must match. If one is numeric, the
            other must be numeric.
        ground_df: a pandas dataframe with geometry columns
        iou_threshold: a box/polygon pair is a match when its IoU is strictly
            greater than this value
        distance_threshold: a point pair is a match when its distance is
            strictly smaller than this value
        geometry_type: 'box', 'polygon' or 'point'

    Returns:
        A dictionary with keys:
            results: a dataframe of matches, or None when either input is
                empty (ground truth is checked after placeholder rows are
                removed)
            {geometry_type}_recall: mean per-image recall; 0 when there are
                no predictions, None when there is no ground truth
            {geometry_type}_precision: mean per-image precision; NaN when
                there are no predictions, 0 when there is no ground truth
            class_recall: output of ``compute_class_recall`` on the matched
                rows, or None on the early exits
            predictions: the predictions used for evaluation
            ground_df: the ground truth used for evaluation

    Raises:
        ValueError: if ``geometry_type`` is not one of the supported values.
    """
    if geometry_type not in ("box", "polygon", "point"):
        raise ValueError(
            f"Unknown geometry type {geometry_type}. Must be one of 'box', 'polygon' or 'point'."
        )

    recall_key = f"{geometry_type}_recall"
    precision_key = f"{geometry_type}_precision"
    # Boxes and polygons are scored by IoU, points by distance.
    uses_iou = geometry_type in ("box", "polygon")

    # Nothing predicted at all: recall is 0 and precision is undefined.
    if predictions.empty:
        return {
            "results": None,
            recall_key: 0,
            precision_key: np.nan,
            "class_recall": None,
            "predictions": predictions,
            "ground_df": ground_df,
        }
    if not isinstance(predictions, gpd.GeoDataFrame):
        predictions = __pandas_to_geodataframe__(predictions)

    # Drop placeholder ground truth rows (all-zero coordinates or empty
    # geometries).
    if geometry_type == "box":
        placeholder = (
            (ground_df.xmin == 0)
            & (ground_df.xmax == 0)
            & (ground_df.ymin == 0)
            & (ground_df.ymax == 0)
        )
    elif geometry_type == "polygon":
        placeholder = ground_df.geometry.is_empty
    else:
        placeholder = (ground_df.x == 0) & (ground_df.y == 0)
    ground_df = ground_df[~placeholder]

    # No real ground truth left: recall is undefined (None), precision is 0.
    if ground_df.empty:
        return {
            "results": None,
            recall_key: None,
            precision_key: 0,
            "class_recall": None,
            "predictions": predictions,
            "ground_df": ground_df,
        }

    if not isinstance(ground_df, gpd.GeoDataFrame):
        ground_df = __pandas_to_geodataframe__(ground_df)

    # Group predictions once so the per-image lookup below is a dict access.
    predictions_by_image = {}
    for name, image_rows in predictions.groupby("image_path"):
        predictions_by_image[name] = image_rows.reset_index(drop=True)

    per_image_results = []
    per_image_recalls = []
    per_image_precisions = []
    for image_path, truth_rows in ground_df.groupby("image_path"):
        truth_rows = truth_rows.reset_index(drop=True)
        image_predictions = predictions_by_image.get(image_path, pd.DataFrame())

        if image_predictions.empty:
            # Skip matching: every ground truth row is a miss. Recall is 0
            # and precision is undefined, so no precision value is recorded.
            unmatched = _empty_result_dataframe_(
                truth_rows, image_path, task=geometry_type
            )
            per_image_recalls.append(0)
            per_image_results.append(unmatched)
            continue

        result = match_predictions(
            predictions=image_predictions, ground_df=truth_rows, task=geometry_type
        )
        result["image_path"] = image_path

        # Decide which pairs count as matches.
        if uses_iou:
            result["match"] = result.IoU > iou_threshold
        else:
            result["match"] = result.distance < distance_threshold

        # Convert missing values to False for boolean consistency.
        result["match"] = result["match"].fillna(False)

        true_positive = sum(result["match"])
        per_image_recalls.append(true_positive / result.shape[0])
        per_image_precisions.append(true_positive / image_predictions.shape[0])
        per_image_results.append(result)

    if not per_image_results:
        # Reached only when the groupby above produced no groups: return an
        # empty frame carrying the expected columns.
        columns = [
            "truth_id",
            "prediction_id",
            "predicted_label",
            "score",
            "match",
            "true_label",
            "geometry",
            "image_path",
        ]
        columns.append("IoU" if uses_iou else "distance")
        results = gpd.GeoDataFrame(columns=columns)
    else:
        results = pd.concat(per_image_results, ignore_index=True)
        # Convert back to a GeoDataFrame when a geometry column is present.
        if "geometry" in results.columns:
            results = gpd.GeoDataFrame(results, geometry="geometry")

    mean_precision = np.mean(per_image_precisions)
    mean_recall = np.mean(per_image_recalls)

    # Only matched pairs are considered for the class-level metrics.
    class_recall = compute_class_recall(results[results.match])

    return {
        "results": results,
        precision_key: mean_precision,
        recall_key: mean_recall,
        "class_recall": class_recall,
        "predictions": predictions,
        "ground_df": ground_df,
    }