"""Evaluation module."""
import geopandas as gpd
import numpy as np
import pandas as pd
from deepforest import IoU
from deepforest.utilities import __pandas_to_geodataframe__
def _empty_result_dataframe_(group, image_path, task="box"):
"""Create an empty result dataframe for images with no predictions."""
result_dict = {
"truth_id": group.index.values,
"prediction_id": pd.Series([None] * len(group), dtype="object"),
"geometry": group.geometry,
"image_path": image_path,
"match": pd.Series([False] * len(group), dtype="bool"),
"score": pd.Series([None] * len(group), dtype="float64"),
"predicted_label": pd.Series([None] * len(group), dtype="object"),
"true_label": group.label,
}
if task == "box" or task == "polygon":
result_dict.update(
{
"IoU": pd.Series([0.0] * len(group), dtype="float64"),
}
)
return pd.DataFrame(result_dict)
def match_predictions(predictions, ground_df, task="box"):
    """Match predictions to ground truth geometries for a single image.

    The geometric matching is delegated to ``IoU.match_polygons`` (for 'box'
    and 'polygon') or ``IoU.match_points`` with the L2 norm (for 'point').
    No IoU or distance threshold is applied here; the caller decides which
    pairs count as a match.

    Args:
        predictions: a geopandas dataframe with geometry, ``image_path`` and
            ``label`` columns. All rows must belong to the same image.
        ground_df: a geopandas dataframe with geometry and ``label`` columns.
        task: 'box', 'polygon' or 'point'.

    Returns:
        result: the dataframe returned by the matching routine, with
            ``predicted_label`` and ``true_label`` columns added by looking
            up ``prediction_id`` / ``truth_id`` in the input indexes.

    Raises:
        ValueError: if ``predictions`` spans more than one ``image_path``.
        NotImplementedError: if ``task`` is not a supported geometry type.
    """
    image_names = predictions["image_path"].unique()
    if len(image_names) > 1:
        raise ValueError(f"More than one plot passed to image crown: {image_names}")

    # Pick the matcher for the requested geometry type
    if task == "point":
        matched = IoU.match_points(ground_df, predictions, norm="l2")
    elif task in ("box", "polygon"):
        matched = IoU.match_polygons(ground_df, predictions)
    else:
        raise NotImplementedError(f"Geometry type {task} not implemented")

    # The ids in the matched frame are index values of the input frames, so
    # the labels can be recovered with an index -> label lookup.
    label_by_prediction_id = predictions.label.to_dict()
    label_by_truth_id = ground_df.label.to_dict()
    matched["predicted_label"] = matched.prediction_id.map(label_by_prediction_id)
    matched["true_label"] = matched.truth_id.map(label_by_truth_id)

    return matched
def compute_class_recall(results):
    """Compute per-class recall and precision from matched evaluation rows.

    Only rows with a non-missing ``predicted_label`` are considered, so
    ground truth rows that were never paired with a prediction do not count
    against any class.

    Args:
        results: a dataframe with ``predicted_label`` and ``true_label``
            columns.

    Returns:
        A pandas dataframe with columns ``label``, ``recall``, ``precision``
        and ``size`` (number of considered rows whose true label is that
        class), sorted by ``label``. Returns None, after printing a message,
        when no row has a predicted label.
    """
    labelled = results[results.predicted_label.notna()]
    if labelled.empty:
        print("No predictions made")
        return None

    # Every label seen on either side gets a row in the output
    predicted_labels = set(labelled["predicted_label"].dropna())
    true_labels = set(labelled["true_label"].dropna())

    recall_by_label = {}
    precision_by_label = {}
    size_by_label = {}
    for label in predicted_labels.union(true_labels):
        # Recall: share of rows truly of this class that were predicted as it
        truth_rows = labelled[labelled["true_label"] == label]
        n_truth = truth_rows.shape[0]
        if n_truth > 0:
            n_correct = (truth_rows.true_label == truth_rows.predicted_label).sum()
            recall_by_label[label] = n_correct / n_truth

        # Precision: share of rows predicted as this class that truly are it
        predicted_rows = labelled[labelled["predicted_label"] == label]
        n_predicted = predicted_rows.shape[0]
        if n_predicted > 0:
            n_correct = (predicted_rows.true_label == predicted_rows.predicted_label).sum()
            precision_by_label[label] = n_correct / n_predicted

        size_by_label[label] = n_truth

    per_class = pd.DataFrame(
        {
            "recall": pd.Series(recall_by_label),
            "precision": pd.Series(precision_by_label),
            "size": pd.Series(size_by_label),
        }
    )
    per_class = per_class.reset_index(names="label")
    # A label missing from one side has no entry there; report it as 0
    per_class = per_class.fillna(0)
    return per_class.sort_values("label")
def __evaluate_wrapper__(
    predictions: pd.DataFrame | gpd.GeoDataFrame,
    ground_df: pd.DataFrame | gpd.GeoDataFrame,
    numeric_to_label_dict: dict,
    iou_threshold: float = 0.4,
    l2_threshold: float = 10.0,
    geometry_type: str | None = "box",
) -> dict:
    """Normalise labels on both frames, then run ``evaluate_geometry``.

    Args:
        predictions: a pandas or geopandas dataframe of predictions.
        ground_df: a pandas or geopandas dataframe of ground truth.
        numeric_to_label_dict: mapping from numeric class codes to string
            labels. Labels that are not keys of the mapping are left as-is.
        iou_threshold: intersection-over-union threshold, forwarded to
            ``evaluate_geometry``.
        l2_threshold: forwarded to ``evaluate_geometry`` as
            ``distance_threshold``.
        geometry_type: 'box', 'polygon' or 'point'.

    Returns:
        results: the dictionary returned by ``evaluate_geometry``. When its
            ``results`` entry is not None, the ``predictions`` entry is
            replaced by the label-mapped predictions built here.
    """

    def _to_label(value):
        # Missing values pass through; unknown codes are kept unchanged
        if pd.notnull(value):
            return numeric_to_label_dict.get(value, value)
        return value

    def _with_mapped_labels(frame):
        # Shallow copy: the caller's frame is not modified and the large
        # data arrays are not duplicated.
        relabelled = frame.copy(deep=False)
        # Checking for the column guards against empty frames
        if not relabelled.empty and "label" in relabelled.columns:
            relabelled["label"] = relabelled["label"].map(_to_label)
        return relabelled

    # Convert labels to consistent types prior to eval
    predictions = _with_mapped_labels(predictions)
    ground_df = _with_mapped_labels(ground_df)

    results = evaluate_geometry(
        predictions=predictions,
        ground_df=ground_df,
        iou_threshold=iou_threshold,
        distance_threshold=l2_threshold,
        geometry_type=geometry_type,
    )

    # Store the converted predictions for reference
    if results["results"] is not None:
        results["predictions"] = predictions

    return results
def evaluate_boxes(
    predictions: pd.DataFrame | gpd.GeoDataFrame,
    ground_df: pd.DataFrame | gpd.GeoDataFrame,
    iou_threshold: float = 0.4,
) -> dict:
    """Evaluate bounding box predictions against ground truth.

    Convenience entry point that calls ``evaluate_geometry`` with
    ``geometry_type="box"``.

    Args:
        predictions: a pandas dataframe with geometry columns. The labels in
            ground truth and predictions must match. If one is numeric, the
            other must be numeric.
        ground_df: a pandas dataframe with geometry columns
        iou_threshold: intersection-over-union threshold, see
            ``evaluate_geometry``

    Returns:
        results: the dictionary returned by ``evaluate_geometry``, with keys
            results, box_recall, box_precision, class_recall, predictions
            and ground_df
    """
    box_evaluation = evaluate_geometry(
        predictions=predictions,
        ground_df=ground_df,
        iou_threshold=iou_threshold,
        geometry_type="box",
    )
    return box_evaluation
def evaluate_geometry(
    predictions: pd.DataFrame | gpd.GeoDataFrame,
    ground_df: pd.DataFrame | gpd.GeoDataFrame,
    iou_threshold: float = 0.4,
    distance_threshold: float = 10.0,
    geometry_type: str = "box",
) -> dict:
    """Evaluate predictions against ground truth, one image at a time.

    Ground truth is grouped by ``image_path``. For each image the
    predictions with the same ``image_path`` are matched to the ground truth
    and per-image recall and precision are computed; the reported recall and
    precision are the means of the per-image values. Images that only appear
    in ``predictions`` are not visited.

    Args:
        predictions: a pandas or geopandas dataframe of predictions with
            ``image_path`` and ``label`` columns. The labels in ground truth
            and predictions must match. If one is numeric, the other must be
            numeric.
        ground_df: a pandas or geopandas dataframe of ground truth with
            ``image_path`` and ``label`` columns.
        iou_threshold: for 'box' and 'polygon', a pair is a match when its
            IoU is strictly greater than this value.
        distance_threshold: for 'point', a pair is a match when its distance
            is strictly less than this value.
        geometry_type: 'box', 'polygon' or 'point'

    Returns:
        A dictionary with keys:
            results: a dataframe with one row per evaluated ground truth
                geometry, or None if predictions or ground truth are empty
            {geometry_type}_recall: mean per-image proportion of ground
                truth that is matched, regardless of class
            {geometry_type}_precision: mean per-image proportion of
                predictions that are matched, regardless of class; NaN when
                no evaluated image has predictions
            class_recall: a pandas dataframe of class level recall and
                precision with class sizes, or None
            predictions: the predictions that were evaluated
            ground_df: the ground truth that was evaluated

    Raises:
        ValueError: if ``geometry_type`` is not 'box', 'polygon' or 'point'.
    """
    if geometry_type not in ["box", "polygon", "point"]:
        raise ValueError(
            f"Unknown geometry type {geometry_type}. Must be one of 'box', 'polygon' or 'point'."
        )

    # If no predictions, return 0 recall, NaN precision
    if predictions.empty:
        return {
            "results": None,
            f"{geometry_type}_recall": 0,
            f"{geometry_type}_precision": np.nan,
            "class_recall": None,
            "predictions": predictions,
            "ground_df": ground_df,
        }
    elif not isinstance(predictions, gpd.GeoDataFrame):
        predictions = __pandas_to_geodataframe__(predictions)

    # Remove empty ground truth annotations (all-zero coordinates or empty
    # geometries)
    if geometry_type == "box":
        ground_df = ground_df[
            ~(
                (ground_df.xmin == 0)
                & (ground_df.xmax == 0)
                & (ground_df.ymin == 0)
                & (ground_df.ymax == 0)
            )
        ]
    elif geometry_type == "polygon":
        ground_df = ground_df[~ground_df.geometry.is_empty]
    elif geometry_type == "point":
        ground_df = ground_df[~((ground_df.x == 0) & (ground_df.y == 0))]

    # If all ground truth is empty, recall is undefined (None) and every
    # prediction is a false positive (precision 0)
    if ground_df.empty:
        return {
            "results": None,
            f"{geometry_type}_recall": None,
            f"{geometry_type}_precision": 0,
            "class_recall": None,
            "predictions": predictions,
            "ground_df": ground_df,
        }

    if not isinstance(ground_df, gpd.GeoDataFrame):
        ground_df = __pandas_to_geodataframe__(ground_df)

    # Pre-group predictions by image
    predictions_by_image = {
        name: group.reset_index(drop=True)
        for name, group in predictions.groupby("image_path")
    }

    # Run evaluation on all plots
    results = []
    per_image_recalls = []
    per_image_precisions = []
    for image_path, group in ground_df.groupby("image_path"):
        # Ids in the per-image results refer to positions within the image
        group = group.reset_index(drop=True)

        # Predictions for this image; groupby never yields an empty group,
        # so a missing key is the only way an image has no predictions
        image_predictions = predictions_by_image.get(image_path)

        if image_predictions is None:
            # An empty prediction set has recall of 0, precision of NA, so
            # nothing is added to the precision list.
            results.append(
                _empty_result_dataframe_(group, image_path, task=geometry_type)
            )
            per_image_recalls.append(0)
            continue

        result = match_predictions(
            predictions=image_predictions, ground_df=group, task=geometry_type
        )
        result["image_path"] = image_path

        # Determine matches based on IoU or distance thresholds
        if geometry_type == "point":
            result["match"] = result.distance < distance_threshold
        else:
            result["match"] = result.IoU > iou_threshold

        # Convert None to False for boolean consistency
        result["match"] = result["match"].fillna(False)

        true_positive = int(result["match"].sum())
        per_image_recalls.append(true_positive / result.shape[0])
        per_image_precisions.append(true_positive / image_predictions.shape[0])
        results.append(result)

    # Concatenate results
    if results:
        results = pd.concat(results, ignore_index=True)
        # Convert back to GeoDataFrame if it has geometry column
        if "geometry" in results.columns:
            results = gpd.GeoDataFrame(results, geometry="geometry")
    else:
        columns = [
            "truth_id",
            "prediction_id",
            "predicted_label",
            "score",
            "match",
            "true_label",
            "geometry",
            "image_path",
        ]
        if geometry_type == "point":
            columns.append("distance")
        else:
            columns.append("IoU")
        results = gpd.GeoDataFrame(columns=columns)
        # An empty object-dtype column is not treated as a boolean mask by
        # pandas indexing, so give "match" a real boolean dtype before it is
        # used to filter rows below.
        results["match"] = results["match"].astype(bool)

    # np.mean of an empty list returns NaN but emits a "Mean of empty slice"
    # RuntimeWarning; report NaN explicitly instead.
    mean_precision = np.mean(per_image_precisions) if per_image_precisions else np.nan
    mean_recall = np.mean(per_image_recalls) if per_image_recalls else np.nan

    # Only matching boxes are considered in class recall
    matched_results = results[results["match"]]
    class_recall = compute_class_recall(matched_results)

    return {
        "results": results,
        f"{geometry_type}_precision": mean_precision,
        f"{geometry_type}_recall": mean_recall,
        "class_recall": class_recall,
        "predictions": predictions,
        "ground_df": ground_df,
    }