"""
|
|||
|
|
FPN vs 滑窗性能对标脚本
|
|||
|
|
|
|||
|
|
功能:比较 FPN 推理路径与传统图像金字塔滑窗路径的性能差异。
|
|||
|
|
|
|||
|
|
输出指标:
|
|||
|
|
- 推理时间(ms)
|
|||
|
|
- 内存占用(MB)
|
|||
|
|
- 检测到的关键点数
|
|||
|
|
- 检测精度(匹配内点数)
|
|||
|
|
|
|||
|
|
使用示例:
|
|||
|
|
uv run python tests/benchmark_fpn.py \
|
|||
|
|
--layout /path/to/layout.png \
|
|||
|
|
--template /path/to/template.png \
|
|||
|
|
--num-runs 5 \
|
|||
|
|
--output benchmark_results.json
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
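
# For reference, the JSON written via --output (see main()) is organized roughly as:
#   {"timestamp": ..., "config": ..., "model_path": ..., "layout_path": ...,
#    "layout_size": [W, H], "template_path": ..., "template_size": [W, H],
#    "device": ..., "fpn": {...}, "sliding_window": {...}, "comparison": {...}}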

import argparse
import json
import sys
import time
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import psutil
import torch
from PIL import Image

# Add the project root to the Python path
sys.path.insert(0, str(Path(__file__).parent.parent))

from models.rord import RoRD
from utils.config_loader import load_config, to_absolute_path
from utils.data_utils import get_transform


def get_memory_usage() -> float:
    """Return the resident memory usage of the current process (MB)."""
    process = psutil.Process()
    return process.memory_info().rss / 1024 / 1024


def get_gpu_memory_usage() -> float:
    """Return the currently allocated GPU memory (MB)."""
    if torch.cuda.is_available():
        return torch.cuda.memory_allocated() / 1024 / 1024
    return 0.0
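

# torch.cuda.memory_allocated() reports tensors allocated at the moment of the
# call, not the peak reached during a run. The helper below is a minimal sketch
# (not called anywhere in this script) of a peak-memory variant; pairing it with
# torch.cuda.reset_peak_memory_stats() before each timed run yields a per-run peak.
def get_peak_gpu_memory_usage() -> float:
    """Return the peak allocated GPU memory since the last reset (MB)."""
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated() / 1024 / 1024
    return 0.0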


def benchmark_fpn(
    model: torch.nn.Module,
    layout_image: Image.Image,
    template_image: Image.Image,
    transform,
    matching_cfg,
    num_runs: int = 5,
) -> Dict[str, float]:
    """
    Benchmark the FPN path.

    Args:
        model: RoRD model
        layout_image: full layout image
        template_image: template image
        transform: image preprocessing pipeline
        matching_cfg: matching configuration
        num_runs: number of runs

    Returns:
        Dictionary of performance metrics
    """
    from match import extract_from_pyramid, mutual_nearest_neighbor

    device = next(model.parameters()).device
    times = []
    keypoint_counts = []
    inlier_counts = []

    print(f"\n{'=' * 60}")
    print("Benchmark: FPN path")
    print(f"{'=' * 60}")

    for run in range(num_runs):
        # Layout preprocessing (runs before the timer starts)
        layout_tensor = transform(layout_image).unsqueeze(0).to(device)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = time.time()

        # Layout feature extraction via the FPN pyramid
        layout_kps, layout_descs = extract_from_pyramid(
            model,
            layout_tensor,
            float(matching_cfg.keypoint_threshold),
            getattr(matching_cfg, 'nms', {})
        )

        # Template feature extraction (single scale, 1.0)
        template_tensor = transform(template_image).unsqueeze(0).to(device)
        template_kps, template_descs = extract_from_pyramid(
            model,
            template_tensor,
            float(matching_cfg.keypoint_threshold),
            getattr(matching_cfg, 'nms', {})
        )

        # Matching
        if len(layout_descs) > 0 and len(template_descs) > 0:
            matches = mutual_nearest_neighbor(template_descs, layout_descs)
            inlier_count = len(matches)
        else:
            inlier_count = 0

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        elapsed = (time.time() - start_time) * 1000  # convert to ms

        times.append(elapsed)
        keypoint_counts.append(len(layout_kps))
        inlier_counts.append(inlier_count)

        print(f"  Run {run + 1}/{num_runs}: {elapsed:.2f}ms, KPs: {len(layout_kps)}, Matches: {inlier_count}")

    mean_time = np.mean(times)
    std_time = np.std(times)
    mean_kps = np.mean(keypoint_counts)
    mean_inliers = np.mean(inlier_counts)
    gpu_mem = get_gpu_memory_usage()

    return {
        "method": "FPN",
        "mean_time_ms": float(mean_time),
        "std_time_ms": float(std_time),
        "min_time_ms": float(np.min(times)),
        "max_time_ms": float(np.max(times)),
        "all_times_ms": [float(t) for t in times],
        "mean_keypoints": float(mean_kps),
        "mean_matches": float(mean_inliers),
        "gpu_memory_mb": float(gpu_mem),
        "num_runs": num_runs,
    }


def benchmark_sliding_window(
    model: torch.nn.Module,
    layout_image: Image.Image,
    template_image: Image.Image,
    transform,
    matching_cfg,
    num_runs: int = 5,
) -> Dict[str, float]:
    """
    Benchmark the sliding-window path (image pyramid).

    Args:
        model: RoRD model
        layout_image: full layout image
        template_image: template image
        transform: image preprocessing pipeline
        matching_cfg: matching configuration
        num_runs: number of runs

    Returns:
        Dictionary of performance metrics
    """
    from match import extract_features_sliding_window, extract_keypoints_and_descriptors, mutual_nearest_neighbor

    device = next(model.parameters()).device
    times = []
    keypoint_counts = []
    inlier_counts = []

    print(f"\n{'=' * 60}")
    print("Benchmark: sliding-window path")
    print(f"{'=' * 60}")

    for run in range(num_runs):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = time.time()

        # Layout feature extraction via sliding windows
        layout_kps, layout_descs = extract_features_sliding_window(
            model,
            layout_image,
            transform,
            matching_cfg
        )

        # Template single-scale feature extraction
        template_tensor = transform(template_image).unsqueeze(0).to(device)
        template_kps, template_descs = extract_keypoints_and_descriptors(
            model,
            template_tensor,
            float(matching_cfg.keypoint_threshold)
        )

        # Matching
        if len(layout_descs) > 0 and len(template_descs) > 0:
            matches = mutual_nearest_neighbor(template_descs, layout_descs)
            inlier_count = len(matches)
        else:
            inlier_count = 0

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        elapsed = (time.time() - start_time) * 1000  # convert to ms

        times.append(elapsed)
        keypoint_counts.append(len(layout_kps))
        inlier_counts.append(inlier_count)

        print(f"  Run {run + 1}/{num_runs}: {elapsed:.2f}ms, KPs: {len(layout_kps)}, Matches: {inlier_count}")

    mean_time = np.mean(times)
    std_time = np.std(times)
    mean_kps = np.mean(keypoint_counts)
    mean_inliers = np.mean(inlier_counts)
    gpu_mem = get_gpu_memory_usage()

    return {
        "method": "Sliding Window",
        "mean_time_ms": float(mean_time),
        "std_time_ms": float(std_time),
        "min_time_ms": float(np.min(times)),
        "max_time_ms": float(np.max(times)),
        "all_times_ms": [float(t) for t in times],
        "mean_keypoints": float(mean_kps),
        "mean_matches": float(mean_inliers),
        "gpu_memory_mb": float(gpu_mem),
        "num_runs": num_runs,
    }


def compute_speedup(fpn_result: Dict, sw_result: Dict) -> Dict[str, float]:
    """Compute the improvement of FPN relative to the sliding window."""
    speedup = (sw_result["mean_time_ms"] - fpn_result["mean_time_ms"]) / sw_result["mean_time_ms"] * 100
    memory_saving = (
        (sw_result["gpu_memory_mb"] - fpn_result["gpu_memory_mb"]) / sw_result["gpu_memory_mb"] * 100
        if sw_result["gpu_memory_mb"] > 0
        else 0
    )

    return {
        "speedup_percent": float(speedup),
        "memory_saving_percent": float(memory_saving),
        "fpn_faster": speedup > 0,
        "meets_speedup_target": speedup >= 30,
        "meets_memory_target": memory_saving >= 20,
    }
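

# Worked example with illustrative numbers (not measurements): if the sliding
# window averages 1000 ms and FPN averages 600 ms, the speedup is
# (1000 - 600) / 1000 * 100 = 40.0%, which meets the >= 30% target; with 800 MB
# vs. 600 MB of GPU memory, the saving is (800 - 600) / 800 * 100 = 25.0%,
# which meets the >= 20% target.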


def print_results(fpn_result: Dict, sw_result: Dict, comparison: Dict) -> None:
    """Print the benchmark comparison."""

    print(f"\n{'=' * 80}")
    print(f"{'Benchmark results':^80}")
    print(f"{'=' * 80}\n")

    print(f"{'Metric':<30} {'FPN':<20} {'Sliding window':<20}")
    print("-" * 70)

    print(f"{'Mean inference time (ms)':<30} {fpn_result['mean_time_ms']:<20.2f} {sw_result['mean_time_ms']:<20.2f}")
    print(f"{'Std dev (ms)':<30} {fpn_result['std_time_ms']:<20.2f} {sw_result['std_time_ms']:<20.2f}")
    print(f"{'Min time (ms)':<30} {fpn_result['min_time_ms']:<20.2f} {sw_result['min_time_ms']:<20.2f}")
    print(f"{'Max time (ms)':<30} {fpn_result['max_time_ms']:<20.2f} {sw_result['max_time_ms']:<20.2f}")
    print()

    print(f"{'Mean keypoints':<30} {fpn_result['mean_keypoints']:<20.0f} {sw_result['mean_keypoints']:<20.0f}")
    print(f"{'Mean matches':<30} {fpn_result['mean_matches']:<20.0f} {sw_result['mean_matches']:<20.0f}")
    print()

    print(f"{'GPU memory (MB)':<30} {fpn_result['gpu_memory_mb']:<20.2f} {sw_result['gpu_memory_mb']:<20.2f}")
    print()

    print(f"{'=' * 80}")
    print(f"{'Comparison':^80}")
    print(f"{'=' * 80}\n")

    speedup = comparison["speedup_percent"]
    memory_saving = comparison["memory_saving_percent"]

    print(f"Inference speedup: {speedup:+.2f}% {'✅' if speedup >= 30 else '⚠️'}")
    print(f"  (target: ≥30% | met: {'yes' if comparison['meets_speedup_target'] else 'no'})")
    print()

    print(f"Memory saving: {memory_saving:+.2f}% {'✅' if memory_saving >= 20 else '⚠️'}")
    print(f"  (target: ≥20% | met: {'yes' if comparison['meets_memory_target'] else 'no'})")
    print()

    if speedup > 0:
        print(f"🎉 FPN is {abs(speedup):.2f}% faster than the sliding window")
    elif speedup < 0:
        print(f"⚠️ FPN is {abs(speedup):.2f}% slower than the sliding window")
    else:
        print("ℹ️ FPN and the sliding window perform about the same")
    print()


def main():
    parser = argparse.ArgumentParser(description="RoRD FPN vs. sliding-window benchmark")
    parser.add_argument('--config', type=str, default="configs/base_config.yaml", help="YAML config file")
    parser.add_argument('--model_path', type=str, default=None, help="Path to model weights")
    parser.add_argument('--layout', type=str, required=True, help="Path to the layout image")
    parser.add_argument('--template', type=str, required=True, help="Path to the template image")
    parser.add_argument('--num-runs', type=int, default=5, help="Number of runs per method")
    parser.add_argument('--output', type=str, default="benchmark_results.json", help="Output JSON file path")
    parser.add_argument('--device', type=str, default="cuda", help="Device to use: cuda or cpu")

    args = parser.parse_args()
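
    # For example, a CPU-only run of the same benchmark (paths are placeholders):
    #   uv run python tests/benchmark_fpn.py --layout /path/to/layout.png \
    #       --template /path/to/template.png --device cpu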

    # Load configuration
    cfg = load_config(args.config)
    config_dir = Path(args.config).resolve().parent
    matching_cfg = cfg.matching

    model_path = args.model_path or str(to_absolute_path(cfg.paths.model_path, config_dir))

    # Select the device (fall back to CPU if CUDA is unavailable)
    device = torch.device(args.device if torch.cuda.is_available() or args.device == "cpu" else "cpu")
    print(f"Using device: {device}")

    # Load the model
    print(f"Loading model: {model_path}")
    model = RoRD().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
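
    # Note: on recent PyTorch releases, torch.load() warns unless weights_only is
    # passed explicitly. If the checkpoint is a plain state_dict, a stricter
    # variant (an option, not what this script does today) would be:
    #   state_dict = torch.load(model_path, map_location=device, weights_only=True)
    #   model.load_state_dict(state_dict)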

    # Load the images
    print(f"Loading layout: {args.layout}")
    layout_image = Image.open(args.layout).convert('L')
    print(f"  Size: {layout_image.size}")

    print(f"Loading template: {args.template}")
    template_image = Image.open(args.template).convert('L')
    print(f"  Size: {template_image.size}")

    # Build the preprocessing pipeline
    transform = get_transform()

    # Run the benchmarks
    print(f"\n{'=' * 80}")
    print(f"{'Starting benchmarks':^80}")
    print(f"{'=' * 80}")
    print(f"Runs per method: {args.num_runs}")
    print(f"Config: {args.config}")

    with torch.no_grad():
        fpn_result = benchmark_fpn(
            model, layout_image, template_image, transform, matching_cfg, args.num_runs
        )

        # Temporarily disable FPN and enable the sliding window
        original_use_fpn = getattr(matching_cfg, 'use_fpn', True)
        matching_cfg.use_fpn = False

        sw_result = benchmark_sliding_window(
            model, layout_image, template_image, transform, matching_cfg, args.num_runs
        )

        # Restore the configuration
        matching_cfg.use_fpn = original_use_fpn

    # Compute comparison metrics
    comparison = compute_speedup(fpn_result, sw_result)

    # Print the results
    print_results(fpn_result, sw_result, comparison)

    # Save the results
    results = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "config": str(args.config),
        "model_path": str(model_path),
        "layout_path": str(args.layout),
        "layout_size": list(layout_image.size),
        "template_path": str(args.template),
        "template_size": list(template_image.size),
        "device": str(device),
        "fpn": fpn_result,
        "sliding_window": sw_result,
        "comparison": comparison,
    }

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(results, f, indent=2)

    print(f"\n✅ Results saved to: {output_path}")
    print(f"{'=' * 80}\n")

    # Exit status code
    if comparison["meets_speedup_target"] and comparison["meets_memory_target"]:
        print("🎉 All performance targets were met!")
        return 0
    elif comparison["fpn_faster"]:
        print("✅ FPN outperforms the sliding window, but not all targets were met.")
        return 1
    else:
        print("⚠️ FPN does not outperform the sliding window; further optimization is needed.")
        return 2


if __name__ == "__main__":
    sys.exit(main())