# File: RoRD-Layout-Recognation/configs/base_config.yaml
#
# NOTE: the repository viewer flagged this file as containing Unicode
# characters that may be confused with other characters (likely the
# full-width punctuation in the original comments).

# Training hyperparameters.
training:
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
  # PyYAML only resolve this form to a float (a bare `5e-5` becomes a string).
  learning_rate: 5.0e-5
  batch_size: 8
  num_epochs: 50
  patch_size: 256  # presumably the training patch edge in pixels; confirm
  # [min, max] pair; presumably the random scale factor range; confirm
  scale_jitter_range: [0.8, 1.2]
# Model architecture options.
model:
  # Feature pyramid settings.
  fpn:
    enabled: true
    out_channels: 256
    levels: [2, 3, 4]
    norm: "bn"

  # New: switchable backbone network (defaults to vgg16 to stay consistent
  # with the existing implementation).
  backbone:
    name: "vgg16"  # options: vgg16 | resnet34 | efficientnet_b0
    pretrained: false  # whether to load ImageNet pretrained weights (if available)

  # New: optional attention mechanism (off by default so existing results
  # are not affected).
  attention:
    enabled: false
    type: "none"  # options: none | cbam | se
    places: []  # insertion points (list): backbone_high | det_head | desc_head
# Template matching / inference options.
matching:
  keypoint_threshold: 0.5
  ransac_reproj_threshold: 5.0
  min_inliers: 15
  pyramid_scales: [0.75, 1.0, 1.5]
  # Window and stride values; presumably sliding-window inference sizes in
  # pixels (stride < window gives overlapping windows); confirm against the
  # consumer.
  inference_window_size: 1024
  inference_stride: 768
  use_fpn: true
  # Non-maximum suppression settings for this section.
  nms:
    enabled: true
    radius: 4
    score_threshold: 0.5
# Evaluation options.
evaluation:
  iou_threshold: 0.5
# Experiment logging options.
logging:
  use_tensorboard: true
  log_dir: "runs"
  experiment_name: "baseline"
# Filesystem locations. All values below are placeholders ("path/to/...")
# and must be replaced with real paths before use.
paths:
  layout_dir: "path/to/layouts"
  save_dir: "path/to/save"
  val_img_dir: "path/to/val/images"
  val_ann_dir: "path/to/val/annotations"
  template_dir: "path/to/templates"
  model_path: "path/to/save/model_final.pth"
# Data augmentation and synthetic-data configuration (optional).
augment:
  # Elastic deformation; disabled by default.
  elastic:
    enabled: false
    alpha: 40
    sigma: 6
    alpha_affine: 6
    prob: 0.3
  # Photometric augmentations.
  photometric:
    brightness_contrast: true
    gauss_noise: true
# Synthetic training data; disabled by default.
synthetic:
  enabled: false
  png_dir: "data/synthetic/png"
  ratio: 0.0  # 0~1, fraction of synthetic samples mixed in during training
  # NOTE(review): `diffusion` is placed under `synthetic` as a reconstruction
  # of the lost indentation; confirm against the code that reads this config.
  diffusion:
    enabled: false
    png_dir: "data/synthetic_diff/png"
    ratio: 0.0  # 0~1, fraction of diffusion samples mixed in during training