---
# Configuration with model, data, training, evaluation, logging and output
# sections (run name: detector-macbert-v1).
#
# NOTE(review): this file was reconstructed from a copy whose line breaks and
# indentation had been lost. Top-level section boundaries follow the key
# names; the placement of `loss_weights` and `fine_training` under `training`
# is an assumption -- confirm against the code that reads this config.

model:
  name: "hfl/chinese-macbert-large"
  hidden_size: 1024
  num_heads: 8
  dropout: 0.1
  use_lora: false

data:
  train_path: "data/processed/CompanionRisk-Bench/train.jsonl"
  val_path: "data/processed/CompanionRisk-Bench/dev.jsonl"
  test_path: "data/processed/CompanionRisk-Bench/test.jsonl"
  max_persona_len: 128
  max_context_len: 512
  max_response_len: 256
  max_history_turns: 5
  # 0 for Windows (avoids multiprocessing issues); set to 4 on Linux.
  num_workers: 0

training:
  epochs: 10
  # Single GPU: 16. 4 GPUs: use 32 (32 x 4 = 128 effective, before gradient
  # accumulation is applied).
  per_gpu_batch_size: 16
  # effective_batch = 16 x 1 GPU x 2 = 32
  gradient_accumulation_steps: 2
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `2e-5` as the string "2e-5", not as a float.
  lr: 2.0e-5
  warmup_steps: 100
  weight_decay: 0.01
  gradient_clip: 1.0
  # Global steps between validation runs.
  eval_steps: 100
  # RTX 5090: "bf16"; RTX 30xx/40xx: "fp16"; CPU-only: "no".
  # Keep the value quoted -- an unquoted `no` is a boolean in YAML 1.1.
  mixed_precision: "bf16"
  seed: 42

  # NOTE(review): nesting under `training` is assumed -- verify with the
  # config consumer.
  loss_weights:
    binary: 1.0
    level: 1.0
    primary: 1.0
    # Suggested to raise to 2.0 for the next training run, together with the
    # `fine_training` options below.
    fine: 1.0

  # Fine-grained label training options (enable for the next training run;
  # the current best.pt is unaffected).
  # NOTE(review): nesting under `training` is assumed -- verify with the
  # config consumer.
  fine_training:
    # Set to true to enable pos_weight (next training run).
    use_pos_weight: false
    # Set to true to enable (next training run).
    risky_only: false

evaluation:
  binary_threshold: 0.5
  fine_threshold: 0.4

logging:
  project: "CompanionGuard-RL"
  run_name: "detector-macbert-v1"
  # Set true if wandb is configured.
  use_wandb: false

output:
  checkpoint_dir: "checkpoints/detector"