---
# Detector training configuration: model, data, optimisation, evaluation,
# logging and checkpoint output settings.
#
# NOTE(review): this file was rebuilt from a single collapsed line, so the
# nesting is inferred from key order. `loss_weights` is assumed to live under
# `training`; confirm against the config loader (it may be a top-level key).

model:
  name: "hfl/chinese-macbert-large"
  hidden_size: 1024
  num_heads: 8
  dropout: 0.1
  use_lora: false

data:
  train_path: "data/processed/train.jsonl"
  val_path: "data/processed/val.jsonl"
  test_path: "data/processed/test.jsonl"
  max_persona_len: 128
  max_context_len: 512
  max_response_len: 256
  max_history_turns: 5
  num_workers: 4  # DataLoader worker processes per GPU

training:
  epochs: 10
  # 4 GPUs × 32 = 128 effective batch per step
  per_gpu_batch_size: 32
  # effective_batch = per_gpu × n_gpu × accum
  gradient_accumulation_steps: 1
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `2e-5` as a string, not a float.
  lr: 2.0e-5
  warmup_steps: 200
  weight_decay: 0.01
  gradient_clip: 1.0
  eval_steps: 200  # global steps between validation runs
  # RTX 5090 has native BF16; use "fp16" for older GPUs
  mixed_precision: "bf16"
  seed: 42
  # Per-output loss weights; all set equal here.
  loss_weights:
    binary: 1.0
    level: 1.0
    primary: 1.0
    fine: 1.0

evaluation:
  binary_threshold: 0.5
  fine_threshold: 0.4

logging:
  project: "CompanionGuard-RL"
  run_name: "detector-macbert-4gpu"
  use_wandb: true

output:
  checkpoint_dir: "checkpoints/detector"