---
# Training configuration for project "CompanionGuard-RL",
# run "intervention-v5-1gpu".
#
# NOTE(review): this file was previously collapsed onto a single line. In that
# form the nested mappings do not parse, and the first " #" turned the rest of
# the line into a comment. The nesting below is reconstructed from key order
# and the stage comments; confirm it against the code that loads this file.

detector:
  checkpoint: "checkpoints/detector/best.pt"
  # Server 2 path — update this when running on server 2
  # NOTE(review): assumed to describe model_name (the absolute path); confirm.
  model_name: "/root/siton-data-740d234e02d749f08fe5347b0c74c49f/zsy/macbert-large"
  hidden_size: 1024

agent:
  state_hidden: 256
  dropout: 0.1

# Stage 1: Behavior cloning warm-up
behavior_cloning:
  enabled: true
  epochs: 5
  per_gpu_batch_size: 256
  lr: 0.001
  # NOTE(review): placed under behavior_cloning because it directly follows
  # `lr`; confirm it is not meant to be a top-level setting.
  mixed_precision: "bf16"

# Stage 2: PPO runs on GPU-0 only
ppo:
  total_timesteps: 200000
  n_rollout_steps: 2048
  n_epochs: 4
  batch_size: 256
  lr: 0.0003
  clip_eps: 0.2
  entropy_coef: 0.01
  value_coef: 0.5
  max_grad_norm: 0.5
  gamma: 0.99
  gae_lambda: 0.95

environment:
  max_turns: 20

evaluation:
  binary_threshold: 0.5

preprocessing:
  per_gpu_batch_size: 64

logging:
  project: "CompanionGuard-RL"
  run_name: "intervention-v5-1gpu"
  use_wandb: false

output:
  checkpoint_dir: "checkpoints/intervention"
  save_interval: 10000