# Detector model settings.
detector:
  # Path to the detector checkpoint file (relative path).
  checkpoint: 'checkpoints/detector/best.pt'
  # Machine-specific absolute path, labelled as the server 2 path; update it
  # to the local model directory when running on server 2.
  # NOTE(review): confirm whether this value is already valid for server 2.
  model_name: '/root/siton-data-740d234e02d749f08fe5347b0c74c49f/zsy/macbert-large'
  # Presumably the hidden width of the model named above — verify against
  # that model's own configuration.
  hidden_size: 1024

# Agent network hyperparameters.
agent:
  # Presumably the width of the agent's hidden state layer — confirm against
  # the model code that reads this key.
  state_hidden: 256
  # Dropout setting for the agent (presumably a probability in [0, 1)).
  dropout: 0.1

# Stage 1: behavior-cloning warm-up.
behavior_cloning:
  # Stage toggle; currently switched on.
  enabled: true
  epochs: 5
  # Batch size is specified per GPU, not as a global total.
  per_gpu_batch_size: 256
  lr: 0.001
  # Kept as a quoted string so it is always read as text.
  mixed_precision: 'bf16'

# Stage 2: PPO runs on GPU-0 only
# NOTE(review): the per-key notes below are assumed from the conventional
# meaning of these PPO hyperparameter names — confirm against the trainer.
ppo:
  # Presumably the total number of environment steps for the whole run.
  # NOTE(review): 200000 is not a whole multiple of n_rollout_steps (2048);
  # check how the trainer handles the final partial rollout.
  total_timesteps: 200000
  # Presumably the number of steps collected per rollout before each update.
  n_rollout_steps: 2048
  # Presumably the number of optimisation passes over each rollout.
  n_epochs: 4
  # Presumably the minibatch size used within each pass.
  batch_size: 256
  lr: 0.0003
  # Presumably the clipping range of the PPO surrogate objective.
  clip_eps: 0.2
  # Presumably the loss weights for the entropy bonus and the value loss.
  entropy_coef: 0.01
  value_coef: 0.5
  # Presumably the gradient-norm clipping threshold.
  max_grad_norm: 0.5
  # Presumably the discount factor and the GAE lambda.
  gamma: 0.99
  gae_lambda: 0.95

# Environment settings.
environment:
  # Presumably the maximum number of turns per episode — confirm against the
  # environment implementation.
  max_turns: 20

# Evaluation settings.
evaluation:
  # Presumably the cut-off applied to a score to produce a binary decision;
  # NOTE(review): confirm which score it applies to and whether the
  # comparison is inclusive.
  binary_threshold: 0.5

# Preprocessing settings.
preprocessing:
  # Batch size is specified per GPU; this value (64) is smaller than the
  # behavior_cloning per-GPU batch size (256).
  per_gpu_batch_size: 64

# Experiment logging settings.
logging:
  project: 'CompanionGuard-RL'
  run_name: 'intervention-v5-1gpu'
  # Set to false here, so wandb reporting is presumably disabled — confirm
  # how the training script consumes this flag.
  use_wandb: false

# Output settings.
output:
  # Checkpoint directory (relative path).
  checkpoint_dir: 'checkpoints/intervention'
  # NOTE(review): the unit of this interval (timesteps vs. updates) is not
  # visible in this file — confirm against the trainer.
  save_interval: 10000