chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git. Reorganized root: docs/, reference/, experiments/, tmp/active|archives/. Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions
--- a/code/configs/intervention_config.yaml
+++ b/code/configs/intervention_config.yaml
@@ -0,0 +1,49 @@
+detector:
+  checkpoint: "checkpoints/detector/best.pt"  # relative path; no base directory is set in this file
+  # Host-specific absolute path labelled "Server 2"; per the original note, update it when running on server 2. NOTE(review): confirm whether the value below is already the server-2 location.
+  model_name: "/root/siton-data-740d234e02d749f08fe5347b0c74c49f/zsy/macbert-large"
+  hidden_size: 1024  # NOTE(review): presumably has to agree with the model at model_name — confirm
+
+agent:
+  state_hidden: 256  # NOTE(review): presumably a hidden-layer width of the agent network — confirm
+  dropout: 0.1  # NOTE(review): presumably a dropout probability — confirm
+
+# Stage 1: behavior-cloning warm-up (Stage 2 is the ppo section of this file).
+behavior_cloning:
+  enabled: true  # NOTE(review): presumably toggles this whole stage — confirm
+  epochs: 5
+  per_gpu_batch_size: 256  # named per GPU; NOTE(review): effective batch presumably scales with device count — confirm
+  lr: 0.001  # differs from ppo.lr (0.0003)
+  mixed_precision: "bf16"
+
+# Stage 2: PPO; per the original note, this stage runs on GPU-0 only.
+ppo:
+  total_timesteps: 200000  # not a whole multiple of n_rollout_steps (200000 / 2048 ~ 97.7); NOTE(review): check how the remainder is handled
+  n_rollout_steps: 2048  # equals 8 * batch_size
+  n_epochs: 4  # NOTE(review): presumably optimisation passes per rollout — confirm
+  batch_size: 256
+  lr: 0.0003  # differs from behavior_cloning.lr (0.001)
+  clip_eps: 0.2
+  entropy_coef: 0.01
+  value_coef: 0.5
+  max_grad_norm: 0.5
+  gamma: 0.99
+  gae_lambda: 0.95
+
+environment:
+  max_turns: 20  # NOTE(review): presumably the per-episode turn limit — confirm against the environment code
+
+evaluation:
+  binary_threshold: 0.5  # NOTE(review): presumably the score cut-off for a binary decision; confirm whether the comparison is > or >=
+
+preprocessing:
+  per_gpu_batch_size: 64  # set independently of behavior_cloning.per_gpu_batch_size (256)
+
+logging:
+  project: "CompanionGuard-RL"
+  run_name: "intervention-v5-1gpu"
+  use_wandb: false  # NOTE(review): presumably turns off Weights & Biases reporting — confirm
+
+output:
+  checkpoint_dir: "checkpoints/intervention"
+  save_interval: 10000  # NOTE(review): unit is not stated in this file (presumably PPO timesteps) — confirm