chore: initial commit — unified project repo

Merged the code repo (CompanionGuard-RL) into a single project-level git repository. Reorganized root: docs/, reference/, experiments/, tmp/{active,archives}/. Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions
--- a/code/scripts/run_full_pipeline.sh
+++ b/code/scripts/run_full_pipeline.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Full CompanionGuard-RL pipeline; the defaults target 4x RTX 5090.
+#
+# Step 1: Generate data          (calls LLM API, single process)
+# Step 2: Annotate + split       (calls LLM API, single process)
+# Step 3: Train detector         (NUM_GPUS-process DDP, BF16)
+# Step 4: Train intervention     (NUM_GPUS-process BC + 1 GPU PPO)
+# Step 5: Evaluate               (single GPU)
+#
+# Usage (every path below is relative: run from the directory holding scripts/ and configs/):
+#   export DASHSCOPE_API_KEY=your_key   # for Qwen
+#   bash scripts/run_full_pipeline.sh   # export NUM_GPUS=<n> beforehand to override the default of 4
+
+set -euo pipefail   # stop at the first failed step, unset variable, or failed pipe stage
+NUM_GPUS="${NUM_GPUS:-4}"   # process count handed to `accelerate launch`; --multi_gpu needs >= 2
+[[ "${NUM_GPUS}" =~ ^([2-9]|[1-9][0-9]+)$ ]] || { echo "NUM_GPUS must be an integer >= 2" >&2; exit 1; }
+echo "======================================================"
+echo " CompanionGuard-RL Full Pipeline — ${NUM_GPUS}x RTX 5090"
+echo "======================================================"
+
+# ── Step 1: Data generation ────────────────────────────────────────────
+echo ""
+echo "[1/5] Generating dataset..."
+python scripts/generate_data.py --config configs/data_generation.yaml
+
+# ── Step 2: LLM annotation + split ─────────────────────────────────────
+echo ""
+echo "[2/5] Annotating and splitting dataset..."
+python scripts/annotate_data.py \
+    --input  data/raw/generated.jsonl \
+    --output data/processed/annotated.jsonl \
+    --config configs/data_generation.yaml
+
+# ── Step 3: Train detector ──────────────────────────────────────────────
+echo ""
+echo "[3/5] Training risk detector (${NUM_GPUS} GPU DDP, BF16)..."
+accelerate launch \
+    --num_processes="${NUM_GPUS}" \
+    --mixed_precision=bf16 \
+    --multi_gpu \
+    scripts/train_detector.py \
+    --config configs/detector_config.yaml
+
+# ── Step 4: Train intervention policy ──────────────────────────────────
+echo ""
+echo "[4/5] Training intervention policy (BC: ${NUM_GPUS} GPU, PPO: 1 GPU)..."
+accelerate launch \
+    --num_processes="${NUM_GPUS}" \
+    --mixed_precision=bf16 \
+    --multi_gpu \
+    scripts/train_intervention.py \
+    --config     configs/intervention_config.yaml \
+    --train-data data/processed/train.jsonl
+
+# ── Step 5: Evaluate ────────────────────────────────────────────────────
+echo ""
+echo "[5/5] Evaluating..."
+python scripts/evaluate.py \
+    --detector-ckpt checkpoints/detector/best.pt \
+    --agent-ckpt    checkpoints/intervention/final.pt \
+    --test-data     data/processed/test.jsonl \
+    --config        configs/detector_config.yaml \
+    --intervention-config configs/intervention_config.yaml \
+    --output        experiments/eval_results.json
+
+echo ""
+echo "======================================================"
+echo " Pipeline complete. Results: experiments/eval_results.json"
+echo "======================================================"