CompanionGuard-RL/code/scripts/run_full_pipeline.sh

#!/bin/bash
# Full CompanionGuard-RL pipeline on 4x RTX 5090.
#
# Step 1: Generate data          (calls LLM API, single process)
# Step 2: Annotate + split       (calls LLM API, single process)
# Step 3: Train detector         (4 GPU DDP, BF16)
# Step 4: Train intervention     (4 GPU BC + 1 GPU PPO)
# Step 5: Evaluate               (single GPU)
#
# Usage:
#   export DASHSCOPE_API_KEY=your_key   # for Qwen
#   bash scripts/run_full_pipeline.sh

# Strict mode: abort on any failing step (-e), on expansion of an unset
# variable (-u), and on a failure anywhere inside a pipeline (pipefail).
set -euo pipefail

# Number of processes handed to `accelerate launch` in the training steps.
# Defaults to 4; override from the environment, e.g.
#   NUM_GPUS=2 bash scripts/run_full_pipeline.sh
NUM_GPUS="${NUM_GPUS:-4}"

# Both training steps launch with --multi_gpu, which is meant for two or more
# processes, so reject anything else here before any step has run.
if ! [[ "${NUM_GPUS}" =~ ^[0-9]+$ ]] || (( 10#${NUM_GPUS} < 2 )); then
  printf 'error: NUM_GPUS must be an integer >= 2 (got: %s)\n' "${NUM_GPUS}" >&2
  exit 1
fi
# Normalise forms such as "04" to plain decimal.
NUM_GPUS=$(( 10#${NUM_GPUS} ))

# Every path used below (scripts/, configs/, data/, ...) is relative to the
# directory that contains scripts/. When the script is started from somewhere
# else, fall back to the parent of this script's own directory; a run started
# from the expected directory is left untouched.
if [[ ! -f scripts/generate_data.py ]]; then
  cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.."
fi

echo "======================================================"
echo " CompanionGuard-RL Full Pipeline — ${NUM_GPUS}x RTX 5090"
echo "======================================================"

# ── Step 1: Data generation ────────────────────────────────────────────
# Runs scripts/generate_data.py with the data-generation config. The progress
# line is preceded by one blank line to separate it from earlier output.
printf '\n%s\n' "[1/5] Generating dataset..."
python scripts/generate_data.py \
    --config configs/data_generation.yaml

# ── Step 2: LLM annotation + split ─────────────────────────────────────
# Runs scripts/annotate_data.py on the raw generated samples and writes the
# annotated dataset, using the same config file as the generation step.
annotate_input="data/raw/generated.jsonl"
annotate_output="data/processed/annotated.jsonl"

echo ""
echo "[2/5] Annotating and splitting dataset..."

# Make sure the output directory exists before the (API-backed) annotation
# run starts, so a fresh checkout cannot fail at the final write. This is a
# no-op when the directory is already there.
mkdir -p -- "$(dirname -- "${annotate_output}")"

python scripts/annotate_data.py \
    --input  "${annotate_input}" \
    --output "${annotate_output}" \
    --config configs/data_generation.yaml

# ── Step 3: Train detector ──────────────────────────────────────────────
# Launches scripts/train_detector.py through `accelerate launch` with
# NUM_GPUS processes in multi-GPU mode and bf16 mixed precision.
echo ""
# Report the actual process count rather than a hard-coded number.
echo "[3/5] Training risk detector (${NUM_GPUS} GPU DDP, BF16)..."
# NUM_GPUS is quoted so the option always stays a single word.
accelerate launch \
    --num_processes="${NUM_GPUS}" \
    --mixed_precision=bf16 \
    --multi_gpu \
    scripts/train_detector.py \
    --config configs/detector_config.yaml

# ── Step 4: Train intervention policy ──────────────────────────────────
# Launches scripts/train_intervention.py through `accelerate launch` with
# NUM_GPUS processes in multi-GPU mode and bf16 mixed precision, training on
# data/processed/train.jsonl.
# NOTE(review): the progress message says PPO uses a single GPU; how the
# training script arranges that is not visible from this file — confirm there.
echo ""
# Report the actual process count rather than a hard-coded number.
echo "[4/5] Training intervention policy (BC: ${NUM_GPUS} GPU, PPO: 1 GPU)..."
# NUM_GPUS is quoted so the option always stays a single word.
accelerate launch \
    --num_processes="${NUM_GPUS}" \
    --mixed_precision=bf16 \
    --multi_gpu \
    scripts/train_intervention.py \
    --config     configs/intervention_config.yaml \
    --train-data data/processed/train.jsonl

# ── Step 5: Evaluate ────────────────────────────────────────────────────
# Runs scripts/evaluate.py with the detector and intervention checkpoints on
# the test split, then prints the closing banner with the results path.
eval_results="experiments/eval_results.json"

echo ""
echo "[5/5] Evaluating..."

# Create the results directory up front so the last step of a long run cannot
# fail merely because the directory is missing. No-op if it already exists.
mkdir -p -- "$(dirname -- "${eval_results}")"

python scripts/evaluate.py \
    --detector-ckpt checkpoints/detector/best.pt \
    --agent-ckpt    checkpoints/intervention/final.pt \
    --test-data     data/processed/test.jsonl \
    --config        configs/detector_config.yaml \
    --intervention-config configs/intervention_config.yaml \
    --output        "${eval_results}"

echo ""
echo "======================================================"
echo " Pipeline complete. Results: ${eval_results}"
echo "======================================================"