#!/bin/bash
# Train Module C (Intervention Policy) on 4x RTX 5090.
#
# Stage 1 — Behavior Cloning: all 4 GPUs (DDP, BF16)
# Stage 2 — PPO fine-tuning: GPU-0 only (inherently sequential)
#
# Usage:
#   bash scripts/run_intervention.sh
#   bash scripts/run_intervention.sh data/processed/train.jsonl

# Strict mode:
#   -e           abort on the first unhandled command failure
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if ANY stage fails, so an early failing
#                stage is not masked by a later stage that succeeds
set -euo pipefail

# Training data path: first positional argument, or the default when omitted.
TRAIN_DATA="${1:-data/processed/train.jsonl}"
# Config file handed to the training script via --config.
CONFIG="configs/intervention_config.yaml"
# Number of GPUs requested for the launch.
NUM_GPUS=4

# Print the run summary banner (one printf call, one argument per output line).
banner_rule="=============================================="
printf '%s\n' \
    "${banner_rule}" \
    " CompanionGuard-RL — Module C: Intervention" \
    " Stage 1 (BC)  : ${NUM_GPUS}x GPU (DDP, BF16)" \
    " Stage 2 (PPO) : GPU-0 only" \
    " Config        : ${CONFIG}" \
    " Train data    : ${TRAIN_DATA}" \
    "${banner_rule}"

# Detect how many GPUs are visible and shrink NUM_GPUS to match.
# The query output is captured on its own rather than piped straight into a
# line counter, so a missing or failing nvidia-smi aborts the script instead
# of being read as "0 GPUs".
if ! gpu_listing=$(nvidia-smi --query-gpu=name --format=csv,noheader); then
    echo "[ERROR] nvidia-smi failed; cannot determine the number of GPUs." >&2
    exit 1
fi

# The query prints one line per GPU; count the non-empty lines.
# grep -c exits with status 1 when the count is 0, hence the `|| true`.
ACTUAL_GPUS=$(grep -c . <<<"${gpu_listing}" || true)

if [ "$ACTUAL_GPUS" -lt 1 ]; then
    echo "[ERROR] No GPUs detected; refusing to launch with 0 processes." >&2
    exit 1
fi

if [ "$ACTUAL_GPUS" -lt "$NUM_GPUS" ]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "[WARN] Expected ${NUM_GPUS} GPUs, found ${ACTUAL_GPUS}. Adjusting." >&2
    NUM_GPUS=$ACTUAL_GPUS
fi

# Launch training through Accelerate.
# The launcher flags are collected in an array so that --multi_gpu can be
# added conditionally: NUM_GPUS may have been lowered to 1, and Accelerate's
# launcher is expected to reject --multi_gpu with fewer than 2 processes
# (NOTE(review): confirm against the installed Accelerate version).
launch_args=(
    --num_processes="${NUM_GPUS}"
    --mixed_precision=bf16
)
if (( NUM_GPUS > 1 )); then
    launch_args+=(--multi_gpu)
fi

# Paths are quoted so values containing spaces or glob characters (TRAIN_DATA
# can come from the command line) reach the training script as single arguments.
accelerate launch "${launch_args[@]}" \
    scripts/train_intervention.py \
    --config "${CONFIG}" \
    --train-data "${TRAIN_DATA}"