#!/bin/bash
# Train Module C (Intervention Policy) on 4x RTX 5090.
#
#   Stage 1 — Behavior Cloning:  all 4 GPUs (DDP, BF16)
#   Stage 2 — PPO fine-tuning:   GPU-0 only (inherently sequential)
#
# Usage:
#   bash scripts/run_intervention.sh
#   bash scripts/run_intervention.sh data/processed/train.jsonl
#
# Arguments:
#   $1  Path to the training data file (optional;
#       defaults to data/processed/train.jsonl).
#
# Exit status:
#   Non-zero if nvidia-smi is unavailable or fails, if no GPU is detected,
#   or if the accelerate launch itself fails.

# -u catches typos in variable names; pipefail makes a failing nvidia-smi
# visible instead of being masked by the trailing `wc -l`.
set -euo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf '[ERROR] %s\n' "$*" >&2
  exit 1
}

TRAIN_DATA="${1:-data/processed/train.jsonl}"
CONFIG="configs/intervention_config.yaml"
NUM_GPUS=4

echo "=============================================="
echo " CompanionGuard-RL — Module C: Intervention"
echo " Stage 1 (BC) : ${NUM_GPUS}x GPU (DDP, BF16)"
echo " Stage 2 (PPO) : GPU-0 only"
echo " Config : ${CONFIG}"
echo " Train data : ${TRAIN_DATA}"
echo "=============================================="

# Count visible GPUs: nvidia-smi prints one line per device with this query.
command -v nvidia-smi >/dev/null \
  || die "nvidia-smi not found on PATH; cannot determine GPU count."
ACTUAL_GPUS=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) \
  || die "nvidia-smi failed; cannot determine GPU count."

# Launching with zero processes is never valid, so stop here with a clear
# message rather than letting NUM_GPUS silently drop to 0.
if (( ACTUAL_GPUS < 1 )); then
  die "No GPUs detected."
fi

# Scale down (never up) when fewer GPUs are present than expected.
if (( ACTUAL_GPUS < NUM_GPUS )); then
  echo "[WARN] Expected ${NUM_GPUS} GPUs, found ${ACTUAL_GPUS}. Adjusting." >&2
  NUM_GPUS=$ACTUAL_GPUS
fi

# Build the launcher options as an array so each value stays a single word.
launch_args=(
  --num_processes="${NUM_GPUS}"
  --mixed_precision=bf16
)
# --multi_gpu is only meaningful with at least two processes; on a single
# GPU it is omitted so the launcher does not reject the invocation.
if (( NUM_GPUS > 1 )); then
  launch_args+=(--multi_gpu)
fi

accelerate launch \
  "${launch_args[@]}" \
  scripts/train_intervention.py \
  --config "${CONFIG}" \
  --train-data "${TRAIN_DATA}"