#!/bin/bash
# Train Module B (Risk Detector) on 4x RTX 5090.
#
# Usage:
#   bash scripts/run_detector.sh
#   bash scripts/run_detector.sh --config configs/detector_config.yaml
#
# All command-line arguments are forwarded verbatim to
# scripts/train_detector.py. With no arguments, the default
# "--config configs/detector_config.yaml" is used.
#
# NVLink not required: DDP communicates via PCIe (sufficient for MacBERT-large).
# Mixed precision: BF16 (native on RTX 5090, ~2x throughput vs FP32).

set -euo pipefail

NUM_GPUS=4

# --- Arguments for the training script --------------------------------------
# Keep them in an array so "--config <path>" survives as two separate words.
TRAIN_ARGS=("$@")
if (( $# == 1 )); then
  # Compatibility with the previous invocation style, where the whole option
  # string was passed as one quoted argument (e.g. "--config path.yaml") and
  # split on whitespace at the call site.
  read -r -a TRAIN_ARGS <<< "$1"
fi
if (( ${#TRAIN_ARGS[@]} == 0 )); then
  TRAIN_ARGS=(--config configs/detector_config.yaml)
fi

# --- Verify GPU count --------------------------------------------------------
if ! command -v nvidia-smi > /dev/null 2>&1; then
  echo "[ERROR] nvidia-smi not found; cannot detect GPUs." >&2
  exit 1
fi

if ! GPU_NAMES=$(nvidia-smi --query-gpu=name --format=csv,noheader); then
  echo "[ERROR] nvidia-smi failed; cannot detect GPUs." >&2
  exit 1
fi

# Count non-empty lines. grep -c exits non-zero when the count is 0, which
# would otherwise abort the script under 'set -e' before the check below.
ACTUAL_GPUS=$(grep -c . <<< "${GPU_NAMES}" || true)

if (( ACTUAL_GPUS < 1 )); then
  echo "[ERROR] No GPUs detected." >&2
  exit 1
fi

if (( ACTUAL_GPUS < NUM_GPUS )); then
  echo "[WARN] Expected ${NUM_GPUS} GPUs, found ${ACTUAL_GPUS}. Adjusting." >&2
  NUM_GPUS=${ACTUAL_GPUS}
fi

# --- Banner (printed after verification so it shows the count in use) --------
echo "=============================================="
echo " CompanionGuard-RL — Module B: Detector"
echo " GPUs : ${NUM_GPUS}x RTX 5090 (PCIe DDP)"
echo " Precision : BF16"
echo " Config : ${TRAIN_ARGS[*]}"
echo "=============================================="

# --- Launch ------------------------------------------------------------------
LAUNCH_ARGS=(
  --num_processes="${NUM_GPUS}"
  --mixed_precision=bf16
)
# Request distributed (multi-GPU) launch only when more than one process runs.
if (( NUM_GPUS > 1 )); then
  LAUNCH_ARGS+=(--multi_gpu)
fi

accelerate launch "${LAUNCH_ARGS[@]}" \
  scripts/train_detector.py "${TRAIN_ARGS[@]}"