Files
CompanionGuard-RL/tmp/active/train_v5.sh

20 lines
855 B
Bash
Raw Normal View History

#!/bin/bash
# Launch Module C (intervention) v5 training on GPU 1 by invoking
# scripts/train_intervention.py directly with the environment's python.
#
# All training output (stdout and stderr) is appended to a timestamped log
# file under $PROJ/experiments. The script exits with the training process's
# exit status so that callers (schedulers, wrapper scripts) can detect a
# failed run instead of always seeing success.
set -u

readonly PROJ=/root/siton-data-740d234e02d749f08fe5347b0c74c49f/zsy/my-reasearch/companionguard-rl
readonly PYTHON=/root/siton-data-740d234e02d749f08fe5347b0c74c49f/zsy/env/dlapo-py310-cu128/bin/python

# Abort early if the project directory is unavailable; continuing would create
# directories and resolve the relative script/config paths against whatever
# directory the caller happened to be in.
cd "$PROJ" || { echo "ERROR: cannot cd to $PROJ" >&2; exit 1; }

export PYTHONPATH="$PROJ"
export CUDA_VISIBLE_DEVICES=1

# Create the log and checkpoint directories up front (relative to $PROJ).
# NOTE(review): checkpoints/intervention is presumably where the training
# script writes its checkpoints -- confirm against the config.
mkdir -p experiments checkpoints/intervention \
  || { echo "ERROR: cannot create output directories under $PROJ" >&2; exit 1; }

LOG="$PROJ/experiments/train_intervention_v5_$(date +%Y%m%d_%H%M%S).log"
echo "Starting Module C v5 training (GPU 1, direct python)"
echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
echo "Log: $LOG"

# Run directly without accelerate launcher to avoid CUDA init issues
"$PYTHON" scripts/train_intervention.py \
  --config configs/intervention_config.yaml \
  --train-data data/processed/CompanionRisk-Bench/train.jsonl \
  >> "$LOG" 2>&1
# Capture the status immediately; any later command would overwrite $?.
EXIT_CODE=$?

echo "v5 training done, exit=$EXIT_CODE" >> "$LOG"
echo "Training finished with exit=$EXIT_CODE, log=$LOG"

# Propagate the training result; without this the script's status would be
# that of the final echo (always 0), masking training failures.
exit "$EXIT_CODE"