#!/bin/bash
# Run optimized SOTA baseline evaluation (v2) on CompanionRisk-Bench.
# Adaptations: zh→en translation + companion-specific prompts + threshold=0.3
#
# Usage:
#   cd $PROJ && bash tools/run_sota_v2.sh
#
# Prerequisites on server:
#   pip install sentencepiece sacremoses  # for Helsinki-NLP translation model
#
# Outputs (relative to the project root):
#   experiments/eval_sota_wildguard_v2.json
#   experiments/eval_sota_shieldgemma2b_v2.json

# Abort on the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# The project root is the parent of the directory holding this script. Every
# relative path below is resolved from "$PROJ_ROOT/code".
PROJ_ROOT="$(cd "$(dirname -- "$0")/.." && pwd)"
cd "$PROJ_ROOT/code"

readonly PYTHON="/opt/conda/envs/dlapo-py310-cu128/bin/python"
readonly EVAL_SCRIPT="scripts/eval_sota_baselines_v2.py"
readonly TEST_DATA="data/processed/CompanionRisk-Bench/test.jsonl"
readonly TRANSLATION_CACHE="../experiments/translation_cache.json"
readonly WILDGUARD_PATH="../wildguard"
readonly SHIELDGEMMA_PATH="/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/shieldgemma-2b"

#######################################
# Run the v2 baseline evaluation script for one model. Translation, the shared
# translation cache, threshold 0.3 and the cuda devices are passed identically
# for every model.
# Globals:   PYTHON, EVAL_SCRIPT, TEST_DATA, TRANSLATION_CACHE (read)
# Arguments: $1 - value for --model
#            $2 - value for --model-path
#            $3 - value for --output (result JSON path)
# Returns:   exit status of the evaluation script
#######################################
run_eval() {
  local model=$1
  local model_path=$2
  local output=$3

  "$PYTHON" "$EVAL_SCRIPT" \
    --model "$model" \
    --model-path "$model_path" \
    --test-data "$TEST_DATA" \
    --output "$output" \
    --translate \
    --translation-cache "$TRANSLATION_CACHE" \
    --translate-device cuda \
    --threshold 0.3 \
    --device cuda
}

echo "=============================================="
echo " SOTA Baseline v2 — WildGuard + ShieldGemma"
echo " zh→en translation ON | threshold=0.3"
echo "=============================================="

# ── Install translation dependencies (one-time) ──
echo ""
echo "[0] Checking translation dependencies..."
# Probe both packages with the interpreter that runs the evaluation, and
# install through that same interpreter's pip so the packages land in its
# environment rather than in whichever "pip" happens to be first on PATH.
# stderr of the probe is discarded on purpose: an ImportError here only means
# "not installed yet".
"$PYTHON" -c "import sentencepiece, sacremoses" 2>/dev/null ||
  "$PYTHON" -m pip install --quiet sentencepiece sacremoses

# ── WildGuard v2 ──
echo ""
echo "[1/2] Evaluating WildGuard v2 (translate + companion prompt)..."
run_eval wildguard "$WILDGUARD_PATH" \
  "../experiments/eval_sota_wildguard_v2.json"

# ── ShieldGemma-2B v2 ──
echo ""
echo "[2/2] Evaluating ShieldGemma-2B v2 (translate + companion policies)..."
run_eval shieldgemma2b "$SHIELDGEMMA_PATH" \
  "../experiments/eval_sota_shieldgemma2b_v2.json"

echo ""
echo "=============================================="
echo " Done. Results:"
echo " experiments/eval_sota_wildguard_v2.json"
echo " experiments/eval_sota_shieldgemma2b_v2.json"
echo "=============================================="