Files
CompanionGuard-RL/tools/run_sota_v2.sh

68 lines
2.2 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# Run optimized SOTA baseline evaluation (v2) on CompanionRisk-Bench.
# Adaptations: zh→en translation + companion-specific prompts + threshold=0.3
#
# Usage:
#   cd $PROJ && bash tools/run_sota_v2.sh
#
# Optional environment overrides (each falls back to the default set below):
#   PYTHON             interpreter used for every Python step
#   TEST_DATA          benchmark file passed as --test-data
#   TRANSLATION_CACHE  file passed as --translation-cache
#   WILDGUARD_PATH     WildGuard --model-path
#   SHIELDGEMMA_PATH   ShieldGemma-2B --model-path
# Relative paths are resolved from $PROJ_ROOT/code, the working directory.
#
# Prerequisites on server:
#   pip install sentencepiece sacremoses  # for Helsinki-NLP translation model

# -e: stop at the first failing step; -u: treat unset variables as errors;
# pipefail: a pipeline fails when any of its stages fails.
set -euo pipefail

# The script lives in <project>/tools, so the project root is one level up.
PROJ_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$PROJ_ROOT/code" || {
  echo "ERROR: cannot enter $PROJ_ROOT/code" >&2
  exit 1
}

PYTHON="${PYTHON:-/opt/conda/envs/dlapo-py310-cu128/bin/python}"
TEST_DATA="${TEST_DATA:-data/processed/CompanionRisk-Bench/test.jsonl}"
TRANSLATION_CACHE="${TRANSLATION_CACHE:-../experiments/translation_cache.json}"
WILDGUARD_PATH="${WILDGUARD_PATH:-../wildguard}"
SHIELDGEMMA_PATH="${SHIELDGEMMA_PATH:-/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/shieldgemma-2b}"

# Fail fast with a clear message instead of discovering a bad interpreter or a
# missing benchmark file only after later steps have started.
if ! command -v "$PYTHON" >/dev/null 2>&1; then
  echo "ERROR: Python interpreter not found or not executable: $PYTHON" >&2
  exit 1
fi
if [[ ! -f "$TEST_DATA" ]]; then
  echo "ERROR: test data not found: $TEST_DATA (working directory: $PWD)" >&2
  exit 1
fi

echo "=============================================="
echo " SOTA Baseline v2 — WildGuard + ShieldGemma"
echo " zh→en translation ON | threshold=0.3"
echo "=============================================="
# ── Install translation dependencies (one-time) ──
echo ""
echo "[0] Checking translation dependencies..."
# Probe BOTH required packages with the interpreter that runs the evaluation.
# stderr is silenced on purpose: a missing module should trigger the install
# below, not print an ImportError traceback.
if ! "$PYTHON" -c "import sentencepiece, sacremoses" 2>/dev/null; then
  # Install via "$PYTHON -m pip" so the packages land in the same environment
  # as $PYTHON rather than in whichever `pip` happens to be first on PATH.
  "$PYTHON" -m pip install sentencepiece sacremoses --quiet
fi
# ── Shared evaluation runner ──
#######################################
# Run scripts/eval_sota_baselines_v2.py for one baseline model. Both baselines
# share every flag except model, model path and output file, so the common
# flags (translation, threshold, devices) are defined once here.
# Globals:   PYTHON, TEST_DATA, TRANSLATION_CACHE (read)
# Arguments: $1 - value for --model
#            $2 - value for --model-path
#            $3 - value for --output
# Returns:   exit status of the evaluation script
#######################################
run_sota_eval() {
  local model=$1
  local model_path=$2
  local output=$3

  "$PYTHON" scripts/eval_sota_baselines_v2.py \
    --model "$model" \
    --model-path "$model_path" \
    --test-data "$TEST_DATA" \
    --output "$output" \
    --translate \
    --translation-cache "$TRANSLATION_CACHE" \
    --translate-device cuda \
    --threshold 0.3 \
    --device cuda
}

# ── WildGuard v2 ──
echo ""
echo "[1/2] Evaluating WildGuard v2 (translate + companion prompt)..."
run_sota_eval wildguard "$WILDGUARD_PATH" \
  "../experiments/eval_sota_wildguard_v2.json"

# ── ShieldGemma-2B v2 ──
echo ""
echo "[2/2] Evaluating ShieldGemma-2B v2 (translate + companion policies)..."
run_sota_eval shieldgemma2b "$SHIELDGEMMA_PATH" \
  "../experiments/eval_sota_shieldgemma2b_v2.json"
# ── Summary ──
# Closing banner: a blank separator line, then the two result files framed by
# rules. One printf call emits each argument on its own line.
printf '%s\n' \
  "" \
  "==============================================" \
  " Done. Results:" \
  " experiments/eval_sota_wildguard_v2.json" \
  " experiments/eval_sota_shieldgemma2b_v2.json" \
  "=============================================="