feat: SOTA baseline v2 with zh→en translation + companion-adapted prompts
- eval_sota_baselines_v2.py: optimized eval for WildGuard & ShieldGemma-2B
* ChineseTranslator: Helsinki-NLP/opus-mt-zh-en (local, no API)
* ShieldGemma: +4 companion-specific safety policies (crisis non-response,
dependency reinforcement, isolation reinforcement, minor intimacy)
* WildGuard: companion context injected into prompt + extended keyword parsing
* Default threshold lowered 0.5 → 0.3 for better recall
* Translation cache saved to experiments/translation_cache.json (reusable)
- tools/run_sota_v2.sh: one-command runner for both models on server
- paper/05_moduleB.tex: add †-adapted rows to SOTA table + updated discussion
explaining root causes (language barrier + taxonomy gap) and adaptation results
- paper/07_experiments.tex: update baseline description to include v2 adapted variants
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
67
tools/run_sota_v2.sh
Normal file
67
tools/run_sota_v2.sh
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/bin/bash
|
||||
# Run optimized SOTA baseline evaluation (v2) on CompanionRisk-Bench.
|
||||
# Adaptations: zh→en translation + companion-specific prompts + threshold=0.3
|
||||
#
|
||||
# Usage:
|
||||
# cd $PROJ && bash tools/run_sota_v2.sh
|
||||
#
|
||||
# Prerequisites on server:
|
||||
# pip install sentencepiece sacremoses # for Helsinki-NLP translation model
|
||||
|
||||
# Strict mode: abort on the first failing command (-e), on any reference to an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
|
||||
|
||||
# Locate the repository root (the parent of the directory containing this
# script) and run everything from its code/ subdirectory, so the relative
# paths used further down resolve no matter where the script is invoked from.
PROJ_ROOT="$(
  cd "$(dirname "$0")/.." && pwd
)"
cd "${PROJ_ROOT}/code"
|
||||
|
||||
# Interpreter, dataset, cache and model locations. Each value can be overridden
# from the environment (e.g. PYTHON=python3 bash tools/run_sota_v2.sh); when a
# variable is unset or empty, the original hard-coded path is used as default.
# Relative paths are interpreted from the script's working directory.
PYTHON="${PYTHON:-/opt/conda/envs/dlapo-py310-cu128/bin/python}"
TEST_DATA="${TEST_DATA:-data/processed/CompanionRisk-Bench/test.jsonl}"
TRANSLATION_CACHE="${TRANSLATION_CACHE:-../experiments/translation_cache.json}"

WILDGUARD_PATH="${WILDGUARD_PATH:-../wildguard}"
SHIELDGEMMA_PATH="${SHIELDGEMMA_PATH:-/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/shieldgemma-2b}"
|
||||
|
||||
# Startup banner summarising the run configuration.
printf '%s\n' \
  "==============================================" \
  " SOTA Baseline v2 — WildGuard + ShieldGemma" \
  " zh→en translation ON | threshold=0.3" \
  "=============================================="
|
||||
|
||||
# ── Install translation dependencies (one-time) ──
# Probe BOTH required packages with the same interpreter that runs the
# evaluation, and install through that interpreter's pip ("python -m pip") so
# the packages land in the environment that will actually import them. A bare
# `pip` on PATH may belong to a different Python installation.
echo ""
echo "[0] Checking translation dependencies..."
# stderr is silenced on purpose: an ImportError here only means "not installed".
if ! "$PYTHON" -c "import sentencepiece, sacremoses" 2>/dev/null; then
  "$PYTHON" -m pip install sentencepiece sacremoses --quiet
fi
|
||||
|
||||
# ── WildGuard v2 ──
# Runs scripts/eval_sota_baselines_v2.py for the "wildguard" model with
# translation enabled, the shared translation cache, threshold 0.3 and the
# cuda device; results go to ../experiments/eval_sota_wildguard_v2.json.
# Arguments are collected in an array and the interpreter path is quoted so
# that no value is subject to word splitting or globbing.
echo ""
echo "[1/2] Evaluating WildGuard v2 (translate + companion prompt)..."
wildguard_args=(
  --model wildguard
  --model-path "$WILDGUARD_PATH"
  --test-data "$TEST_DATA"
  --output "../experiments/eval_sota_wildguard_v2.json"
  --translate
  --translation-cache "$TRANSLATION_CACHE"
  --translate-device cuda
  --threshold 0.3
  --device cuda
)
"$PYTHON" scripts/eval_sota_baselines_v2.py "${wildguard_args[@]}"
|
||||
|
||||
# ── ShieldGemma-2B v2 ──
# Runs scripts/eval_sota_baselines_v2.py for the "shieldgemma2b" model with
# translation enabled, the shared translation cache, threshold 0.3 and the
# cuda device; results go to ../experiments/eval_sota_shieldgemma2b_v2.json.
# Arguments are collected in an array and the interpreter path is quoted so
# that no value is subject to word splitting or globbing.
echo ""
echo "[2/2] Evaluating ShieldGemma-2B v2 (translate + companion policies)..."
shieldgemma_args=(
  --model shieldgemma2b
  --model-path "$SHIELDGEMMA_PATH"
  --test-data "$TEST_DATA"
  --output "../experiments/eval_sota_shieldgemma2b_v2.json"
  --translate
  --translation-cache "$TRANSLATION_CACHE"
  --translate-device cuda
  --threshold 0.3
  --device cuda
)
"$PYTHON" scripts/eval_sota_baselines_v2.py "${shieldgemma_args[@]}"
|
||||
|
||||
# Closing banner: a blank line, then the two result file paths between rules.
printf '%s\n' \
  "" \
  "==============================================" \
  " Done. Results:" \
  " experiments/eval_sota_wildguard_v2.json" \
  " experiments/eval_sota_shieldgemma2b_v2.json" \
  "=============================================="
|
||||
Reference in New Issue
Block a user