Files
CompanionGuard-RL/experiments/eval_all.json
zhangsiyuan 52ba43f08d feat: Module C v5/v6 training complete, ablations, SOTA baselines, paper updates
- Module C: BC+PPO training v5/v6 done; eval results in experiments/eval_intervention_v{5,6}.json
- Reward: v5 label-aligned constrained reward (code/src/rl/reward.py)
- Ablations: Module B (history_r, response_only, full) + Module C (wo_category_reward)
- SOTA baselines: WildGuard and ShieldGemma2b eval scripts and results
- Paper: update sections 05–08 (Module B/C description, experiments table, discussion)
- Docs: add record.md (change log), update state.md and exp.md; retire change.md
- Tools: add html-to-ppt utilities and run_shieldgemma2b.sh
- Configs: add ablation YAML configs for Module B and C
- Cleanup: remove stale reference/ PNG screenshots

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 14:24:09 +08:00

307 lines
6.3 KiB
JSON

{
"meta": {
"test_file": "data/processed/CompanionRisk-Bench/test.jsonl",
"source_filter": "all",
"n_total": 605,
"n_filtered": 605,
"n_risky": 456
},
"L1a_keyword": {
"binary_f1": 0.29313543599257885,
"high_risk_recall": 0.17324561403508773,
"high_risk_precision": 0.9518072289156626,
"false_negative_rate": 0.8267543859649122,
"level_macro_f1": 0.09819557155678502,
"level_weighted_f1": 0.08825982748460577,
"per_category_recall": {
"R1": {
"total": 67,
"detected": 3,
"recall": 0.0448,
"miss_rate": 0.9552
},
"R2": {
"total": 45,
"detected": 3,
"recall": 0.0667,
"miss_rate": 0.9333
},
"R3": {
"total": 39,
"detected": 10,
"recall": 0.2564,
"miss_rate": 0.7436
},
"R4": {
"total": 41,
"detected": 7,
"recall": 0.1707,
"miss_rate": 0.8293
},
"R5": {
"total": 36,
"detected": 8,
"recall": 0.2222,
"miss_rate": 0.7778
},
"R6": {
"total": 64,
"detected": 6,
"recall": 0.0938,
"miss_rate": 0.9062
},
"R7": {
"total": 37,
"detected": 4,
"recall": 0.1081,
"miss_rate": 0.8919
},
"R8": {
"total": 38,
"detected": 25,
"recall": 0.6579,
"miss_rate": 0.3421
},
"R9": {
"total": 52,
"detected": 3,
"recall": 0.0577,
"miss_rate": 0.9423
},
"R10": {
"total": 37,
"detected": 10,
"recall": 0.2703,
"miss_rate": 0.7297
}
}
},
"L1b_regex": {
"binary_f1": 0.0759493670886076,
"high_risk_recall": 0.039473684210526314,
"high_risk_precision": 1.0,
"false_negative_rate": 0.9605263157894737,
"level_macro_f1": 0.07132623033992896,
"level_weighted_f1": 0.058213483946983315,
"per_category_recall": {
"R1": {
"total": 67,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R2": {
"total": 45,
"detected": 1,
"recall": 0.0222,
"miss_rate": 0.9778
},
"R3": {
"total": 39,
"detected": 9,
"recall": 0.2308,
"miss_rate": 0.7692
},
"R4": {
"total": 41,
"detected": 3,
"recall": 0.0732,
"miss_rate": 0.9268
},
"R5": {
"total": 36,
"detected": 1,
"recall": 0.0278,
"miss_rate": 0.9722
},
"R6": {
"total": 64,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R7": {
"total": 37,
"detected": 2,
"recall": 0.0541,
"miss_rate": 0.9459
},
"R8": {
"total": 38,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R9": {
"total": 52,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R10": {
"total": 37,
"detected": 2,
"recall": 0.0541,
"miss_rate": 0.9459
}
}
},
"L1c_combined": {
"binary_f1": 0.33634719710669075,
"high_risk_recall": 0.20394736842105263,
"high_risk_precision": 0.9587628865979382,
"false_negative_rate": 0.7960526315789473,
"level_macro_f1": 0.10979552475377227,
"level_weighted_f1": 0.1000980341896042,
"per_category_recall": {
"R1": {
"total": 67,
"detected": 3,
"recall": 0.0448,
"miss_rate": 0.9552
},
"R2": {
"total": 45,
"detected": 4,
"recall": 0.0889,
"miss_rate": 0.9111
},
"R3": {
"total": 39,
"detected": 16,
"recall": 0.4103,
"miss_rate": 0.5897
},
"R4": {
"total": 41,
"detected": 9,
"recall": 0.2195,
"miss_rate": 0.7805
},
"R5": {
"total": 36,
"detected": 9,
"recall": 0.25,
"miss_rate": 0.75
},
"R6": {
"total": 64,
"detected": 6,
"recall": 0.0938,
"miss_rate": 0.9062
},
"R7": {
"total": 37,
"detected": 6,
"recall": 0.1622,
"miss_rate": 0.8378
},
"R8": {
"total": 38,
"detected": 25,
"recall": 0.6579,
"miss_rate": 0.3421
},
"R9": {
"total": 52,
"detected": 3,
"recall": 0.0577,
"miss_rate": 0.9423
},
"R10": {
"total": 37,
"detected": 12,
"recall": 0.3243,
"miss_rate": 0.6757
}
}
},
"ours_detection": {
"binary_f1": 0.9967069154774972,
"high_risk_recall": 0.9956140350877193,
"high_risk_precision": 0.9978021978021978,
"false_negative_rate": 0.004385964912280715,
"level_macro_f1": 0.5150467302191439,
"level_weighted_f1": 0.5173056767699116,
"fine_macro_f1": 0.0,
"fine_weighted_f1": 0.0,
"fine_per_label_f1": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"per_category_recall": {
"R1": {
"total": 67,
"detected": 66,
"recall": 0.9851,
"miss_rate": 0.0149
},
"R2": {
"total": 45,
"detected": 44,
"recall": 0.9778,
"miss_rate": 0.0222
},
"R3": {
"total": 39,
"detected": 39,
"recall": 1.0,
"miss_rate": 0.0
},
"R4": {
"total": 41,
"detected": 41,
"recall": 1.0,
"miss_rate": 0.0
},
"R5": {
"total": 36,
"detected": 36,
"recall": 1.0,
"miss_rate": 0.0
},
"R6": {
"total": 64,
"detected": 64,
"recall": 1.0,
"miss_rate": 0.0
},
"R7": {
"total": 37,
"detected": 37,
"recall": 1.0,
"miss_rate": 0.0
},
"R8": {
"total": 38,
"detected": 38,
"recall": 1.0,
"miss_rate": 0.0
},
"R9": {
"total": 52,
"detected": 52,
"recall": 1.0,
"miss_rate": 0.0
},
"R10": {
"total": 37,
"detected": 37,
"recall": 1.0,
"miss_rate": 0.0
}
}
}
}