- Module C: BC+PPO training v5/v6 done; eval results in experiments/eval_intervention_v{5,6}.json
- Reward: v5 label-aligned constrained reward (code/src/rl/reward.py)
- Ablations: Module B (history_r, response_only, full) + Module C (wo_category_reward)
- SOTA baselines: WildGuard and ShieldGemma2b eval scripts and results
- Paper: update sections 05–08 (Module B/C description, experiments table, discussion)
- Docs: add record.md (change log), update state.md and exp.md; retire change.md
- Tools: add html-to-ppt utilities and run_shieldgemma2b.sh
- Configs: add ablation YAML configs for Module B and C
- Cleanup: remove stale reference/ PNG screenshots
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1049 lines
22 KiB
JSON
1049 lines
22 KiB
JSON
{
|
|
"meta": {
|
|
"test_file": "data/processed/CompanionRisk-Bench/test.jsonl",
|
|
"source_filter": "all",
|
|
"label_filter": "all",
|
|
"n_total": 1486,
|
|
"n_filtered": 1486,
|
|
"n_risky": 1039
|
|
},
|
|
"L1a_keyword": {
|
|
"binary_f1": 0.26436781609195403,
|
|
"high_risk_recall": 0.15495668912415783,
|
|
"high_risk_precision": 0.8994413407821229,
|
|
"false_negative_rate": 0.8450433108758422,
|
|
"level_macro_f1": 0.10427720349098286,
|
|
"level_weighted_f1": 0.09799538109505529,
|
|
"level_per_class_f1": [
|
|
0.2979274611398964,
|
|
0.0,
|
|
0.1934156378600823,
|
|
0.030042918454935622,
|
|
0.0
|
|
],
|
|
"per_category_recall": {
|
|
"R1": {
|
|
"total": 136,
|
|
"detected": 10,
|
|
"recall": 0.0735,
|
|
"miss_rate": 0.9265
|
|
},
|
|
"R2": {
|
|
"total": 142,
|
|
"detected": 16,
|
|
"recall": 0.1127,
|
|
"miss_rate": 0.8873
|
|
},
|
|
"R3": {
|
|
"total": 95,
|
|
"detected": 17,
|
|
"recall": 0.1789,
|
|
"miss_rate": 0.8211
|
|
},
|
|
"R4": {
|
|
"total": 116,
|
|
"detected": 22,
|
|
"recall": 0.1897,
|
|
"miss_rate": 0.8103
|
|
},
|
|
"R5": {
|
|
"total": 64,
|
|
"detected": 9,
|
|
"recall": 0.1406,
|
|
"miss_rate": 0.8594
|
|
},
|
|
"R6": {
|
|
"total": 97,
|
|
"detected": 11,
|
|
"recall": 0.1134,
|
|
"miss_rate": 0.8866
|
|
},
|
|
"R7": {
|
|
"total": 91,
|
|
"detected": 6,
|
|
"recall": 0.0659,
|
|
"miss_rate": 0.9341
|
|
},
|
|
"R8": {
|
|
"total": 73,
|
|
"detected": 49,
|
|
"recall": 0.6712,
|
|
"miss_rate": 0.3288
|
|
},
|
|
"R9": {
|
|
"total": 152,
|
|
"detected": 11,
|
|
"recall": 0.0724,
|
|
"miss_rate": 0.9276
|
|
},
|
|
"R10": {
|
|
"total": 73,
|
|
"detected": 10,
|
|
"recall": 0.137,
|
|
"miss_rate": 0.863
|
|
}
|
|
}
|
|
},
|
|
"L1b_regex": {
|
|
"binary_f1": 0.06697674418604652,
|
|
"high_risk_recall": 0.03464870067372473,
|
|
"high_risk_precision": 1.0,
|
|
"false_negative_rate": 0.9653512993262753,
|
|
"level_macro_f1": 0.07297879241072718,
|
|
"level_weighted_f1": 0.06312377515343655,
|
|
"level_per_class_f1": [
|
|
0.2809721398933017,
|
|
0.0,
|
|
0.07954545454545454,
|
|
0.00437636761487965,
|
|
0.0
|
|
],
|
|
"per_category_recall": {
|
|
"R1": {
|
|
"total": 136,
|
|
"detected": 0,
|
|
"recall": 0.0,
|
|
"miss_rate": 1.0
|
|
},
|
|
"R2": {
|
|
"total": 142,
|
|
"detected": 1,
|
|
"recall": 0.007,
|
|
"miss_rate": 0.993
|
|
},
|
|
"R3": {
|
|
"total": 95,
|
|
"detected": 19,
|
|
"recall": 0.2,
|
|
"miss_rate": 0.8
|
|
},
|
|
"R4": {
|
|
"total": 116,
|
|
"detected": 9,
|
|
"recall": 0.0776,
|
|
"miss_rate": 0.9224
|
|
},
|
|
"R5": {
|
|
"total": 64,
|
|
"detected": 0,
|
|
"recall": 0.0,
|
|
"miss_rate": 1.0
|
|
},
|
|
"R6": {
|
|
"total": 97,
|
|
"detected": 0,
|
|
"recall": 0.0,
|
|
"miss_rate": 1.0
|
|
},
|
|
"R7": {
|
|
"total": 91,
|
|
"detected": 3,
|
|
"recall": 0.033,
|
|
"miss_rate": 0.967
|
|
},
|
|
"R8": {
|
|
"total": 73,
|
|
"detected": 0,
|
|
"recall": 0.0,
|
|
"miss_rate": 1.0
|
|
},
|
|
"R9": {
|
|
"total": 152,
|
|
"detected": 0,
|
|
"recall": 0.0,
|
|
"miss_rate": 1.0
|
|
},
|
|
"R10": {
|
|
"total": 73,
|
|
"detected": 4,
|
|
"recall": 0.0548,
|
|
"miss_rate": 0.9452
|
|
}
|
|
}
|
|
},
|
|
"L1c_combined": {
|
|
"binary_f1": 0.3060897435897436,
|
|
"high_risk_recall": 0.18383060635226178,
|
|
"high_risk_precision": 0.9138755980861244,
|
|
"false_negative_rate": 0.8161693936477382,
|
|
"level_macro_f1": 0.11189027535274536,
|
|
"level_weighted_f1": 0.10619241328971442,
|
|
"level_per_class_f1": [
|
|
0.3038309114927345,
|
|
0.0,
|
|
0.22135922330097088,
|
|
0.034261241970021415,
|
|
0.0
|
|
],
|
|
"per_category_recall": {
|
|
"R1": {
|
|
"total": 136,
|
|
"detected": 10,
|
|
"recall": 0.0735,
|
|
"miss_rate": 0.9265
|
|
},
|
|
"R2": {
|
|
"total": 142,
|
|
"detected": 17,
|
|
"recall": 0.1197,
|
|
"miss_rate": 0.8803
|
|
},
|
|
"R3": {
|
|
"total": 95,
|
|
"detected": 32,
|
|
"recall": 0.3368,
|
|
"miss_rate": 0.6632
|
|
},
|
|
"R4": {
|
|
"total": 116,
|
|
"detected": 29,
|
|
"recall": 0.25,
|
|
"miss_rate": 0.75
|
|
},
|
|
"R5": {
|
|
"total": 64,
|
|
"detected": 9,
|
|
"recall": 0.1406,
|
|
"miss_rate": 0.8594
|
|
},
|
|
"R6": {
|
|
"total": 97,
|
|
"detected": 11,
|
|
"recall": 0.1134,
|
|
"miss_rate": 0.8866
|
|
},
|
|
"R7": {
|
|
"total": 91,
|
|
"detected": 9,
|
|
"recall": 0.0989,
|
|
"miss_rate": 0.9011
|
|
},
|
|
"R8": {
|
|
"total": 73,
|
|
"detected": 49,
|
|
"recall": 0.6712,
|
|
"miss_rate": 0.3288
|
|
},
|
|
"R9": {
|
|
"total": 152,
|
|
"detected": 11,
|
|
"recall": 0.0724,
|
|
"miss_rate": 0.9276
|
|
},
|
|
"R10": {
|
|
"total": 73,
|
|
"detected": 14,
|
|
"recall": 0.1918,
|
|
"miss_rate": 0.8082
|
|
}
|
|
}
|
|
},
|
|
"ours_detection": {
|
|
"binary_f1": 0.9995189995189995,
|
|
"high_risk_recall": 1.0,
|
|
"high_risk_precision": 0.9990384615384615,
|
|
"false_negative_rate": 0.0,
|
|
"level_macro_f1": 0.5495554176357882,
|
|
"level_weighted_f1": 0.5584578220374772,
|
|
"level_per_class_f1": [
|
|
0.37540453074433655,
|
|
0.6351931330472103,
|
|
0.46393762183235865,
|
|
0.6400759734093068,
|
|
0.6331658291457286
|
|
],
|
|
"fine_per_label_f1": [
|
|
0.6844262295081968,
|
|
0.46567164179104475,
|
|
0.697986577181208,
|
|
0.40233236151603496,
|
|
0.585,
|
|
0.3559322033898305,
|
|
0.38322211630123926,
|
|
0.3374578177727784,
|
|
0.531810766721044,
|
|
0.39436619718309857,
|
|
0.2691029900332226,
|
|
0.4410480349344978,
|
|
0.32142857142857145,
|
|
0.615916955017301
|
|
],
|
|
"fine_macro_f1": 0.46326446162700485,
|
|
"fine_weighted_f1": 0.4915026862223374,
|
|
"per_category_recall": {
|
|
"R1": {
|
|
"total": 136,
|
|
"detected": 136,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R2": {
|
|
"total": 142,
|
|
"detected": 142,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R3": {
|
|
"total": 95,
|
|
"detected": 95,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R4": {
|
|
"total": 116,
|
|
"detected": 116,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R5": {
|
|
"total": 64,
|
|
"detected": 64,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R6": {
|
|
"total": 97,
|
|
"detected": 97,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R7": {
|
|
"total": 91,
|
|
"detected": 91,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R8": {
|
|
"total": 73,
|
|
"detected": 73,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R9": {
|
|
"total": 152,
|
|
"detected": 152,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
},
|
|
"R10": {
|
|
"total": 73,
|
|
"detected": 73,
|
|
"recall": 1.0,
|
|
"miss_rate": 0.0
|
|
}
|
|
},
|
|
"label_filter": "all"
|
|
},
|
|
"baseline_rule": {
|
|
"intervention_recall_high": 0.9079754601226994,
|
|
"safety_recall": 0.9079754601226994,
|
|
"over_refusal": 0.0,
|
|
"over_intervention_rate": 0.0,
|
|
"action_distribution": [
|
|
0.46231493943472407,
|
|
0.0,
|
|
0.0,
|
|
0.5376850605652759,
|
|
0.0
|
|
],
|
|
"per_level_action_dist": {
|
|
"L0_Safe": {
|
|
"n": 237,
|
|
"action_dist": [
|
|
1.0,
|
|
0.0,
|
|
0.0,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"L1_Mild": {
|
|
"n": 280,
|
|
"action_dist": [
|
|
0.9178571428571428,
|
|
0.0,
|
|
0.0,
|
|
0.08214285714285714,
|
|
0.0
|
|
]
|
|
},
|
|
"L2_Moderate": {
|
|
"n": 317,
|
|
"action_dist": [
|
|
0.4195583596214511,
|
|
0.0,
|
|
0.0,
|
|
0.580441640378549,
|
|
0.0
|
|
]
|
|
},
|
|
"L3_High": {
|
|
"n": 456,
|
|
"action_dist": [
|
|
0.11403508771929824,
|
|
0.0,
|
|
0.0,
|
|
0.8859649122807017,
|
|
0.0
|
|
]
|
|
},
|
|
"L4_Critical": {
|
|
"n": 196,
|
|
"action_dist": [
|
|
0.04081632653061224,
|
|
0.0,
|
|
0.0,
|
|
0.9591836734693877,
|
|
0.0
|
|
]
|
|
}
|
|
},
|
|
"crisis_precision_l4": NaN,
|
|
"crisis_precision": NaN,
|
|
"crisis_appropriateness": NaN,
|
|
"R1_high_critical_crisis_recall": 0.0,
|
|
"reject_rate_R6_R7_high": 0.956989247311828,
|
|
"strong_intervention_rate_L1": 0.08214285714285714,
|
|
"per_category_action_dist": {
|
|
"R1": {
|
|
"n": 140,
|
|
"action_dist": [
|
|
0.17857142857142858,
|
|
0.0,
|
|
0.0,
|
|
0.8214285714285714,
|
|
0.0
|
|
]
|
|
},
|
|
"R2": {
|
|
"n": 150,
|
|
"action_dist": [
|
|
0.4533333333333333,
|
|
0.0,
|
|
0.0,
|
|
0.5466666666666666,
|
|
0.0
|
|
]
|
|
},
|
|
"R3": {
|
|
"n": 95,
|
|
"action_dist": [
|
|
0.05263157894736842,
|
|
0.0,
|
|
0.0,
|
|
0.9473684210526315,
|
|
0.0
|
|
]
|
|
},
|
|
"R4": {
|
|
"n": 176,
|
|
"action_dist": [
|
|
0.48295454545454547,
|
|
0.0,
|
|
0.0,
|
|
0.5170454545454546,
|
|
0.0
|
|
]
|
|
},
|
|
"R5": {
|
|
"n": 64,
|
|
"action_dist": [
|
|
0.359375,
|
|
0.0,
|
|
0.0,
|
|
0.640625,
|
|
0.0
|
|
]
|
|
},
|
|
"R6": {
|
|
"n": 100,
|
|
"action_dist": [
|
|
0.09,
|
|
0.0,
|
|
0.0,
|
|
0.91,
|
|
0.0
|
|
]
|
|
},
|
|
"R7": {
|
|
"n": 91,
|
|
"action_dist": [
|
|
0.02197802197802198,
|
|
0.0,
|
|
0.0,
|
|
0.978021978021978,
|
|
0.0
|
|
]
|
|
},
|
|
"R8": {
|
|
"n": 215,
|
|
"action_dist": [
|
|
0.786046511627907,
|
|
0.0,
|
|
0.0,
|
|
0.21395348837209302,
|
|
0.0
|
|
]
|
|
},
|
|
"R9": {
|
|
"n": 382,
|
|
"action_dist": [
|
|
0.7827225130890052,
|
|
0.0,
|
|
0.0,
|
|
0.21727748691099477,
|
|
0.0
|
|
]
|
|
},
|
|
"R10": {
|
|
"n": 73,
|
|
"action_dist": [
|
|
0.0273972602739726,
|
|
0.0,
|
|
0.0,
|
|
0.9726027397260274,
|
|
0.0
|
|
]
|
|
}
|
|
},
|
|
"safety_ux_fscore": 0.9517684887459806
|
|
},
|
|
"baseline_threshold": {
|
|
"intervention_recall_high": 0.9079754601226994,
|
|
"safety_recall": 0.9079754601226994,
|
|
"over_refusal": 0.0,
|
|
"over_intervention_rate": 0.0,
|
|
"action_distribution": [
|
|
0.3304172274562584,
|
|
0.13189771197846567,
|
|
0.40174966352624497,
|
|
0.0,
|
|
0.13593539703903096
|
|
],
|
|
"per_level_action_dist": {
|
|
"L0_Safe": {
|
|
"n": 237,
|
|
"action_dist": [
|
|
1.0,
|
|
0.0,
|
|
0.0,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"L1_Mild": {
|
|
"n": 280,
|
|
"action_dist": [
|
|
0.8428571428571429,
|
|
0.075,
|
|
0.08214285714285714,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"L2_Moderate": {
|
|
"n": 317,
|
|
"action_dist": [
|
|
0.04416403785488959,
|
|
0.3753943217665615,
|
|
0.5520504731861199,
|
|
0.0,
|
|
0.028391167192429023
|
|
]
|
|
},
|
|
"L3_High": {
|
|
"n": 456,
|
|
"action_dist": [
|
|
0.008771929824561403,
|
|
0.10526315789473684,
|
|
0.7390350877192983,
|
|
0.0,
|
|
0.14692982456140352
|
|
]
|
|
},
|
|
"L4_Critical": {
|
|
"n": 196,
|
|
"action_dist": [
|
|
0.0,
|
|
0.04081632653061224,
|
|
0.3163265306122449,
|
|
0.0,
|
|
0.6428571428571429
|
|
]
|
|
}
|
|
},
|
|
"crisis_precision_l4": 0.6237623762376238,
|
|
"crisis_precision": 0.6237623762376238,
|
|
"crisis_appropriateness": 0.7128712871287128,
|
|
"R1_high_critical_crisis_recall": 0.5132743362831859,
|
|
"reject_rate_R6_R7_high": 0.0,
|
|
"strong_intervention_rate_L1": 0.08214285714285714,
|
|
"per_category_action_dist": {
|
|
"R1": {
|
|
"n": 140,
|
|
"action_dist": [
|
|
0.03571428571428571,
|
|
0.14285714285714285,
|
|
0.38571428571428573,
|
|
0.0,
|
|
0.4357142857142857
|
|
]
|
|
},
|
|
"R2": {
|
|
"n": 150,
|
|
"action_dist": [
|
|
0.12,
|
|
0.3333333333333333,
|
|
0.5333333333333333,
|
|
0.0,
|
|
0.013333333333333334
|
|
]
|
|
},
|
|
"R3": {
|
|
"n": 95,
|
|
"action_dist": [
|
|
0.042105263157894736,
|
|
0.010526315789473684,
|
|
0.9473684210526315,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"R4": {
|
|
"n": 176,
|
|
"action_dist": [
|
|
0.42613636363636365,
|
|
0.056818181818181816,
|
|
0.5170454545454546,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"R5": {
|
|
"n": 64,
|
|
"action_dist": [
|
|
0.0,
|
|
0.359375,
|
|
0.4375,
|
|
0.0,
|
|
0.203125
|
|
]
|
|
},
|
|
"R6": {
|
|
"n": 100,
|
|
"action_dist": [
|
|
0.03,
|
|
0.06,
|
|
0.4,
|
|
0.0,
|
|
0.51
|
|
]
|
|
},
|
|
"R7": {
|
|
"n": 91,
|
|
"action_dist": [
|
|
0.0,
|
|
0.02197802197802198,
|
|
0.2087912087912088,
|
|
0.0,
|
|
0.7692307692307693
|
|
]
|
|
},
|
|
"R8": {
|
|
"n": 215,
|
|
"action_dist": [
|
|
0.6651162790697674,
|
|
0.12093023255813953,
|
|
0.21395348837209302,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"R9": {
|
|
"n": 382,
|
|
"action_dist": [
|
|
0.6361256544502618,
|
|
0.14659685863874344,
|
|
0.21727748691099477,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"R10": {
|
|
"n": 73,
|
|
"action_dist": [
|
|
0.0,
|
|
0.0273972602739726,
|
|
0.9041095890410958,
|
|
0.0,
|
|
0.0684931506849315
|
|
]
|
|
}
|
|
},
|
|
"safety_ux_fscore": 0.9517684887459806
|
|
},
|
|
"bc_only_intervention": {
|
|
"intervention_recall_high": 0.9401840490797546,
|
|
"safety_recall": 0.9401840490797546,
|
|
"over_refusal": 0.0,
|
|
"over_intervention_rate": 0.0,
|
|
"action_accuracy": 0.6965006729475101,
|
|
"exact_action_accuracy_by_level": {
|
|
"L0_Safe": 1.0,
|
|
"L1_Mild": 0.8071428571428572,
|
|
"L2_Moderate": 0.28391167192429023,
|
|
"L3_High": 0.7850877192982456,
|
|
"L4_Critical": 0.6326530612244898
|
|
},
|
|
"action_distribution": [
|
|
0.3203230148048452,
|
|
0.09488559892328398,
|
|
0.351278600269179,
|
|
0.16218034993270525,
|
|
0.07133243606998654
|
|
],
|
|
"per_level_action_dist": {
|
|
"L0_Safe": {
|
|
"n": 237,
|
|
"action_dist": [
|
|
1.0,
|
|
0.0,
|
|
0.0,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"L1_Mild": {
|
|
"n": 280,
|
|
"action_dist": [
|
|
0.8071428571428572,
|
|
0.05357142857142857,
|
|
0.11071428571428571,
|
|
0.017857142857142856,
|
|
0.010714285714285714
|
|
]
|
|
},
|
|
"L2_Moderate": {
|
|
"n": 317,
|
|
"action_dist": [
|
|
0.031545741324921134,
|
|
0.28391167192429023,
|
|
0.5331230283911672,
|
|
0.08832807570977919,
|
|
0.06309148264984227
|
|
]
|
|
},
|
|
"L3_High": {
|
|
"n": 456,
|
|
"action_dist": [
|
|
0.006578947368421052,
|
|
0.07017543859649122,
|
|
0.6513157894736842,
|
|
0.20833333333333334,
|
|
0.06359649122807018
|
|
]
|
|
},
|
|
"L4_Critical": {
|
|
"n": 196,
|
|
"action_dist": [
|
|
0.0,
|
|
0.02040816326530612,
|
|
0.12755102040816327,
|
|
0.576530612244898,
|
|
0.2755102040816326
|
|
]
|
|
}
|
|
},
|
|
"crisis_precision_l4": 0.5094339622641509,
|
|
"crisis_precision": 0.5094339622641509,
|
|
"crisis_appropriateness": 0.6509433962264151,
|
|
"R1_high_critical_crisis_recall": 0.4690265486725664,
|
|
"reject_rate_R6_R7_high": 0.7849462365591398,
|
|
"strong_intervention_rate_L1": 0.1392857142857143,
|
|
"per_category_action_dist": {
|
|
"R1": {
|
|
"n": 140,
|
|
"action_dist": [
|
|
0.02857142857142857,
|
|
0.05,
|
|
0.20714285714285716,
|
|
0.24285714285714285,
|
|
0.4714285714285714
|
|
]
|
|
},
|
|
"R2": {
|
|
"n": 150,
|
|
"action_dist": [
|
|
0.1,
|
|
0.25333333333333335,
|
|
0.5,
|
|
0.12,
|
|
0.02666666666666667
|
|
]
|
|
},
|
|
"R3": {
|
|
"n": 95,
|
|
"action_dist": [
|
|
0.021052631578947368,
|
|
0.0,
|
|
0.9368421052631579,
|
|
0.042105263157894736,
|
|
0.0
|
|
]
|
|
},
|
|
"R4": {
|
|
"n": 176,
|
|
"action_dist": [
|
|
0.3977272727272727,
|
|
0.028409090909090908,
|
|
0.5625,
|
|
0.011363636363636364,
|
|
0.0
|
|
]
|
|
},
|
|
"R5": {
|
|
"n": 64,
|
|
"action_dist": [
|
|
0.0,
|
|
0.3125,
|
|
0.4375,
|
|
0.015625,
|
|
0.234375
|
|
]
|
|
},
|
|
"R6": {
|
|
"n": 100,
|
|
"action_dist": [
|
|
0.03,
|
|
0.01,
|
|
0.14,
|
|
0.69,
|
|
0.13
|
|
]
|
|
},
|
|
"R7": {
|
|
"n": 91,
|
|
"action_dist": [
|
|
0.0,
|
|
0.01098901098901099,
|
|
0.0989010989010989,
|
|
0.8681318681318682,
|
|
0.02197802197802198
|
|
]
|
|
},
|
|
"R8": {
|
|
"n": 215,
|
|
"action_dist": [
|
|
0.6604651162790698,
|
|
0.10232558139534884,
|
|
0.20930232558139536,
|
|
0.023255813953488372,
|
|
0.004651162790697674
|
|
]
|
|
},
|
|
"R9": {
|
|
"n": 382,
|
|
"action_dist": [
|
|
0.6282722513089005,
|
|
0.11780104712041885,
|
|
0.193717277486911,
|
|
0.0549738219895288,
|
|
0.005235602094240838
|
|
]
|
|
},
|
|
"R10": {
|
|
"n": 73,
|
|
"action_dist": [
|
|
0.0,
|
|
0.0273972602739726,
|
|
0.821917808219178,
|
|
0.1095890410958904,
|
|
0.0410958904109589
|
|
]
|
|
}
|
|
},
|
|
"safety_ux_fscore": 0.9691699604743083
|
|
},
|
|
"ours_intervention": {
|
|
"intervention_recall_high": 0.9524539877300614,
|
|
"safety_recall": 0.9524539877300614,
|
|
"over_refusal": 0.0,
|
|
"over_intervention_rate": 0.0,
|
|
"action_accuracy": 0.7059219380888291,
|
|
"exact_action_accuracy_by_level": {
|
|
"L0_Safe": 1.0,
|
|
"L1_Mild": 0.8214285714285714,
|
|
"L2_Moderate": 0.2807570977917981,
|
|
"L3_High": 0.8048245614035088,
|
|
"L4_Critical": 0.6428571428571429
|
|
},
|
|
"action_distribution": [
|
|
0.32166890982503366,
|
|
0.0901749663526245,
|
|
0.3916554508748318,
|
|
0.12584118438761777,
|
|
0.07065948855989233
|
|
],
|
|
"per_level_action_dist": {
|
|
"L0_Safe": {
|
|
"n": 237,
|
|
"action_dist": [
|
|
1.0,
|
|
0.0,
|
|
0.0,
|
|
0.0,
|
|
0.0
|
|
]
|
|
},
|
|
"L1_Mild": {
|
|
"n": 280,
|
|
"action_dist": [
|
|
0.8214285714285714,
|
|
0.07142857142857142,
|
|
0.1,
|
|
0.007142857142857143,
|
|
0.0
|
|
]
|
|
},
|
|
"L2_Moderate": {
|
|
"n": 317,
|
|
"action_dist": [
|
|
0.025236593059936908,
|
|
0.27129337539432175,
|
|
0.5930599369085173,
|
|
0.0694006309148265,
|
|
0.04100946372239748
|
|
]
|
|
},
|
|
"L3_High": {
|
|
"n": 456,
|
|
"action_dist": [
|
|
0.006578947368421052,
|
|
0.05921052631578947,
|
|
0.7105263157894737,
|
|
0.15350877192982457,
|
|
0.07017543859649122
|
|
]
|
|
},
|
|
"L4_Critical": {
|
|
"n": 196,
|
|
"action_dist": [
|
|
0.0,
|
|
0.00510204081632653,
|
|
0.21428571428571427,
|
|
0.4744897959183674,
|
|
0.30612244897959184
|
|
]
|
|
}
|
|
},
|
|
"crisis_precision_l4": 0.5714285714285714,
|
|
"crisis_precision": 0.5714285714285714,
|
|
"crisis_appropriateness": 0.7523809523809524,
|
|
"R1_high_critical_crisis_recall": 0.5663716814159292,
|
|
"reject_rate_R6_R7_high": 0.7150537634408602,
|
|
"strong_intervention_rate_L1": 0.10714285714285714,
|
|
"per_category_action_dist": {
|
|
"R1": {
|
|
"n": 140,
|
|
"action_dist": [
|
|
0.03571428571428571,
|
|
0.07857142857142857,
|
|
0.2857142857142857,
|
|
0.09285714285714286,
|
|
0.5071428571428571
|
|
]
|
|
},
|
|
"R2": {
|
|
"n": 150,
|
|
"action_dist": [
|
|
0.1,
|
|
0.24,
|
|
0.5666666666666667,
|
|
0.06666666666666667,
|
|
0.02666666666666667
|
|
]
|
|
},
|
|
"R3": {
|
|
"n": 95,
|
|
"action_dist": [
|
|
0.021052631578947368,
|
|
0.021052631578947368,
|
|
0.9263157894736842,
|
|
0.021052631578947368,
|
|
0.010526315789473684
|
|
]
|
|
},
|
|
"R4": {
|
|
"n": 176,
|
|
"action_dist": [
|
|
0.4034090909090909,
|
|
0.07386363636363637,
|
|
0.5170454545454546,
|
|
0.0,
|
|
0.005681818181818182
|
|
]
|
|
},
|
|
"R5": {
|
|
"n": 64,
|
|
"action_dist": [
|
|
0.0,
|
|
0.1875,
|
|
0.65625,
|
|
0.03125,
|
|
0.125
|
|
]
|
|
},
|
|
"R6": {
|
|
"n": 100,
|
|
"action_dist": [
|
|
0.03,
|
|
0.0,
|
|
0.25,
|
|
0.56,
|
|
0.16
|
|
]
|
|
},
|
|
"R7": {
|
|
"n": 91,
|
|
"action_dist": [
|
|
0.0,
|
|
0.01098901098901099,
|
|
0.10989010989010989,
|
|
0.8681318681318682,
|
|
0.01098901098901099
|
|
]
|
|
},
|
|
"R8": {
|
|
"n": 215,
|
|
"action_dist": [
|
|
0.6604651162790698,
|
|
0.07441860465116279,
|
|
0.24651162790697675,
|
|
0.018604651162790697,
|
|
0.0
|
|
]
|
|
},
|
|
"R9": {
|
|
"n": 382,
|
|
"action_dist": [
|
|
0.6282722513089005,
|
|
0.10732984293193717,
|
|
0.21727748691099477,
|
|
0.04450261780104712,
|
|
0.002617801047120419
|
|
]
|
|
},
|
|
"R10": {
|
|
"n": 73,
|
|
"action_dist": [
|
|
0.0,
|
|
0.0273972602739726,
|
|
0.8904109589041096,
|
|
0.0547945205479452,
|
|
0.0273972602739726
|
|
]
|
|
}
|
|
},
|
|
"safety_ux_fscore": 0.9756480754124116
|
|
}
|
|
} |