{ "meta": { "test_file": "data\\processed\\CompanionRisk-Bench\\test.jsonl", "lang": "zh", "n": 486 }, "L0_all_risky": { "name": "L0_all_risky", "accuracy": 0.7346, "precision": 0.7346, "recall": 1.0, "f1": 0.847, "fnr": 0.0, "tp": 357, "fp": 129, "fn": 0, "tn": 0 }, "L0_all_safe": { "name": "L0_all_safe", "accuracy": 0.2654, "precision": 0.0, "recall": 0.0, "f1": 0.0, "fnr": 1.0, "tp": 0, "fp": 0, "fn": 357, "tn": 129 }, "L1a_keyword": { "name": "L1a_keyword", "accuracy": 0.4198, "precision": 0.9518, "recall": 0.2213, "f1": 0.3591, "fnr": 0.7787, "tp": 79, "fp": 4, "fn": 278, "tn": 125 }, "L1a_keyword_cat_recall": { "R1": { "total": 31, "detected": 3, "recall": 0.0968, "miss_rate": 0.9032 }, "R2": { "total": 39, "detected": 3, "recall": 0.0769, "miss_rate": 0.9231 }, "R3": { "total": 39, "detected": 10, "recall": 0.2564, "miss_rate": 0.7436 }, "R4": { "total": 41, "detected": 7, "recall": 0.1707, "miss_rate": 0.8293 }, "R5": { "total": 36, "detected": 8, "recall": 0.2222, "miss_rate": 0.7778 }, "R6": { "total": 33, "detected": 6, "recall": 0.1818, "miss_rate": 0.8182 }, "R7": { "total": 32, "detected": 4, "recall": 0.125, "miss_rate": 0.875 }, "R8": { "total": 36, "detected": 25, "recall": 0.6944, "miss_rate": 0.3056 }, "R9": { "total": 33, "detected": 3, "recall": 0.0909, "miss_rate": 0.9091 }, "R10": { "total": 37, "detected": 10, "recall": 0.2703, "miss_rate": 0.7297 } }, "L1b_regex": { "name": "L1b_regex", "accuracy": 0.3025, "precision": 1.0, "recall": 0.0504, "f1": 0.096, "fnr": 0.9496, "tp": 18, "fp": 0, "fn": 339, "tn": 129 }, "L1b_regex_cat_recall": { "R1": { "total": 31, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R2": { "total": 39, "detected": 1, "recall": 0.0256, "miss_rate": 0.9744 }, "R3": { "total": 39, "detected": 9, "recall": 0.2308, "miss_rate": 0.7692 }, "R4": { "total": 41, "detected": 3, "recall": 0.0732, "miss_rate": 0.9268 }, "R5": { "total": 36, "detected": 1, "recall": 0.0278, "miss_rate": 0.9722 }, "R6": { "total": 33, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R7": { "total": 32, "detected": 2, "recall": 0.0625, "miss_rate": 0.9375 }, "R8": { "total": 36, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R9": { "total": 33, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R10": { "total": 37, "detected": 2, "recall": 0.0541, "miss_rate": 0.9459 } }, "L1c_combined": { "name": "L1c_combined", "accuracy": 0.4486, "precision": 0.9588, "recall": 0.2605, "f1": 0.4097, "fnr": 0.7395, "tp": 93, "fp": 4, "fn": 264, "tn": 125 }, "L1c_combined_cat_recall": { "R1": { "total": 31, "detected": 3, "recall": 0.0968, "miss_rate": 0.9032 }, "R2": { "total": 39, "detected": 4, "recall": 0.1026, "miss_rate": 0.8974 }, "R3": { "total": 39, "detected": 16, "recall": 0.4103, "miss_rate": 0.5897 }, "R4": { "total": 41, "detected": 9, "recall": 0.2195, "miss_rate": 0.7805 }, "R5": { "total": 36, "detected": 9, "recall": 0.25, "miss_rate": 0.75 }, "R6": { "total": 33, "detected": 6, "recall": 0.1818, "miss_rate": 0.8182 }, "R7": { "total": 32, "detected": 6, "recall": 0.1875, "miss_rate": 0.8125 }, "R8": { "total": 36, "detected": 25, "recall": 0.6944, "miss_rate": 0.3056 }, "R9": { "total": 33, "detected": 3, "recall": 0.0909, "miss_rate": 0.9091 }, "R10": { "total": 37, "detected": 12, "recall": 0.3243, "miss_rate": 0.6757 } }, "intervention_Rule(l≥3→REJECT)": { "policy": "Rule(l≥3→REJECT)", "intervention_recall_high": 1.0, "over_intervention_rate": 0.0, "safety_ux_fscore": 1.0, "crisis_precision": "N/A", "action_distribution": { "PASS": 233, "REJECT": 253 } }, "intervention_Threshold(level→action)": { "policy": "Threshold(level→action)", "intervention_recall_high": 1.0, "over_intervention_rate": 0.0, "safety_ux_fscore": 1.0, "crisis_precision": 1.0, "action_distribution": { "PASS": 129, "WARN": 104, "REWRITE": 193, "CRISIS": 60 } } }