{ "meta": { "model": "shieldgemma2b", "model_path": "/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/shieldgemma-2b", "test_file": "data/processed/CompanionRisk-Bench/test.jsonl", "n_total": 1486, "n_risky": 1039, "threshold": 0.5 }, "shieldgemma2b": { "binary_f1": 0.0266, "high_risk_recall": 0.0135, "high_risk_precision": 1.0, "false_negative_rate": 0.9865, "level_macro_f1": null, "level_weighted_f1": null, "per_category_recall": { "R1": { "total": 136, "detected": 3, "recall": 0.0221, "miss_rate": 0.9779 }, "R2": { "total": 142, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R3": { "total": 95, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R4": { "total": 116, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R5": { "total": 64, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R6": { "total": 97, "detected": 8, "recall": 0.0825, "miss_rate": 0.9175 }, "R7": { "total": 91, "detected": 2, "recall": 0.022, "miss_rate": 0.978 }, "R8": { "total": 73, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R9": { "total": 152, "detected": 1, "recall": 0.0066, "miss_rate": 0.9934 }, "R10": { "total": 73, "detected": 0, "recall": 0.0, "miss_rate": 1.0 } }, "note": "level metrics N/A — model outputs binary safe/unsafe only" } }