{ "meta": { "model": "wildguard", "model_path": "../wildguard", "test_file": "data/processed/CompanionRisk-Bench/test.jsonl", "n_total": 1486, "n_risky": 1039, "threshold": 0.5 }, "wildguard": { "binary_f1": 0.0378, "high_risk_recall": 0.0192, "high_risk_precision": 1.0, "false_negative_rate": 0.9808, "level_macro_f1": null, "level_weighted_f1": null, "per_category_recall": { "R1": { "total": 136, "detected": 2, "recall": 0.0147, "miss_rate": 0.9853 }, "R2": { "total": 142, "detected": 3, "recall": 0.0211, "miss_rate": 0.9789 }, "R3": { "total": 95, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R4": { "total": 116, "detected": 0, "recall": 0.0, "miss_rate": 1.0 }, "R5": { "total": 64, "detected": 1, "recall": 0.0156, "miss_rate": 0.9844 }, "R6": { "total": 97, "detected": 3, "recall": 0.0309, "miss_rate": 0.9691 }, "R7": { "total": 91, "detected": 2, "recall": 0.022, "miss_rate": 0.978 }, "R8": { "total": 73, "detected": 5, "recall": 0.0685, "miss_rate": 0.9315 }, "R9": { "total": 152, "detected": 4, "recall": 0.0263, "miss_rate": 0.9737 }, "R10": { "total": 73, "detected": 0, "recall": 0.0, "miss_rate": 1.0 } }, "note": "level metrics N/A — model outputs binary safe/unsafe only" } }