chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git.
Reorganized root: docs/, reference/, experiments/, tmp/active|archives/.
Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions

View File

@@ -0,0 +1,307 @@
{
"meta": {
"test_file": "data/processed/CompanionRisk-Bench/test.jsonl",
"source_filter": "human",
"n_total": 605,
"n_filtered": 119,
"n_risky": 99
},
"L1a_keyword": {
"binary_f1": 0.0,
"high_risk_recall": 0.0,
"high_risk_precision": 0.0,
"false_negative_rate": 1.0,
"level_macro_f1": 0.05755395683453237,
"level_weighted_f1": 0.04836466960885073,
"per_category_recall": {
"R1": {
"total": 36,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R2": {
"total": 6,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R3": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R4": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R5": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R6": {
"total": 31,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R7": {
"total": 5,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R8": {
"total": 2,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R9": {
"total": 19,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R10": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
}
}
},
"L1b_regex": {
"binary_f1": 0.0,
"high_risk_recall": 0.0,
"high_risk_precision": 0.0,
"false_negative_rate": 1.0,
"level_macro_f1": 0.05755395683453237,
"level_weighted_f1": 0.04836466960885073,
"per_category_recall": {
"R1": {
"total": 36,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R2": {
"total": 6,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R3": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R4": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R5": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R6": {
"total": 31,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R7": {
"total": 5,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R8": {
"total": 2,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R9": {
"total": 19,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R10": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
}
}
},
"L1c_combined": {
"binary_f1": 0.0,
"high_risk_recall": 0.0,
"high_risk_precision": 0.0,
"false_negative_rate": 1.0,
"level_macro_f1": 0.05755395683453237,
"level_weighted_f1": 0.04836466960885073,
"per_category_recall": {
"R1": {
"total": 36,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R2": {
"total": 6,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R3": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R4": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R5": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R6": {
"total": 31,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R7": {
"total": 5,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R8": {
"total": 2,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R9": {
"total": 19,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R10": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
}
}
},
"ours_detection": {
"binary_f1": 0.9847715736040609,
"high_risk_recall": 0.9797979797979798,
"high_risk_precision": 0.9897959183673469,
"false_negative_rate": 0.02020202020202022,
"level_macro_f1": 0.3641541183069423,
"level_weighted_f1": 0.4092843419457787,
"fine_macro_f1": 0.0,
"fine_weighted_f1": 0.0,
"fine_per_label_f1": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"per_category_recall": {
"R1": {
"total": 36,
"detected": 35,
"recall": 0.9722,
"miss_rate": 0.0278
},
"R2": {
"total": 6,
"detected": 5,
"recall": 0.8333,
"miss_rate": 0.1667
},
"R3": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R4": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R5": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
},
"R6": {
"total": 31,
"detected": 31,
"recall": 1.0,
"miss_rate": 0.0
},
"R7": {
"total": 5,
"detected": 5,
"recall": 1.0,
"miss_rate": 0.0
},
"R8": {
"total": 2,
"detected": 2,
"recall": 1.0,
"miss_rate": 0.0
},
"R9": {
"total": 19,
"detected": 19,
"recall": 1.0,
"miss_rate": 0.0
},
"R10": {
"total": 0,
"detected": 0,
"recall": 0.0,
"miss_rate": 1.0
}
}
}
}