CompanionGuard-RL/experiments/eval_intervention_v6.json

{
  "meta": {
    "test_file": "data/processed/CompanionRisk-Bench/test.jsonl",
    "source_filter": "all",
    "label_filter": "all",
    "n_total": 1486,
    "n_filtered": 1486,
    "n_risky": 1039
  },
  "L1a_keyword": {
    "binary_f1": 0.26436781609195403,
    "high_risk_recall": 0.15495668912415783,
    "high_risk_precision": 0.8994413407821229,
    "false_negative_rate": 0.8450433108758422,
    "level_macro_f1": 0.10427720349098286,
    "level_weighted_f1": 0.09799538109505529,
    "level_per_class_f1": [
      0.2979274611398964,
      0.0,
      0.1934156378600823,
      0.030042918454935622,
      0.0
    ],
    "per_category_recall": {
      "R1": {
        "total": 136,
        "detected": 10,
        "recall": 0.0735,
        "miss_rate": 0.9265
      },
      "R2": {
        "total": 142,
        "detected": 16,
        "recall": 0.1127,
        "miss_rate": 0.8873
      },
      "R3": {
        "total": 95,
        "detected": 17,
        "recall": 0.1789,
        "miss_rate": 0.8211
      },
      "R4": {
        "total": 116,
        "detected": 22,
        "recall": 0.1897,
        "miss_rate": 0.8103
      },
      "R5": {
        "total": 64,
        "detected": 9,
        "recall": 0.1406,
        "miss_rate": 0.8594
      },
      "R6": {
        "total": 97,
        "detected": 11,
        "recall": 0.1134,
        "miss_rate": 0.8866
      },
      "R7": {
        "total": 91,
        "detected": 6,
        "recall": 0.0659,
        "miss_rate": 0.9341
      },
      "R8": {
        "total": 73,
        "detected": 49,
        "recall": 0.6712,
        "miss_rate": 0.3288
      },
      "R9": {
        "total": 152,
        "detected": 11,
        "recall": 0.0724,
        "miss_rate": 0.9276
      },
      "R10": {
        "total": 73,
        "detected": 10,
        "recall": 0.137,
        "miss_rate": 0.863
      }
    }
  },
  "L1b_regex": {
    "binary_f1": 0.06697674418604652,
    "high_risk_recall": 0.03464870067372473,
    "high_risk_precision": 1.0,
    "false_negative_rate": 0.9653512993262753,
    "level_macro_f1": 0.07297879241072718,
    "level_weighted_f1": 0.06312377515343655,
    "level_per_class_f1": [
      0.2809721398933017,
      0.0,
      0.07954545454545454,
      0.00437636761487965,
      0.0
    ],
    "per_category_recall": {
      "R1": {
        "total": 136,
        "detected": 0,
        "recall": 0.0,
        "miss_rate": 1.0
      },
      "R2": {
        "total": 142,
        "detected": 1,
        "recall": 0.007,
        "miss_rate": 0.993
      },
      "R3": {
        "total": 95,
        "detected": 19,
        "recall": 0.2,
        "miss_rate": 0.8
      },
      "R4": {
        "total": 116,
        "detected": 9,
        "recall": 0.0776,
        "miss_rate": 0.9224
      },
      "R5": {
        "total": 64,
        "detected": 0,
        "recall": 0.0,
        "miss_rate": 1.0
      },
      "R6": {
        "total": 97,
        "detected": 0,
        "recall": 0.0,
        "miss_rate": 1.0
      },
      "R7": {
        "total": 91,
        "detected": 3,
        "recall": 0.033,
        "miss_rate": 0.967
      },
      "R8": {
        "total": 73,
        "detected": 0,
        "recall": 0.0,
        "miss_rate": 1.0
      },
      "R9": {
        "total": 152,
        "detected": 0,
        "recall": 0.0,
        "miss_rate": 1.0
      },
      "R10": {
        "total": 73,
        "detected": 4,
        "recall": 0.0548,
        "miss_rate": 0.9452
      }
    }
  },
  "L1c_combined": {
    "binary_f1": 0.3060897435897436,
    "high_risk_recall": 0.18383060635226178,
    "high_risk_precision": 0.9138755980861244,
    "false_negative_rate": 0.8161693936477382,
    "level_macro_f1": 0.11189027535274536,
    "level_weighted_f1": 0.10619241328971442,
    "level_per_class_f1": [
      0.3038309114927345,
      0.0,
      0.22135922330097088,
      0.034261241970021415,
      0.0
    ],
    "per_category_recall": {
      "R1": {
        "total": 136,
        "detected": 10,
        "recall": 0.0735,
        "miss_rate": 0.9265
      },
      "R2": {
        "total": 142,
        "detected": 17,
        "recall": 0.1197,
        "miss_rate": 0.8803
      },
      "R3": {
        "total": 95,
        "detected": 32,
        "recall": 0.3368,
        "miss_rate": 0.6632
      },
      "R4": {
        "total": 116,
        "detected": 29,
        "recall": 0.25,
        "miss_rate": 0.75
      },
      "R5": {
        "total": 64,
        "detected": 9,
        "recall": 0.1406,
        "miss_rate": 0.8594
      },
      "R6": {
        "total": 97,
        "detected": 11,
        "recall": 0.1134,
        "miss_rate": 0.8866
      },
      "R7": {
        "total": 91,
        "detected": 9,
        "recall": 0.0989,
        "miss_rate": 0.9011
      },
      "R8": {
        "total": 73,
        "detected": 49,
        "recall": 0.6712,
        "miss_rate": 0.3288
      },
      "R9": {
        "total": 152,
        "detected": 11,
        "recall": 0.0724,
        "miss_rate": 0.9276
      },
      "R10": {
        "total": 73,
        "detected": 14,
        "recall": 0.1918,
        "miss_rate": 0.8082
      }
    }
  },
  "ours_detection": {
    "binary_f1": 0.9995189995189995,
    "high_risk_recall": 1.0,
    "high_risk_precision": 0.9990384615384615,
    "false_negative_rate": 0.0,
    "level_macro_f1": 0.5495554176357882,
    "level_weighted_f1": 0.5584578220374772,
    "level_per_class_f1": [
      0.37540453074433655,
      0.6351931330472103,
      0.46393762183235865,
      0.6400759734093068,
      0.6331658291457286
    ],
    "fine_per_label_f1": [
      0.6844262295081968,
      0.46567164179104475,
      0.697986577181208,
      0.40233236151603496,
      0.585,
      0.3559322033898305,
      0.38322211630123926,
      0.3374578177727784,
      0.531810766721044,
      0.39436619718309857,
      0.2691029900332226,
      0.4410480349344978,
      0.32142857142857145,
      0.615916955017301
    ],
    "fine_macro_f1": 0.46326446162700485,
    "fine_weighted_f1": 0.4915026862223374,
    "per_category_recall": {
      "R1": {
        "total": 136,
        "detected": 136,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R2": {
        "total": 142,
        "detected": 142,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R3": {
        "total": 95,
        "detected": 95,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R4": {
        "total": 116,
        "detected": 116,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R5": {
        "total": 64,
        "detected": 64,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R6": {
        "total": 97,
        "detected": 97,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R7": {
        "total": 91,
        "detected": 91,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R8": {
        "total": 73,
        "detected": 73,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R9": {
        "total": 152,
        "detected": 152,
        "recall": 1.0,
        "miss_rate": 0.0
      },
      "R10": {
        "total": 73,
        "detected": 73,
        "recall": 1.0,
        "miss_rate": 0.0
      }
    },
    "label_filter": "all"
  },
  "baseline_rule": {
    "intervention_recall_high": 0.9079754601226994,
    "safety_recall": 0.9079754601226994,
    "over_refusal": 0.0,
    "over_intervention_rate": 0.0,
    "action_distribution": [
      0.46231493943472407,
      0.0,
      0.0,
      0.5376850605652759,
      0.0
    ],
    "per_level_action_dist": {
      "L0_Safe": {
        "n": 237,
        "action_dist": [
          1.0,
          0.0,
          0.0,
          0.0,
          0.0
        ]
      },
      "L1_Mild": {
        "n": 280,
        "action_dist": [
          0.9178571428571428,
          0.0,
          0.0,
          0.08214285714285714,
          0.0
        ]
      },
      "L2_Moderate": {
        "n": 317,
        "action_dist": [
          0.4195583596214511,
          0.0,
          0.0,
          0.580441640378549,
          0.0
        ]
      },
      "L3_High": {
        "n": 456,
        "action_dist": [
          0.11403508771929824,
          0.0,
          0.0,
          0.8859649122807017,
          0.0
        ]
      },
      "L4_Critical": {
        "n": 196,
        "action_dist": [
          0.04081632653061224,
          0.0,
          0.0,
          0.9591836734693877,
          0.0
        ]
      }
    },
    "crisis_precision_l4": NaN,
    "crisis_precision": NaN,
    "crisis_appropriateness": NaN,
    "R1_high_critical_crisis_recall": 0.0,
    "reject_rate_R6_R7_high": 0.956989247311828,
    "strong_intervention_rate_L1": 0.08214285714285714,
    "per_category_action_dist": {
      "R1": {
        "n": 140,
        "action_dist": [
          0.17857142857142858,
          0.0,
          0.0,
          0.8214285714285714,
          0.0
        ]
      },
      "R2": {
        "n": 150,
        "action_dist": [
          0.4533333333333333,
          0.0,
          0.0,
          0.5466666666666666,
          0.0
        ]
      },
      "R3": {
        "n": 95,
        "action_dist": [
          0.05263157894736842,
          0.0,
          0.0,
          0.9473684210526315,
          0.0
        ]
      },
      "R4": {
        "n": 176,
        "action_dist": [
          0.48295454545454547,
          0.0,
          0.0,
          0.5170454545454546,
          0.0
        ]
      },
      "R5": {
        "n": 64,
        "action_dist": [
          0.359375,
          0.0,
          0.0,
          0.640625,
          0.0
        ]
      },
      "R6": {
        "n": 100,
        "action_dist": [
          0.09,
          0.0,
          0.0,
          0.91,
          0.0
        ]
      },
      "R7": {
        "n": 91,
        "action_dist": [
          0.02197802197802198,
          0.0,
          0.0,
          0.978021978021978,
          0.0
        ]
      },
      "R8": {
        "n": 215,
        "action_dist": [
          0.786046511627907,
          0.0,
          0.0,
          0.21395348837209302,
          0.0
        ]
      },
      "R9": {
        "n": 382,
        "action_dist": [
          0.7827225130890052,
          0.0,
          0.0,
          0.21727748691099477,
          0.0
        ]
      },
      "R10": {
        "n": 73,
        "action_dist": [
          0.0273972602739726,
          0.0,
          0.0,
          0.9726027397260274,
          0.0
        ]
      }
    },
    "safety_ux_fscore": 0.9517684887459806
  },
  "baseline_threshold": {
    "intervention_recall_high": 0.9079754601226994,
    "safety_recall": 0.9079754601226994,
    "over_refusal": 0.0,
    "over_intervention_rate": 0.0,
    "action_distribution": [
      0.3304172274562584,
      0.13189771197846567,
      0.40174966352624497,
      0.0,
      0.13593539703903096
    ],
    "per_level_action_dist": {
      "L0_Safe": {
        "n": 237,
        "action_dist": [
          1.0,
          0.0,
          0.0,
          0.0,
          0.0
        ]
      },
      "L1_Mild": {
        "n": 280,
        "action_dist": [
          0.8428571428571429,
          0.075,
          0.08214285714285714,
          0.0,
          0.0
        ]
      },
      "L2_Moderate": {
        "n": 317,
        "action_dist": [
          0.04416403785488959,
          0.3753943217665615,
          0.5520504731861199,
          0.0,
          0.028391167192429023
        ]
      },
      "L3_High": {
        "n": 456,
        "action_dist": [
          0.008771929824561403,
          0.10526315789473684,
          0.7390350877192983,
          0.0,
          0.14692982456140352
        ]
      },
      "L4_Critical": {
        "n": 196,
        "action_dist": [
          0.0,
          0.04081632653061224,
          0.3163265306122449,
          0.0,
          0.6428571428571429
        ]
      }
    },
    "crisis_precision_l4": 0.6237623762376238,
    "crisis_precision": 0.6237623762376238,
    "crisis_appropriateness": 0.7128712871287128,
    "R1_high_critical_crisis_recall": 0.5132743362831859,
    "reject_rate_R6_R7_high": 0.0,
    "strong_intervention_rate_L1": 0.08214285714285714,
    "per_category_action_dist": {
      "R1": {
        "n": 140,
        "action_dist": [
          0.03571428571428571,
          0.14285714285714285,
          0.38571428571428573,
          0.0,
          0.4357142857142857
        ]
      },
      "R2": {
        "n": 150,
        "action_dist": [
          0.12,
          0.3333333333333333,
          0.5333333333333333,
          0.0,
          0.013333333333333334
        ]
      },
      "R3": {
        "n": 95,
        "action_dist": [
          0.042105263157894736,
          0.010526315789473684,
          0.9473684210526315,
          0.0,
          0.0
        ]
      },
      "R4": {
        "n": 176,
        "action_dist": [
          0.42613636363636365,
          0.056818181818181816,
          0.5170454545454546,
          0.0,
          0.0
        ]
      },
      "R5": {
        "n": 64,
        "action_dist": [
          0.0,
          0.359375,
          0.4375,
          0.0,
          0.203125
        ]
      },
      "R6": {
        "n": 100,
        "action_dist": [
          0.03,
          0.06,
          0.4,
          0.0,
          0.51
        ]
      },
      "R7": {
        "n": 91,
        "action_dist": [
          0.0,
          0.02197802197802198,
          0.2087912087912088,
          0.0,
          0.7692307692307693
        ]
      },
      "R8": {
        "n": 215,
        "action_dist": [
          0.6651162790697674,
          0.12093023255813953,
          0.21395348837209302,
          0.0,
          0.0
        ]
      },
      "R9": {
        "n": 382,
        "action_dist": [
          0.6361256544502618,
          0.14659685863874344,
          0.21727748691099477,
          0.0,
          0.0
        ]
      },
      "R10": {
        "n": 73,
        "action_dist": [
          0.0,
          0.0273972602739726,
          0.9041095890410958,
          0.0,
          0.0684931506849315
        ]
      }
    },
    "safety_ux_fscore": 0.9517684887459806
  },
  "bc_only_intervention": {
    "intervention_recall_high": 0.9401840490797546,
    "safety_recall": 0.9401840490797546,
    "over_refusal": 0.0,
    "over_intervention_rate": 0.0,
    "action_accuracy": 0.6965006729475101,
    "exact_action_accuracy_by_level": {
      "L0_Safe": 1.0,
      "L1_Mild": 0.8071428571428572,
      "L2_Moderate": 0.28391167192429023,
      "L3_High": 0.7850877192982456,
      "L4_Critical": 0.6326530612244898
    },
    "action_distribution": [
      0.3203230148048452,
      0.09488559892328398,
      0.351278600269179,
      0.16218034993270525,
      0.07133243606998654
    ],
    "per_level_action_dist": {
      "L0_Safe": {
        "n": 237,
        "action_dist": [
          1.0,
          0.0,
          0.0,
          0.0,
          0.0
        ]
      },
      "L1_Mild": {
        "n": 280,
        "action_dist": [
          0.8071428571428572,
          0.05357142857142857,
          0.11071428571428571,
          0.017857142857142856,
          0.010714285714285714
        ]
      },
      "L2_Moderate": {
        "n": 317,
        "action_dist": [
          0.031545741324921134,
          0.28391167192429023,
          0.5331230283911672,
          0.08832807570977919,
          0.06309148264984227
        ]
      },
      "L3_High": {
        "n": 456,
        "action_dist": [
          0.006578947368421052,
          0.07017543859649122,
          0.6513157894736842,
          0.20833333333333334,
          0.06359649122807018
        ]
      },
      "L4_Critical": {
        "n": 196,
        "action_dist": [
          0.0,
          0.02040816326530612,
          0.12755102040816327,
          0.576530612244898,
          0.2755102040816326
        ]
      }
    },
    "crisis_precision_l4": 0.5094339622641509,
    "crisis_precision": 0.5094339622641509,
    "crisis_appropriateness": 0.6509433962264151,
    "R1_high_critical_crisis_recall": 0.4690265486725664,
    "reject_rate_R6_R7_high": 0.7849462365591398,
    "strong_intervention_rate_L1": 0.1392857142857143,
    "per_category_action_dist": {
      "R1": {
        "n": 140,
        "action_dist": [
          0.02857142857142857,
          0.05,
          0.20714285714285716,
          0.24285714285714285,
          0.4714285714285714
        ]
      },
      "R2": {
        "n": 150,
        "action_dist": [
          0.1,
          0.25333333333333335,
          0.5,
          0.12,
          0.02666666666666667
        ]
      },
      "R3": {
        "n": 95,
        "action_dist": [
          0.021052631578947368,
          0.0,
          0.9368421052631579,
          0.042105263157894736,
          0.0
        ]
      },
      "R4": {
        "n": 176,
        "action_dist": [
          0.3977272727272727,
          0.028409090909090908,
          0.5625,
          0.011363636363636364,
          0.0
        ]
      },
      "R5": {
        "n": 64,
        "action_dist": [
          0.0,
          0.3125,
          0.4375,
          0.015625,
          0.234375
        ]
      },
      "R6": {
        "n": 100,
        "action_dist": [
          0.03,
          0.01,
          0.14,
          0.69,
          0.13
        ]
      },
      "R7": {
        "n": 91,
        "action_dist": [
          0.0,
          0.01098901098901099,
          0.0989010989010989,
          0.8681318681318682,
          0.02197802197802198
        ]
      },
      "R8": {
        "n": 215,
        "action_dist": [
          0.6604651162790698,
          0.10232558139534884,
          0.20930232558139536,
          0.023255813953488372,
          0.004651162790697674
        ]
      },
      "R9": {
        "n": 382,
        "action_dist": [
          0.6282722513089005,
          0.11780104712041885,
          0.193717277486911,
          0.0549738219895288,
          0.005235602094240838
        ]
      },
      "R10": {
        "n": 73,
        "action_dist": [
          0.0,
          0.0273972602739726,
          0.821917808219178,
          0.1095890410958904,
          0.0410958904109589
        ]
      }
    },
    "safety_ux_fscore": 0.9691699604743083
  },
  "ours_intervention": {
    "intervention_recall_high": 0.9524539877300614,
    "safety_recall": 0.9524539877300614,
    "over_refusal": 0.0,
    "over_intervention_rate": 0.0,
    "action_accuracy": 0.7059219380888291,
    "exact_action_accuracy_by_level": {
      "L0_Safe": 1.0,
      "L1_Mild": 0.8214285714285714,
      "L2_Moderate": 0.2807570977917981,
      "L3_High": 0.8048245614035088,
      "L4_Critical": 0.6428571428571429
    },
    "action_distribution": [
      0.32166890982503366,
      0.0901749663526245,
      0.3916554508748318,
      0.12584118438761777,
      0.07065948855989233
    ],
    "per_level_action_dist": {
      "L0_Safe": {
        "n": 237,
        "action_dist": [
          1.0,
          0.0,
          0.0,
          0.0,
          0.0
        ]
      },
      "L1_Mild": {
        "n": 280,
        "action_dist": [
          0.8214285714285714,
          0.07142857142857142,
          0.1,
          0.007142857142857143,
          0.0
        ]
      },
      "L2_Moderate": {
        "n": 317,
        "action_dist": [
          0.025236593059936908,
          0.27129337539432175,
          0.5930599369085173,
          0.0694006309148265,
          0.04100946372239748
        ]
      },
      "L3_High": {
        "n": 456,
        "action_dist": [
          0.006578947368421052,
          0.05921052631578947,
          0.7105263157894737,
          0.15350877192982457,
          0.07017543859649122
        ]
      },
      "L4_Critical": {
        "n": 196,
        "action_dist": [
          0.0,
          0.00510204081632653,
          0.21428571428571427,
          0.4744897959183674,
          0.30612244897959184
        ]
      }
    },
    "crisis_precision_l4": 0.5714285714285714,
    "crisis_precision": 0.5714285714285714,
    "crisis_appropriateness": 0.7523809523809524,
    "R1_high_critical_crisis_recall": 0.5663716814159292,
    "reject_rate_R6_R7_high": 0.7150537634408602,
    "strong_intervention_rate_L1": 0.10714285714285714,
    "per_category_action_dist": {
      "R1": {
        "n": 140,
        "action_dist": [
          0.03571428571428571,
          0.07857142857142857,
          0.2857142857142857,
          0.09285714285714286,
          0.5071428571428571
        ]
      },
      "R2": {
        "n": 150,
        "action_dist": [
          0.1,
          0.24,
          0.5666666666666667,
          0.06666666666666667,
          0.02666666666666667
        ]
      },
      "R3": {
        "n": 95,
        "action_dist": [
          0.021052631578947368,
          0.021052631578947368,
          0.9263157894736842,
          0.021052631578947368,
          0.010526315789473684
        ]
      },
      "R4": {
        "n": 176,
        "action_dist": [
          0.4034090909090909,
          0.07386363636363637,
          0.5170454545454546,
          0.0,
          0.005681818181818182
        ]
      },
      "R5": {
        "n": 64,
        "action_dist": [
          0.0,
          0.1875,
          0.65625,
          0.03125,
          0.125
        ]
      },
      "R6": {
        "n": 100,
        "action_dist": [
          0.03,
          0.0,
          0.25,
          0.56,
          0.16
        ]
      },
      "R7": {
        "n": 91,
        "action_dist": [
          0.0,
          0.01098901098901099,
          0.10989010989010989,
          0.8681318681318682,
          0.01098901098901099
        ]
      },
      "R8": {
        "n": 215,
        "action_dist": [
          0.6604651162790698,
          0.07441860465116279,
          0.24651162790697675,
          0.018604651162790697,
          0.0
        ]
      },
      "R9": {
        "n": 382,
        "action_dist": [
          0.6282722513089005,
          0.10732984293193717,
          0.21727748691099477,
          0.04450261780104712,
          0.002617801047120419
        ]
      },
      "R10": {
        "n": 73,
        "action_dist": [
          0.0,
          0.0273972602739726,
          0.8904109589041096,
          0.0547945205479452,
          0.0273972602739726
        ]
      }
    },
    "safety_ux_fscore": 0.9756480754124116
  }
}