Files
CompanionGuard-RL/scripts/annotate_data.py
wangyu 7d4345c29d feat: initial CompanionGuard-RL framework
Two-module pipeline for AI companion safety:
- Module B: context-aware risk detector with CrossAttention fusion
- Module C: PPO-based adaptive intervention policy

Includes CompanionRisk Taxonomy (10 primary + 14 fine-grained labels),
dataset generation/annotation pipeline, training scripts, and eval suite.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-09 17:21:11 +08:00

82 lines
2.4 KiB
Python

"""
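Step 2: LLM judge pre-annotation and train/val/test splitting.

Annotates the input JSONL with the LLM judge (unless --skip-annotation is
given), then writes train.jsonl, val.jsonl and test.jsonl next to --output.

Usage:
    python scripts/annotate_data.py --input data/raw/generated.jsonl \
        --output data/processed/annotated.jsonl \
        --config configs/data_generation.yaml
"""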
import argparse
import json
import yaml
import random
from pathlib import Path
from src.data.llm_judge import LLMJudge
from src.data.dataset import load_jsonl
def split_dataset(samples, train_ratio=0.8, val_ratio=0.1, seed=42):
    """Shuffle ``samples`` reproducibly and cut them into train/val/test.

    The input is copied before shuffling and a private RNG is used, so
    neither the caller's sequence nor the global ``random`` state is
    modified. The permutation is the same one ``random.seed(seed)``
    followed by ``random.shuffle`` would produce.

    Args:
        samples: Iterable of samples to split.
        train_ratio: Fraction of the samples assigned to the training split.
        val_ratio: Fraction of the samples assigned to the validation split.
        seed: Seed for the shuffling RNG.

    Returns:
        A ``(train, val, test)`` tuple of lists. The train and val sizes are
        ``int(n * ratio)`` (rounded down); the test split receives every
        remaining sample.

    Raises:
        ValueError: If a ratio is negative or ``train_ratio + val_ratio``
            exceeds 1.
    """
    # Small tolerance so ratios that sum to 1 only up to float rounding pass.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at "
            f"most 1 (got train_ratio={train_ratio}, val_ratio={val_ratio})"
        )

    shuffled = list(samples)
    # A dedicated Random instance keeps the module-level RNG untouched.
    random.Random(seed).shuffle(shuffled)

    n = len(shuffled)
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)
    return (
        shuffled[:n_train],
        shuffled[n_train:n_train + n_val],
        shuffled[n_train + n_val:],
    )
def save_jsonl(samples, path):
    """Write ``samples`` to ``path`` as UTF-8 JSON Lines, one object per line.

    Missing parent directories are created first. Non-ASCII characters are
    written verbatim rather than as ``\\uXXXX`` escapes. A one-line summary
    is printed once the file has been written.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in samples
        )
    print(f"Saved {len(samples)} samples → {path}")
def main():
    """Annotate a JSONL dataset with the LLM judge and write dataset splits.

    Command-line arguments:
        --input: Path of the JSONL file to load (required).
        --output: Path of the annotated JSONL file
            (default ``data/processed/annotated.jsonl``).
        --config: YAML config path (default ``configs/data_generation.yaml``).
        --skip-annotation: Skip the LLM judge and save the loaded samples
            unchanged to ``--output``.

    The config must provide ``api.type`` and ``annotation.judge_model``
    unless ``--skip-annotation`` is given. An optional ``split`` mapping may
    override ``train``, ``val`` and ``seed``; omitted keys fall back to
    0.8 / 0.1 / 42. After annotation the samples are split and written as
    ``train.jsonl``, ``val.jsonl`` and ``test.jsonl`` in the directory that
    contains ``--output``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", default="data/processed/annotated.jsonl")
    parser.add_argument("--config", default="configs/data_generation.yaml")
    parser.add_argument("--skip-annotation", action="store_true",
                        help="Skip LLM annotation (use existing labels)")
    args = parser.parse_args()

    # Decode the config as UTF-8 explicitly so parsing does not depend on the
    # platform's locale encoding. An empty YAML file loads as None, so fall
    # back to an empty mapping.
    with open(args.config, encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}

    samples = load_jsonl(args.input)
    print(f"Loaded {len(samples)} samples from {args.input}")

    if not args.skip_annotation:
        judge = LLMJudge(
            api_type=cfg["api"]["type"],
            model=cfg["annotation"]["judge_model"],
        )
        # NOTE(review): annotate_batch is presumably responsible for writing
        # the annotated samples to output_path -- confirm in LLMJudge.
        samples = judge.annotate_batch(samples, output_path=args.output)
    else:
        save_jsonl(samples, args.output)

    # Merge per-key so a partial ``split`` section (e.g. only ``seed``) still
    # gets defaults for the keys it omits. The ``test`` entry is informational
    # only: the test split is whatever remains after train and val.
    split_defaults = {"train": 0.8, "val": 0.1, "test": 0.1, "seed": 42}
    split_cfg = {**split_defaults, **(cfg.get("split") or {})}
    train, val, test = split_dataset(
        samples,
        train_ratio=split_cfg["train"],
        val_ratio=split_cfg["val"],
        seed=split_cfg["seed"],
    )

    base = Path(args.output).parent
    save_jsonl(train, base / "train.jsonl")
    save_jsonl(val, base / "val.jsonl")
    save_jsonl(test, base / "test.jsonl")
    print(f"Split: train={len(train)}, val={len(val)}, test={len(test)}")


if __name__ == "__main__":
    main()