Two-module pipeline for AI companion safety: - Module B: context-aware risk detector with CrossAttention fusion - Module C: PPO-based adaptive intervention policy Includes CompanionRisk Taxonomy (10 primary + 14 fine-grained labels), dataset generation/annotation pipeline, training scripts, and eval suite. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
82 lines
2.4 KiB
Python
82 lines
2.4 KiB
Python
"""
|
|
Step 2: LLM judge pre-annotation.
|
|
|
|
Usage:
|
|
python scripts/annotate_data.py --input data/raw/generated.jsonl \
|
|
--output data/processed/annotated.jsonl \
|
|
--config configs/data_generation.yaml
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import yaml
|
|
import random
|
|
from pathlib import Path
|
|
from src.data.llm_judge import LLMJudge
|
|
from src.data.dataset import load_jsonl
|
|
|
|
|
|
def split_dataset(samples, train_ratio=0.8, val_ratio=0.1, seed=42):
    """Shuffle *samples* deterministically and cut them into three splits.

    The caller's sequence is left untouched and the module-level ``random``
    state is not reseeded: shuffling happens on a copy, driven by a private
    ``random.Random(seed)`` generator.  For a given seed the resulting order
    is the same one ``random.seed(seed); random.shuffle(...)`` would produce.

    Args:
        samples: Iterable of samples to split.
        train_ratio: Fraction of the samples placed in the training split.
        val_ratio: Fraction of the samples placed in the validation split.
        seed: Seed for the shuffle, so that splits are reproducible.

    Returns:
        A ``(train, val, test)`` tuple of lists.  Split sizes are truncated
        with ``int()``, and the test split receives whatever remains after
        the train and validation splits are taken.
    """
    # Work on a copy so the caller's list keeps its original order.
    shuffled = list(samples)
    # A dedicated generator avoids clobbering the global RNG state that
    # other code in the process may rely on.
    random.Random(seed).shuffle(shuffled)

    n = len(shuffled)
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)
    val_end = n_train + n_val

    return (
        shuffled[:n_train],
        shuffled[n_train:val_end],
        shuffled[val_end:],
    )
|
|
|
|
|
|
def save_jsonl(samples, path):
    """Write *samples* to *path* as JSON Lines, one JSON document per line.

    Missing parent directories are created.  The file is written as UTF-8 and
    non-ASCII characters are kept as-is (``ensure_ascii=False``).  A summary
    line with the number of records written is printed afterwards.

    Args:
        samples: Iterable of JSON-serializable objects.  Any iterable works,
            including generators, because records are counted while they are
            written instead of via ``len()``.
        path: Destination file (``str`` or ``pathlib.Path``).
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    count = 0  # stays 0 when the iterable is empty
    with target.open("w", encoding="utf-8") as f:
        for count, sample in enumerate(samples, start=1):
            f.write(json.dumps(sample, ensure_ascii=False) + "\n")

    # Report the path exactly as the caller passed it.
    print(f"Saved {count} samples → {path}")
|
|
|
|
|
|
def main():
    """Command-line entry point: annotate samples, then write dataset splits.

    Flags:
        --input: JSONL file to load (required).
        --output: Where the annotated samples go
            (default ``data/processed/annotated.jsonl``).
        --config: YAML config file (default ``configs/data_generation.yaml``).
        --skip-annotation: Do not call the LLM judge; save the loaded samples
            unchanged.

    ``train.jsonl``, ``val.jsonl`` and ``test.jsonl`` are written into the
    directory that contains the output file.

    Raises:
        KeyError: If annotation is requested but the config lacks
            ``api.type`` or ``annotation.judge_model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", default="data/processed/annotated.jsonl")
    parser.add_argument("--config", default="configs/data_generation.yaml")
    parser.add_argument("--skip-annotation", action="store_true",
                        help="Skip LLM annotation (use existing labels)")
    args = parser.parse_args()

    # Read the config as UTF-8 regardless of the platform's default encoding.
    # An empty YAML document loads as None, so fall back to an empty mapping.
    with open(args.config, encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}

    samples = load_jsonl(args.input)
    print(f"Loaded {len(samples)} samples from {args.input}")

    if not args.skip_annotation:
        judge = LLMJudge(
            api_type=cfg["api"]["type"],
            model=cfg["annotation"]["judge_model"],
        )
        # NOTE(review): annotate_batch presumably persists its results to
        # output_path itself, since nothing is saved here — confirm.
        samples = judge.annotate_batch(samples, output_path=args.output)
    else:
        save_jsonl(samples, args.output)

    # Tolerate a missing, null, or partial "split" section by falling back to
    # the 80/10/10 defaults (seed 42) key by key.
    split_cfg = cfg.get("split") or {}
    train, val, test = split_dataset(
        samples,
        train_ratio=split_cfg.get("train", 0.8),
        val_ratio=split_cfg.get("val", 0.1),
        seed=split_cfg.get("seed", 42),
    )

    base = Path(args.output).parent
    save_jsonl(train, base / "train.jsonl")
    save_jsonl(val, base / "val.jsonl")
    save_jsonl(test, base / "test.jsonl")

    print(f"Split: train={len(train)}, val={len(val)}, test={len(test)}")


if __name__ == "__main__":
    main()
|