feat: initial CompanionGuard-RL framework

Two-module pipeline for AI companion safety:
- Module B: context-aware risk detector with CrossAttention fusion
- Module C: PPO-based adaptive intervention policy

Includes CompanionRisk Taxonomy (10 primary + 14 fine-grained labels),
dataset generation/annotation pipeline, training scripts, and eval suite.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-09 17:21:11 +08:00
commit 7d4345c29d
29 changed files with 3317 additions and 0 deletions

40
scripts/generate_data.py Normal file
View File

@@ -0,0 +1,40 @@
"""
Step 1: Generate companion conversation dataset using LLM.
Usage:
python scripts/generate_data.py --config configs/data_generation.yaml
"""
import argparse
import yaml
from pathlib import Path
from src.data.data_generator import ConversationGenerator
def main():
    """Generate the companion conversation dataset described by a YAML config.

    Parses the ``--config`` command-line option (default
    ``configs/data_generation.yaml``), loads that file with
    ``yaml.safe_load``, ensures the output directories exist, builds a
    ``ConversationGenerator`` from the ``api`` section and calls its
    ``generate_dataset`` method with the ``output`` / ``generation``
    settings. The value returned by ``generate_dataset`` is printed as the
    number of generated samples.

    Config keys read:
        api.type, api.model,
        output.raw_dir, output.output_file,
        generation.total_samples, generation.samples_per_category,
        generation.delay

    Raises:
        OSError: if the config file cannot be opened or a directory cannot
            be created.
        KeyError: if one of the config keys listed above is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="configs/data_generation.yaml")
    args = parser.parse_args()

    # Decode the config explicitly as UTF-8 so that non-ASCII content is read
    # the same way regardless of the platform's default locale encoding.
    with open(args.config, encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    Path(cfg["output"]["raw_dir"]).mkdir(parents=True, exist_ok=True)

    # The config does not force output_file to live inside raw_dir, so make
    # sure its own parent directory exists as well before generating.
    output_file = cfg["output"]["output_file"]
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    generator = ConversationGenerator(
        api_type=cfg["api"]["type"],
        model=cfg["api"]["model"],
    )
    count = generator.generate_dataset(
        output_path=output_file,
        total_samples=cfg["generation"]["total_samples"],
        samples_per_category=cfg["generation"]["samples_per_category"],
        delay=cfg["generation"]["delay"],
    )
    print(f"Generated {count} samples → {cfg['output']['output_file']}")
# Run the generation pipeline only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()