""" Step 1: Generate companion conversation dataset using LLM. Usage: python scripts/generate_data.py --config configs/data_generation.yaml """ import argparse import yaml from pathlib import Path from src.data.data_generator import ConversationGenerator def main(): parser = argparse.ArgumentParser() parser.add_argument("--config", default="configs/data_generation.yaml") args = parser.parse_args() with open(args.config) as f: cfg = yaml.safe_load(f) Path(cfg["output"]["raw_dir"]).mkdir(parents=True, exist_ok=True) generator = ConversationGenerator( api_type=cfg["api"]["type"], model=cfg["api"]["model"], ) count = generator.generate_dataset( output_path=cfg["output"]["output_file"], total_samples=cfg["generation"]["total_samples"], samples_per_category=cfg["generation"]["samples_per_category"], delay=cfg["generation"]["delay"], ) print(f"Generated {count} samples → {cfg['output']['output_file']}") if __name__ == "__main__": main()