# CompanionGuard-RL/code/configs/detector_config_server.yaml

# Detector backbone settings.
model:
  # Absolute, server-specific filesystem path (presumably the pretrained
  # encoder checkpoint directory — confirm against the model loader).
  name: '/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/macbert-large'
  hidden_size: 1024
  num_heads: 8
  dropout: 0.1
  use_lora: false

# Dataset split locations and input-length limits.
data:
  # JSONL splits of CompanionRisk-Bench, given as relative paths.
  train_path: 'data/processed/CompanionRisk-Bench/train.jsonl'
  val_path: 'data/processed/CompanionRisk-Bench/dev.jsonl'
  test_path: 'data/processed/CompanionRisk-Bench/test.jsonl'
  # Per-segment length caps (presumably in tokens — confirm against the
  # tokenizer call in the data loader).
  max_persona_len: 128
  max_context_len: 512
  max_response_len: 256
  max_history_turns: 5
  # Linux server: 4 workers; Windows: use 0
  num_workers: 4

# Optimisation schedule for detector training.
training:
  epochs: 10
  # 4 GPUs × 16 × accum 2 = effective batch 128
  per_gpu_batch_size: 16
  gradient_accumulation_steps: 2
  # Keep the explicit mantissa dot: YAML 1.1 loaders such as PyYAML resolve a
  # bare `2e-5` as the string "2e-5" instead of a float, while `2.0e-5` is a
  # float under both YAML 1.1 and YAML 1.2.
  lr: 2.0e-5
  warmup_steps: 100
  weight_decay: 0.01
  gradient_clip: 1.0
  eval_steps: 100  # global steps between validation runs
  mixed_precision: 'bf16'  # RTX 5090 native bf16
  seed: 42

# Relative weight of each loss term.
loss_weights:
  binary: 1.0
  level: 1.0
  primary: 1.0
  # Raised to 2.0 to strengthen the fine-grained label loss (intended to be
  # used together with the fine_training options being enabled).
  fine: 2.0

# Fine-grained label training options (to be enabled for the next training
# run; the current best.pt is not affected).
# Enabling both is noted to significantly improve fine_macro_f1:
#   use_pos_weight: gives rare labels such as Romanticization/CoRumination
#                   roughly 25x positive-sample weight
#   risky_only:     computes the fine loss only on samples with y_risk=1, so
#                   that safe samples do not teach the model to predict
#                   all-negative
fine_training:
  # Enabled: set pos_weight for rare fine labels (max 30).
  use_pos_weight: true
  # Enabled: compute the fine loss only on y_risk=1 samples.
  risky_only: true

# Decision thresholds used at evaluation time (presumably applied to
# predicted probabilities — confirm against the evaluation code).
evaluation:
  binary_threshold: 0.5
  fine_threshold: 0.4

# Experiment-tracking identifiers.
logging:
  project: 'CompanionGuard-RL'
  run_name: 'detector-macbert-4gpu'
  use_wandb: false

# Where training artefacts are written.
output:
  # Relative path (presumably resolved against the launch directory —
  # confirm against the training script).
  checkpoint_dir: 'checkpoints/detector'
-												chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git.
Reorganized root: docs/, reference/, experiments/, tmp/active|archives/.
Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-05-14 11:28:42 +08:00
+								model:
-												feat: Module C v5/v6 training complete, ablations, SOTA baselines, paper updates

- Module C: BC+PPO training v5/v6 done; eval results in experiments/eval_intervention_v{5,6}.json
- Reward: v5 label-aligned constrained reward (code/src/rl/reward.py)
- Ablations: Module B (history_r, response_only, full) + Module C (wo_category_reward)
- SOTA baselines: WildGuard and ShieldGemma2b eval scripts and results
- Paper: update sections 05–08 (Module B/C description, experiments table, discussion)
- Docs: add record.md (change log), update state.md and exp.md; retire change.md
- Tools: add html-to-ppt utilities and run_shieldgemma2b.sh
- Configs: add ablation YAML configs for Module B and C
- Cleanup: remove stale reference/ PNG screenshots

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-05-20 14:24:09 +08:00
+								  name: "/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/macbert-large"
-												chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git.
Reorganized root: docs/, reference/, experiments/, tmp/active|archives/.
Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-05-14 11:28:42 +08:00
+								  hidden_size: 1024
 								  num_heads: 8
 								  dropout: 0.1
 								  use_lora: false
 								data:
 								  train_path: "data/processed/CompanionRisk-Bench/train.jsonl"
 								  val_path:   "data/processed/CompanionRisk-Bench/dev.jsonl"
 								  test_path:  "data/processed/CompanionRisk-Bench/test.jsonl"
 								  max_persona_len:   128
 								  max_context_len:   512
 								  max_response_len:  256
 								  max_history_turns: 5
 								  num_workers: 4            # Linux server: 4 workers; Windows: use 0
 								training:
 								  epochs: 10
 								  per_gpu_batch_size: 16    # 4 GPUs × 16 × accum 2 = effective batch 128
 								  gradient_accumulation_steps: 2
 								  lr: 2e-5
 								  warmup_steps: 100
 								  weight_decay: 0.01
 								  gradient_clip: 1.0
 								  eval_steps: 100           # global steps between validation runs
 								  mixed_precision: "bf16"   # RTX 5090 native bf16
 								  seed: 42
 								loss_weights:
 								  binary:  1.0
 								  level:   1.0
 								  primary: 1.0
 								  fine:    2.0     # ↑ 2.0: 加强细粒度标签损失权重（配合 fine_training 开启）
 								# Fine-grained label training options（下次训练时开启，当前 best.pt 不受影响）
 								# 两项均开启可显著改善 fine_macro_f1：
 								#   use_pos_weight: 对 Romanticization/CoRumination 等稀有标签设置 ~25 倍正样本权重
 								#   risky_only:     只在 y_risk=1 的样本上计算 fine loss，避免 safe 样本教模型预测全负
 								fine_training:
 								  use_pos_weight: true    # ✓ 开启：对稀有 fine 标签设置 pos_weight（max 30）
 								  risky_only:     true    # ✓ 开启：只在 y_risk=1 样本上计算 fine loss
 								evaluation:
 								  binary_threshold: 0.5
 								  fine_threshold:   0.4
 								logging:
 								  project:   "CompanionGuard-RL"
 								  run_name:  "detector-macbert-4gpu"
 								  use_wandb: false
 								output:
 								  checkpoint_dir: "checkpoints/detector"