chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git. Reorganized root: docs/, reference/, experiments/, tmp/active|archives/. Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions
--- a/旧方向信息/scripts/preprocess/generate_noise.py
+++ b/旧方向信息/scripts/preprocess/generate_noise.py
@@ -0,0 +1,242 @@
+"""
+P0-4: Multimodal noise generation for robustness experiments.
+
+Supports three modalities: text, audio, visual.
+Each modality has configurable noise types and intensity levels.
+
+Usage:
+  python generate_noise.py --config configs/noise_configs.yaml \
+      --data_dir $ZSY/multimodal_affect/data/iemocap \
+      --out_dir  $ZSY/multimodal_affect/data/iemocap_noisy
+
+Config schema → see configs/noise_configs.yaml
+"""
+
+import os
+import json
+import argparse
+import yaml
+import numpy as np
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+RNG = np.random.default_rng(42)  # shared fixed-seed generator; every noise function draws from it, so output depends on call order
+
+
+# ═══════════════════════════════════════════════════════
+#  TEXT NOISE
+# ═══════════════════════════════════════════════════════
+
+def _word_drop(ids: np.ndarray, drop_rate: float) -> np.ndarray:
+    """Simulate word deletion by overwriting random token ids with 0.
+
+    Every position is dropped independently with probability ``drop_rate``.
+    The input is not modified; a new array is returned.
+    """
+    dropped = RNG.random(ids.shape) < drop_rate
+    return np.where(np.logical_not(dropped), ids, 0)
+
+
+def _word_swap(ids: np.ndarray, swap_rate: float) -> np.ndarray:
+    """Return a copy of ``ids`` with neighbouring tokens randomly exchanged.
+
+    Positions are visited left to right; at each one the token is swapped
+    with its right-hand neighbour with probability ``swap_rate``. The scan
+    continues on the already-modified sequence, so a single token can be
+    carried more than one position to the right.
+    """
+    swapped = ids.copy()
+    last = len(swapped) - 1
+    pos = 0
+    while pos < last:
+        # One scalar draw per position keeps the random stream in scan order.
+        if RNG.random() < swap_rate:
+            swapped[pos], swapped[pos + 1] = swapped[pos + 1], swapped[pos]
+        pos += 1
+    return swapped
+
+
+def _random_replace(ids: np.ndarray, replace_rate: float, vocab_size: int = 30522) -> np.ndarray:
+    """Replace non-zero tokens with random vocabulary ids.
+
+    Args:
+        ids: Token-id array. Positions equal to 0 are never replaced.
+        replace_rate: Per-position probability of replacement.
+        vocab_size: Exclusive upper bound of the sampled ids; replacements
+            are drawn uniformly from ``[1, vocab_size)``.
+
+    Returns:
+        A new array with the same shape as ``ids``. The dtype of ``ids`` is
+        kept whenever it can represent every id below ``vocab_size`` exactly.
+    """
+    mask = RNG.random(ids.shape) < replace_rate
+    rand_ids = RNG.integers(1, vocab_size, size=ids.shape)
+    # The draw is int64 by default; combining it with e.g. float32 ids in
+    # np.where would silently promote the whole result to float64. Cast to the
+    # input dtype, but only when that cast is lossless.
+    if np.can_cast(np.min_scalar_type(vocab_size - 1), ids.dtype):
+        rand_ids = rand_ids.astype(ids.dtype)
+    return np.where(mask & (ids != 0), rand_ids, ids)
+
+
+def add_text_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
+    """Apply text noise to a batch of text features.
+
+    Args:
+        features: Token ids of shape (N, seq_len) for the row-wise noise
+            types, or embedding features (N, dim) for ``"gaussian"``.
+        cfg: Noise settings. ``"type"`` is one of ``"word_drop"`` (default),
+            ``"word_swap"``, ``"random_replace"`` or ``"gaussian"``;
+            ``"intensity"`` (default 0.1) is the per-token rate for the
+            row-wise types and the noise scale for ``"gaussian"``.
+
+    Returns:
+        A new array holding the noisy features.
+
+    Raises:
+        ValueError: If ``cfg["type"]`` is not a known text noise type.
+    """
+    noise_type = cfg.get("type", "word_drop")
+    intensity = float(cfg.get("intensity", 0.1))
+
+    if noise_type == "gaussian":
+        # for embedding features (N, dim) not token ids
+        noise = RNG.standard_normal(features.shape).astype(np.float32)
+        return features + intensity * noise
+
+    row_ops = {
+        "word_drop": _word_drop,
+        "word_swap": _word_swap,
+        "random_replace": _random_replace,
+    }
+    if noise_type not in row_ops:
+        raise ValueError(f"Unknown text noise type: {noise_type}")
+
+    if len(features) == 0:
+        # np.stack rejects an empty list, so an empty batch is passed through.
+        return features.copy()
+
+    row_fn = row_ops[noise_type]
+    return np.stack([row_fn(row, intensity) for row in features])
+
+
+# ═══════════════════════════════════════════════════════
+#  AUDIO NOISE
+# ═══════════════════════════════════════════════════════
+
+def add_audio_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
+    """Apply noise to an audio feature matrix (N, n_mfcc).
+
+    Args:
+        features: Audio features, one row per sample.
+        cfg: Noise settings. ``"type"`` is one of ``"gaussian"`` (default),
+            ``"masking"``, ``"time_mask"`` or ``"scale"``; ``"intensity"``
+            (default 0.05) is the noise scale, the fraction of dimensions or
+            rows to zero, or the maximum relative amplitude change.
+
+    Returns:
+        A new array holding the noisy features; the input is not modified.
+
+    Raises:
+        ValueError: If ``cfg["type"]`` is not a known audio noise type.
+    """
+    noise_type = cfg.get("type", "gaussian")
+    intensity = float(cfg.get("intensity", 0.05))
+
+    if noise_type == "gaussian":
+        # noise is scaled by the per-dimension standard deviation
+        noise = RNG.standard_normal(features.shape).astype(np.float32)
+        return features + intensity * noise * features.std(axis=0, keepdims=True)
+
+    if noise_type == "masking":
+        # mask entire feature dimensions (simulates missing mic)
+        features = features.copy()
+        n_dims = features.shape[1]
+        # At least one dimension is masked, but never more than exist:
+        # sampling without replacement raises when the count is too large.
+        n_mask = min(n_dims, max(1, int(n_dims * intensity)))
+        dims = RNG.choice(n_dims, n_mask, replace=False)
+        features[:, dims] = 0.0
+        return features
+
+    if noise_type == "time_mask":
+        # mask random samples (simulates packet loss for temporal features)
+        features = features.copy()
+        n_rows = features.shape[0]
+        # Same clamp as above, applied to the sample axis.
+        n_mask = min(n_rows, max(1, int(n_rows * intensity)))
+        rows = RNG.choice(n_rows, n_mask, replace=False)
+        features[rows, :] = 0.0
+        return features
+
+    if noise_type == "scale":
+        # random amplitude scaling: one factor per row in [1 - i, 1 + i)
+        scale = 1.0 + intensity * (RNG.random(features.shape[0]) - 0.5) * 2
+        # Use float32 factors like the other branches so float32 features are
+        # not promoted to float64.
+        return features * scale.astype(np.float32)[:, None]
+
+    raise ValueError(f"Unknown audio noise type: {noise_type}")
+
+
+# ═══════════════════════════════════════════════════════
+#  VISUAL NOISE (operates on feature vectors, not pixels)
+# ═══════════════════════════════════════════════════════
+
+def add_visual_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
+    """Apply noise to a visual feature matrix (N, feat_dim).
+
+    Args:
+        features: Visual features, one row per sample.
+        cfg: Noise settings. ``"type"`` is one of ``"gaussian"`` (default),
+            ``"dropout"``, ``"occlusion"`` or ``"missing_modality"``;
+            ``"intensity"`` (default 0.1) is the noise scale, the drop
+            probability, or the fraction of dimensions or rows to zero.
+
+    Returns:
+        A new array holding the noisy features; the input is not modified.
+
+    Raises:
+        ValueError: If ``cfg["type"]`` is not a known visual noise type.
+    """
+    noise_type = cfg.get("type", "gaussian")
+    intensity = float(cfg.get("intensity", 0.1))
+
+    if noise_type == "gaussian":
+        noise = RNG.standard_normal(features.shape).astype(np.float32)
+        return features + intensity * noise
+
+    if noise_type == "dropout":
+        # each element is kept with probability (1 - intensity)
+        mask = (RNG.random(features.shape) > intensity).astype(np.float32)
+        return features * mask
+
+    if noise_type == "occlusion":
+        # zero out a contiguous block of feature dims
+        features = features.copy()
+        n_dims = features.shape[1]
+        length = min(n_dims, max(1, int(n_dims * intensity)))
+        # Draw the start so the whole block fits inside the feature axis;
+        # otherwise the block is cut off at the right edge and fewer dims
+        # than requested are occluded.
+        start = RNG.integers(0, n_dims - length + 1)
+        features[:, start:start + length] = 0.0
+        return features
+
+    if noise_type == "missing_modality":
+        # simulate completely missing video frames
+        features = features.copy()
+        n_rows = len(features)
+        # At least one row is zeroed, but never more than exist: sampling
+        # without replacement raises when the count is too large.
+        n_missing = min(n_rows, max(1, int(n_rows * intensity)))
+        idx = RNG.choice(n_rows, n_missing, replace=False)
+        features[idx, :] = 0.0
+        return features
+
+    raise ValueError(f"Unknown visual noise type: {noise_type}")
+
+
+# ═══════════════════════════════════════════════════════
+#  COMBINED MULTIMODAL NOISE
+# ═══════════════════════════════════════════════════════
+
+MODALITY_SPECS = [  # entries: (modality name used in file names, accepted config keys, noise function)
+    ("text", ("text",), add_text_noise),
+    ("audio", ("audio",), add_audio_noise),
+    # Dataset files use *_vision.npy. Older configs used "visual", so keep it
+    # as an input alias but always write the canonical "vision" filename.
+    ("vision", ("vision", "visual"), add_visual_noise),
+]
+
+
+def _get_modality_cfg(noise_cfg: Dict, aliases: tuple) -> Dict:
+    """Look up the noise settings for one modality.
+
+    The first alias that is a key of ``noise_cfg`` wins. When none of them
+    is present, the ``"default"`` entry is returned, or an empty dict if
+    that is missing as well.
+    """
+    present = [key for key in aliases if key in noise_cfg]
+    if present:
+        return noise_cfg[present[0]]
+    return noise_cfg.get("default", {})
+
+
+def apply_noise_config(data_dir: Path, out_dir: Path, noise_cfg: Dict,
+                       splits: list = None):
+    """Write a noisy copy of every split/modality file found in data_dir.
+
+    For each split and each entry of MODALITY_SPECS, the file
+    ``{split}_{modality}.npy`` is loaded (if it exists), cast to float32,
+    passed through the modality's noise function when a non-empty config is
+    found for it, and saved under the same name in ``out_dir``. Label files
+    and the metadata files ``label_map.json`` / ``meta.json`` are copied
+    unchanged when present.
+
+    Args:
+        data_dir: Directory containing the source ``.npy`` files.
+        out_dir: Destination directory; created if necessary.
+        noise_cfg: Mapping from modality name (or ``"default"``) to the
+            settings passed to the noise function.
+        splits: Split names to process; ``["train", "val", "test"]`` when
+            omitted.
+    """
+    import shutil
+
+    split_names = ["train", "val", "test"] if splits is None else splits
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    for split in split_names:
+        for modality, aliases, fn in MODALITY_SPECS:
+            file_name = f"{split}_{modality}.npy"
+            src = data_dir / file_name
+            if src.exists():
+                features = np.load(str(src))
+                mod_cfg = _get_modality_cfg(noise_cfg, aliases)
+                as_float = features.astype(np.float32)
+                # An empty/missing config means "no noise": store a clean copy.
+                noisy = fn(as_float, mod_cfg) if mod_cfg else as_float.copy()
+                dst = out_dir / file_name
+                np.save(str(dst), noisy)
+                print(f"  {split}/{modality}: {features.shape} → {dst.name}")
+
+        # copy labels unchanged
+        labels_name = f"{split}_labels.npy"
+        label_src = data_dir / labels_name
+        if label_src.exists():
+            shutil.copy2(str(label_src), str(out_dir / labels_name))
+
+    # copy metadata
+    for meta_file in ("label_map.json", "meta.json"):
+        meta_src = data_dir / meta_file
+        if meta_src.exists():
+            shutil.copy2(str(meta_src), str(out_dir / meta_file))
+
+
+def generate_noise_variants(data_dir: str, out_base: str, config: Dict):
+    """Generate the noisy dataset(s) described by ``config``.
+
+    When ``config["variants"]`` is a non-empty list, each variant (a dict
+    with ``"name"`` and ``"noise"``) is written to ``out_base/<name>``
+    together with a ``noise_config.json`` describing it. Otherwise
+    ``config["noise"]`` is applied once and written directly to ``out_base``.
+    """
+    src_root = Path(data_dir)
+    dst_root = Path(out_base)
+    variants = config.get("variants", [])
+
+    if variants:
+        for spec in variants:
+            label = spec["name"]
+            variant_noise = spec["noise"]
+            variant_dir = dst_root / label
+            print(f"\n[Variant: {label}]")
+            apply_noise_config(src_root, variant_dir, variant_noise)
+            # Keep the exact settings next to the data they produced.
+            with open(variant_dir / "noise_config.json", "w") as f:
+                json.dump(spec, f, indent=2)
+
+        print(f"\nAll variants saved under {dst_root}")
+    else:
+        # single-variant mode: apply config directly
+        apply_noise_config(src_root, dst_root, config.get("noise", {}))
+
+
+if __name__ == "__main__":
+    # CLI entry point: read the YAML noise config and generate the noisy
+    # dataset(s) for the given data directory.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", required=True,
+                        help="Path to noise_configs.yaml")
+    parser.add_argument("--data_dir", required=True,
+                        help="Dir with {split}_{modality}.npy files")
+    parser.add_argument("--out_dir", default=None,
+                        help="Output base dir (default: data_dir + '_noisy')")
+    args = parser.parse_args()
+
+    with open(args.config, encoding="utf-8") as f:
+        # An empty YAML document parses to None; fall back to an empty config
+        # so the later config.get(...) lookups do not fail.
+        config = yaml.safe_load(f) or {}
+
+    # Strip a trailing slash so the default becomes "<data_dir>_noisy" rather
+    # than a "_noisy" directory inside data_dir.
+    out_dir = args.out_dir or args.data_dir.rstrip("/") + "_noisy"
+    generate_noise_variants(args.data_dir, out_dir, config)