chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git.
Reorganized root: docs/, reference/, experiments/, tmp/active|archives/.
Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions

View File

@@ -0,0 +1,242 @@
"""
P0-4: Multimodal noise generation for robustness experiments.
Supports three modalities: text, audio, visual.
Each modality has configurable noise types and intensity levels.
Usage:
python generate_noise.py --config configs/noise_configs.yaml \
--data_dir $ZSY/multimodal_affect/data/iemocap \
--out_dir $ZSY/multimodal_affect/data/iemocap_noisy
Config schema → see configs/noise_configs.yaml
"""
import os
import json
import argparse
import yaml
import numpy as np
from pathlib import Path
from typing import Dict, Any, Optional
# Module-wide random generator shared by every noise function in this file.
# The seed is fixed (42), so a fresh process that applies the same noise
# operations in the same order draws the same random numbers.
RNG = np.random.default_rng(42)
# ═══════════════════════════════════════════════════════
# TEXT NOISE
# ═══════════════════════════════════════════════════════
def _word_drop(ids: np.ndarray, drop_rate: float) -> np.ndarray:
    """Return a copy of ``ids`` in which randomly chosen entries are set to 0.

    Each position is selected independently: one uniform sample in [0, 1) is
    drawn per element from the module-level ``RNG`` and the element is
    replaced by 0 when that sample is below ``drop_rate``.
    """
    dropped = RNG.random(ids.shape) < drop_rate
    # Keep the original value wherever the position was not selected.
    return np.where(~dropped, ids, 0)
def _word_swap(ids: np.ndarray, swap_rate: float) -> np.ndarray:
"""Randomly shuffle adjacent tokens."""
ids = ids.copy()
n = len(ids)
for i in range(n - 1):
if RNG.random() < swap_rate:
ids[i], ids[i + 1] = ids[i + 1], ids[i]
return ids
def _random_replace(ids: np.ndarray, replace_rate: float, vocab_size: int = 30522) -> np.ndarray:
    """Return a new array where randomly chosen non-zero ids are replaced.

    For every element a uniform sample decides whether it is a replacement
    candidate (sample below ``replace_rate``). Replacement ids are drawn from
    ``[1, vocab_size)``. Elements equal to 0 are never replaced.
    """
    # Draw order matters for reproducibility: selection samples first, then
    # the replacement ids.
    selected = RNG.random(ids.shape) < replace_rate
    candidates = RNG.integers(1, vocab_size, size=ids.shape)
    replaceable = selected & (ids != 0)
    return np.where(replaceable, candidates, ids)
def add_text_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply the text noise described by ``cfg`` to ``features``.

    ``cfg`` keys:
        type: one of ``"word_drop"`` (default), ``"word_swap"``,
            ``"random_replace"`` or ``"gaussian"``.
        intensity: noise strength, converted with ``float`` (default 0.1).
            It is the per-token rate for the three token-level types and the
            multiplier of the standard-normal noise for ``"gaussian"``.

    The token-level types process ``features`` one row at a time and stack the
    results; ``"gaussian"`` adds noise to the whole array at once and is meant
    for embedding features rather than token ids.

    Raises:
        ValueError: if ``type`` is not one of the supported names.
    """
    kind = cfg.get("type", "word_drop")
    level = float(cfg.get("intensity", 0.1))

    # Pick the per-row helper for token-level noise, if any.
    per_row = None
    if kind == "word_drop":
        per_row = _word_drop
    elif kind == "word_swap":
        per_row = _word_swap
    elif kind == "random_replace":
        per_row = _random_replace

    if per_row is not None:
        noisy_rows = [per_row(row, level) for row in features]
        return np.stack(noisy_rows)

    if kind == "gaussian":
        # for embedding features (N, dim) not token ids
        perturbation = RNG.standard_normal(features.shape).astype(np.float32)
        return features + level * perturbation

    raise ValueError(f"Unknown text noise type: {kind}")
# ═══════════════════════════════════════════════════════
# AUDIO NOISE
# ═══════════════════════════════════════════════════════
def add_audio_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply the audio noise described by ``cfg`` to a 2-D feature matrix.

    ``features`` is indexed as (samples, feature dims); the input array is
    never modified in place.

    ``cfg`` keys:
        type: one of
            ``"gaussian"`` (default): add standard-normal noise scaled by
                ``intensity`` and by each column's standard deviation;
            ``"masking"``: zero a random subset of columns (feature dims);
            ``"time_mask"``: zero a random subset of rows (samples);
            ``"scale"``: multiply each row by a random factor in
                ``[1 - intensity, 1 + intensity)``.
        intensity: noise strength, converted with ``float`` (default 0.05).
            For the two mask types it is the fraction of columns/rows to
            zero. Any positive value masks at least one column/row; values
            <= 0 leave the data unchanged; values above 1 mask everything.

    Raises:
        ValueError: if ``type`` is not one of the supported names.
    """
    noise_type = cfg.get("type", "gaussian")
    intensity = float(cfg.get("intensity", 0.05))

    if noise_type == "gaussian":
        noise = RNG.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise * features.std(axis=0, keepdims=True)

    if noise_type == "masking":
        # mask entire feature dimensions (simulates missing mic)
        masked = features.copy()
        n_dims = masked.shape[1]
        # Zero intensity is a no-op (consistent with "gaussian"); the count is
        # clamped to n_dims because sampling without replacement cannot draw
        # more items than exist.
        n_mask = min(n_dims, max(1, int(n_dims * intensity))) if intensity > 0 else 0
        if n_mask:
            dims = RNG.choice(n_dims, n_mask, replace=False)
            masked[:, dims] = 0.0
        return masked

    if noise_type == "time_mask":
        # mask random samples (simulates packet loss for temporal features)
        masked = features.copy()
        n_rows = masked.shape[0]
        # Same rules as "masking"; also makes an empty input a no-op.
        n_mask = min(n_rows, max(1, int(n_rows * intensity))) if intensity > 0 else 0
        if n_mask:
            rows = RNG.choice(n_rows, n_mask, replace=False)
            masked[rows, :] = 0.0
        return masked

    if noise_type == "scale":
        # random amplitude scaling, one factor per row
        scale = 1.0 + intensity * (RNG.random(features.shape[0]) - 0.5) * 2
        return features * scale[:, None]

    raise ValueError(f"Unknown audio noise type: {noise_type}")
# ═══════════════════════════════════════════════════════
# VISUAL NOISE (operates on feature vectors, not pixels)
# ═══════════════════════════════════════════════════════
def add_visual_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply the visual noise described by ``cfg`` to a 2-D feature matrix.

    ``features`` is indexed as (samples, feature dims); the input array is
    never modified in place.

    ``cfg`` keys:
        type: one of
            ``"gaussian"`` (default): add standard-normal noise scaled by
                ``intensity``;
            ``"dropout"``: zero each element independently with probability
                ``intensity``;
            ``"occlusion"``: zero one contiguous block of feature dims, the
                same block for every row;
            ``"missing_modality"``: zero a random subset of whole rows.
        intensity: noise strength, converted with ``float`` (default 0.1).
            For ``"occlusion"`` / ``"missing_modality"`` it is the fraction of
            dims / rows to zero. Any positive value affects at least one
            dim/row; values <= 0 leave the data unchanged; values above 1
            zero everything.

    Raises:
        ValueError: if ``type`` is not one of the supported names.
    """
    noise_type = cfg.get("type", "gaussian")
    intensity = float(cfg.get("intensity", 0.1))

    if noise_type == "gaussian":
        noise = RNG.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise

    if noise_type == "dropout":
        mask = (RNG.random(features.shape) > intensity).astype(np.float32)
        return features * mask

    if noise_type == "occlusion":
        # zero out a contiguous block of feature dims
        occluded = features.copy()
        n_dims = occluded.shape[1]
        length = min(n_dims, max(1, int(n_dims * intensity))) if intensity > 0 else 0
        if length:
            # Choose the start so the whole block fits inside the feature
            # axis; otherwise a block near the right edge would be cut short
            # and occlude fewer dims than requested. The upper bound of
            # Generator.integers is exclusive, hence the +1.
            start = int(RNG.integers(0, n_dims - length + 1))
            occluded[:, start:start + length] = 0.0
        return occluded

    if noise_type == "missing_modality":
        # simulate completely missing video frames
        dropped = features.copy()
        n_rows = len(dropped)
        # Clamp to n_rows: sampling without replacement cannot draw more rows
        # than exist. Zero intensity and empty input are no-ops.
        n_missing = min(n_rows, max(1, int(n_rows * intensity))) if intensity > 0 else 0
        if n_missing:
            idx = RNG.choice(n_rows, n_missing, replace=False)
            dropped[idx, :] = 0.0
        return dropped

    raise ValueError(f"Unknown visual noise type: {noise_type}")
# ═══════════════════════════════════════════════════════
# COMBINED MULTIMODAL NOISE
# ═══════════════════════════════════════════════════════
# One entry per modality:
#   (canonical name used in "{split}_{name}.npy" file names,
#    config keys accepted for that modality, in lookup order,
#    function that applies the noise)
MODALITY_SPECS = [
    (
        "text",
        ("text",),
        add_text_noise,
    ),
    (
        "audio",
        ("audio",),
        add_audio_noise,
    ),
    (
        # Files on disk are named *_vision.npy. "visual" is accepted as a
        # config key for older configs, but output files always use the
        # canonical "vision" name.
        "vision",
        ("vision", "visual"),
        add_visual_noise,
    ),
]
def _get_modality_cfg(noise_cfg: Dict, aliases: tuple) -> Dict:
for name in aliases:
if name in noise_cfg:
return noise_cfg[name]
return noise_cfg.get("default", {})
def apply_noise_config(data_dir: Path, out_dir: Path, noise_cfg: Dict,
                       splits: Optional[list] = None):
    """Write a noisy copy of the dataset in ``data_dir`` to ``out_dir``.

    For every split and every modality listed in ``MODALITY_SPECS``, the file
    ``{split}_{modality}.npy`` is loaded (if it exists), cast to float32,
    passed through the modality's noise function when a non-empty config is
    found for it, and saved under the same name in ``out_dir``. Modalities
    without a config are saved as an unmodified float32 copy. Label files
    (``{split}_labels.npy``) and the metadata files ``label_map.json`` /
    ``meta.json`` are copied unchanged when present.

    Args:
        data_dir: directory containing the source ``.npy`` files.
        out_dir: destination directory; created (with parents) if missing.
        noise_cfg: mapping from modality key (or ``"default"``) to noise
            settings. ``None`` is treated as "no noise for any modality".
        splits: split names to process; defaults to
            ``["train", "val", "test"]``.
    """
    # Imported locally because shutil is not among the module-level imports.
    import shutil

    if splits is None:
        splits = ["train", "val", "test"]
    if noise_cfg is None:
        noise_cfg = {}
    out_dir.mkdir(parents=True, exist_ok=True)

    for split in splits:
        for modality, aliases, fn in MODALITY_SPECS:
            src = data_dir / f"{split}_{modality}.npy"
            if not src.exists():
                continue
            features = np.load(str(src))
            mod_cfg = _get_modality_cfg(noise_cfg, aliases)
            # astype() already returns a new array, so the clean branch needs
            # no extra copy.
            clean = features.astype(np.float32)
            noisy = fn(clean, mod_cfg) if mod_cfg else clean
            dst = out_dir / f"{split}_{modality}.npy"
            np.save(str(dst), noisy)
            print(f" {split}/{modality}: {features.shape} -> {dst.name}")

        # copy labels unchanged
        label_src = data_dir / f"{split}_labels.npy"
        if label_src.exists():
            shutil.copy2(str(label_src), str(out_dir / f"{split}_labels.npy"))

    # copy metadata
    for meta_file in ["label_map.json", "meta.json"]:
        src = data_dir / meta_file
        if src.exists():
            shutil.copy2(str(src), str(out_dir / meta_file))
def generate_noise_variants(data_dir: str, out_base: str, config: Dict):
    """Generate one noisy dataset per variant defined in ``config``.

    Two modes, selected by ``config["variants"]``:

    * missing or empty: single-variant mode. ``config["noise"]`` is applied
      once and the result is written directly to ``out_base``.
    * a list of ``{"name": ..., "noise": ...}`` entries: each variant is
      written to ``out_base/<name>`` and the variant entry is saved next to
      the data as ``noise_config.json``.

    A ``noise`` value that is null (a YAML key with no value) is treated as
    an empty config, i.e. the data is copied without noise.

    Args:
        data_dir: directory with the source ``{split}_{modality}.npy`` files.
        out_base: output directory (single-variant mode) or parent of the
            per-variant directories.
        config: parsed configuration mapping.

    Raises:
        KeyError: if a variant entry lacks ``"name"`` or ``"noise"``.
    """
    data_dir = Path(data_dir)
    out_base = Path(out_base)
    variants = config.get("variants", [])

    if not variants:
        # single-variant mode: apply config directly
        apply_noise_config(data_dir, out_base, config.get("noise") or {})
        return

    for variant in variants:
        name = variant["name"]
        # A missing key still raises (it is most likely a typo); an explicit
        # null simply means "no noise".
        noise_cfg = variant["noise"] or {}
        out_dir = out_base / name
        print(f"\n[Variant: {name}]")
        apply_noise_config(data_dir, out_dir, noise_cfg)
        with open(out_dir / "noise_config.json", "w", encoding="utf-8") as f:
            json.dump(variant, f, indent=2)
    print(f"\nAll variants saved under {out_base}")
if __name__ == "__main__":
    # Command-line entry point: read the YAML config, then generate the noisy
    # dataset(s) described by it.
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True,
                        help="Path to noise_configs.yaml")
    parser.add_argument("--data_dir", required=True,
                        help="Dir with {split}_{modality}.npy files")
    parser.add_argument("--out_dir", default=None,
                        help="Output base dir (default: data_dir + '_noisy')")
    args = parser.parse_args()

    with open(args.config, encoding="utf-8") as f:
        # safe_load returns None for an empty file; fall back to an empty
        # mapping so the config lookups below do not fail on None.
        config = yaml.safe_load(f) or {}

    # Strip a trailing slash so the default becomes "<data_dir>_noisy" rather
    # than "<data_dir>/_noisy".
    out_dir = args.out_dir or args.data_dir.rstrip("/") + "_noisy"
    generate_noise_variants(args.data_dir, out_dir, config)