Merged code repo (CompanionGuard-RL) into single project-level git. Reorganized root: docs/, reference/, experiments/, tmp/active|archives/. Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
243 lines
9.4 KiB
Python
243 lines
9.4 KiB
Python
"""
P0-4: Multimodal noise generation for robustness experiments.

Supports three modalities: text, audio, visual (stored as *_vision.npy).
Each modality has configurable noise types and intensity levels.

Usage:
    python generate_noise.py --config configs/noise_configs.yaml \
        --data_dir $ZSY/multimodal_affect/data/iemocap \
        --out_dir $ZSY/multimodal_affect/data/iemocap_noisy

Config schema → see configs/noise_configs.yaml
"""
|
|
|
|
import os
|
|
import json
|
|
import argparse
|
|
import yaml
|
|
import numpy as np
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
|
|
# Module-wide random generator shared by every noise function below.
# The seed is fixed so that repeated runs draw the same noise sequence.
RNG = np.random.default_rng(seed=42)
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════
|
|
# TEXT NOISE
|
|
# ═══════════════════════════════════════════════════════
|
|
|
|
def _word_drop(ids: np.ndarray, drop_rate: float) -> np.ndarray:
    """Replace a random subset of token ids with 0 (simulates word deletion).

    Each position is dropped independently with probability ``drop_rate``.
    The input array is not modified; a new array of the same shape is
    returned.
    """
    dropped = RNG.random(ids.shape) < drop_rate
    return np.where(np.logical_not(dropped), ids, 0)
|
|
|
|
|
|
def _word_swap(ids: np.ndarray, swap_rate: float) -> np.ndarray:
    """Randomly exchange neighbouring tokens in one left-to-right pass.

    For every position except the last, one random number is drawn; when it
    falls below ``swap_rate`` the token is exchanged with its right-hand
    neighbour. Because the pass is sequential, a token that was just moved
    right can be moved again at the next position.

    Works on a copy, so the caller's array is left untouched.
    """
    swapped = ids.copy()
    last = len(swapped) - 1
    pos = 0
    while pos < last:
        if RNG.random() < swap_rate:
            left = swapped[pos]
            swapped[pos] = swapped[pos + 1]
            swapped[pos + 1] = left
        pos += 1
    return swapped
|
|
|
|
|
|
def _random_replace(ids: np.ndarray, replace_rate: float, vocab_size: int = 30522) -> np.ndarray:
    """Swap a random subset of tokens for random vocabulary ids.

    Each position is selected independently with probability
    ``replace_rate``. Selected positions receive an id drawn uniformly from
    ``[1, vocab_size)``; positions whose id is 0 are never replaced.
    A new array is returned and the input is not modified.
    """
    # Draw order (selection first, candidates second) matters for
    # reproducibility with the shared seeded generator.
    selected = RNG.random(ids.shape) < replace_rate
    candidates = RNG.integers(1, vocab_size, size=ids.shape)
    non_zero = ids != 0
    return np.where(selected & non_zero, candidates, ids)
|
|
|
|
|
|
def add_text_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply text noise to a batch of text features.

    Args:
        features: Array whose first axis indexes samples. Token ids
            ``(N, seq_len)`` for the token-level noise types; embedding
            features ``(N, dim)`` for ``"gaussian"``.
        cfg: Noise settings. ``"type"`` selects the noise
            (``"word_drop"`` (default), ``"word_swap"``, ``"random_replace"``
            or ``"gaussian"``); ``"intensity"`` (default 0.1) is the
            per-token rate for the token-level types and the noise scale for
            ``"gaussian"``.

    Returns:
        A new array with the noise applied. An empty batch is returned as an
        empty copy.

    Raises:
        ValueError: If ``cfg["type"]`` is not one of the supported types.
    """
    noise_type = cfg.get("type", "word_drop")
    intensity = float(cfg.get("intensity", 0.1))

    if noise_type == "gaussian":
        # for embedding features (N, dim) not token ids
        noise = RNG.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise

    if noise_type not in ("word_drop", "word_swap", "random_replace"):
        raise ValueError(f"Unknown text noise type: {noise_type}")

    if len(features) == 0:
        # np.stack refuses an empty sequence, and an empty batch has nothing
        # to corrupt anyway.
        return features.copy()

    if noise_type == "word_drop":
        row_fn = _word_drop
    elif noise_type == "word_swap":
        row_fn = _word_swap
    else:
        row_fn = _random_replace
    # The token-level helpers operate on one sequence at a time.
    return np.stack([row_fn(row, intensity) for row in features])
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════
|
|
# AUDIO NOISE
|
|
# ═══════════════════════════════════════════════════════
|
|
|
|
def add_audio_noise(features: np.ndarray, cfg: Dict,
                    rng: Optional[np.random.Generator] = None) -> np.ndarray:
    """Apply noise to audio feature matrix (N, n_mfcc).

    Args:
        features: 2-D feature matrix, one row per sample. Not modified.
        cfg: Noise settings. ``"type"`` is one of ``"gaussian"`` (default),
            ``"masking"``, ``"time_mask"`` or ``"scale"``; ``"intensity"``
            (default 0.05) controls the strength.
        rng: Random generator to draw from. Defaults to the module-level
            ``RNG``.

    Noise types:
        gaussian: adds standard-normal noise scaled by ``intensity`` and by
            each column's standard deviation.
        masking: zeroes ``int(n_cols * intensity)`` randomly chosen columns
            (at least one, at most all of them).
        time_mask: zeroes ``int(n_rows * intensity)`` randomly chosen rows
            (at least one, at most all of them).
        scale: multiplies every row by its own factor drawn uniformly from
            ``[1 - intensity, 1 + intensity)``.

    Returns:
        A new array of the same shape as ``features``.

    Raises:
        ValueError: If ``cfg["type"]`` is not one of the supported types.
    """
    noise_type = cfg.get("type", "gaussian")
    intensity = float(cfg.get("intensity", 0.05))
    gen = RNG if rng is None else rng

    if noise_type == "gaussian":
        noise = gen.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise * features.std(axis=0, keepdims=True)

    if noise_type == "masking":
        # mask entire feature dimensions (simulates missing mic)
        masked = features.copy()
        n_dims = masked.shape[1]
        # Clamp to the number of columns: sampling without replacement
        # raises when asked for more items than exist (intensity > 1).
        n_mask = min(n_dims, max(1, int(n_dims * intensity)))
        dims = gen.choice(n_dims, n_mask, replace=False)
        masked[:, dims] = 0.0
        return masked

    if noise_type == "time_mask":
        # mask random samples (simulates packet loss for temporal features)
        masked = features.copy()
        n_rows = masked.shape[0]
        n_mask = min(n_rows, max(1, int(n_rows * intensity)))
        rows = gen.choice(n_rows, n_mask, replace=False)
        masked[rows, :] = 0.0
        return masked

    if noise_type == "scale":
        # random amplitude scaling, one factor per row
        scale = 1.0 + intensity * (gen.random(features.shape[0]) - 0.5) * 2
        # The factors are drawn as float64; cast them so float32 features
        # stay float32 like in the other branches.
        out_dtype = np.result_type(features.dtype, np.float32)
        return features * scale[:, None].astype(out_dtype)

    raise ValueError(f"Unknown audio noise type: {noise_type}")
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════
|
|
# VISUAL NOISE (operates on feature vectors, not pixels)
|
|
# ═══════════════════════════════════════════════════════
|
|
|
|
def add_visual_noise(features: np.ndarray, cfg: Dict,
                     rng: Optional[np.random.Generator] = None) -> np.ndarray:
    """Apply noise to visual feature matrix (N, feat_dim).

    Args:
        features: 2-D feature matrix, one row per sample. Not modified.
        cfg: Noise settings. ``"type"`` is one of ``"gaussian"`` (default),
            ``"dropout"``, ``"occlusion"`` or ``"missing_modality"``;
            ``"intensity"`` (default 0.1) controls the strength.
        rng: Random generator to draw from. Defaults to the module-level
            ``RNG``.

    Noise types:
        gaussian: adds standard-normal noise scaled by ``intensity``.
        dropout: zeroes each element independently with probability
            ``intensity`` (no rescaling of the surviving elements).
        occlusion: zeroes one contiguous block of
            ``int(feat_dim * intensity)`` columns (at least one, at most
            all). The block always lies fully inside the matrix.
        missing_modality: zeroes ``int(N * intensity)`` randomly chosen rows
            (at least one, at most all).

    Returns:
        A new array of the same shape as ``features``.

    Raises:
        ValueError: If ``cfg["type"]`` is not one of the supported types.
    """
    noise_type = cfg.get("type", "gaussian")
    intensity = float(cfg.get("intensity", 0.1))
    gen = RNG if rng is None else rng

    if noise_type == "gaussian":
        noise = gen.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise

    if noise_type == "dropout":
        mask = (gen.random(features.shape) > intensity).astype(np.float32)
        return features * mask

    if noise_type == "occlusion":
        # zero out a contiguous block of feature dims
        occluded = features.copy()
        n_dims = occluded.shape[1]
        length = min(n_dims, max(1, int(n_dims * intensity)))
        # Draw the start only from positions where the whole block fits;
        # otherwise the slice is cut off at the right edge and fewer dims
        # than requested are occluded. The upper bound is exclusive.
        start = int(gen.integers(0, n_dims - length + 1))
        occluded[:, start:start + length] = 0.0
        return occluded

    if noise_type == "missing_modality":
        # simulate completely missing video frames
        dropped = features.copy()
        n_rows = len(dropped)
        # Clamp to the number of rows: sampling without replacement raises
        # when asked for more items than exist (intensity > 1).
        n_missing = min(n_rows, max(1, int(n_rows * intensity)))
        idx = gen.choice(n_rows, n_missing, replace=False)
        dropped[idx, :] = 0.0
        return dropped

    raise ValueError(f"Unknown visual noise type: {noise_type}")
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════
|
|
# COMBINED MULTIMODAL NOISE
|
|
# ═══════════════════════════════════════════════════════
|
|
|
|
# One entry per modality, processed in this order:
#   1. canonical name  - used in "{split}_{name}.npy" for input and output
#   2. config keys     - keys tried, in order, to find the modality's settings
#   3. noise function  - applied to the loaded feature array
MODALITY_SPECS = [
    (
        "text",
        ("text",),
        add_text_noise,
    ),
    (
        "audio",
        ("audio",),
        add_audio_noise,
    ),
    # Feature files are named *_vision.npy. "visual" is still accepted as a
    # config key because older configs used it, but the file name written is
    # always the canonical "vision" one.
    (
        "vision",
        ("vision", "visual"),
        add_visual_noise,
    ),
]
|
|
|
|
|
|
def _get_modality_cfg(noise_cfg: Dict, aliases: tuple) -> Dict:
    """Look up one modality's settings inside a noise config.

    The aliases are tried in order and the value stored under the first one
    present in ``noise_cfg`` is returned as-is. When none is present the
    ``"default"`` entry is returned, or an empty dict if that is missing too.
    """
    not_found = object()
    hit = next((noise_cfg[key] for key in aliases if key in noise_cfg), not_found)
    return noise_cfg.get("default", {}) if hit is not_found else hit
|
|
|
|
|
|
def apply_noise_config(data_dir: Path, out_dir: Path, noise_cfg: Dict,
                       splits: list = None):
    """Write a noisy copy of the dataset in ``data_dir`` to ``out_dir``.

    For every split (``train``/``val``/``test`` unless ``splits`` is given)
    and every entry of ``MODALITY_SPECS``, the file
    ``{split}_{modality}.npy`` is loaded if it exists, cast to float32,
    passed through the modality's noise function and saved under the same
    name in ``out_dir``. When no non-empty settings are found for a modality,
    the float32 features are saved without noise.

    ``{split}_labels.npy``, ``label_map.json`` and ``meta.json`` are copied
    over unchanged when present. ``out_dir`` is created if needed.
    """
    import shutil

    split_names = ["train", "val", "test"] if splits is None else splits
    out_dir.mkdir(parents=True, exist_ok=True)

    for split in split_names:
        for modality, aliases, noise_fn in MODALITY_SPECS:
            file_name = f"{split}_{modality}.npy"
            src = data_dir / file_name
            if src.exists():
                features = np.load(str(src))
                mod_cfg = _get_modality_cfg(noise_cfg, aliases)

                as_float = features.astype(np.float32)
                noisy = noise_fn(as_float, mod_cfg) if mod_cfg else as_float.copy()

                dst = out_dir / file_name
                np.save(str(dst), noisy)
                print(f" {split}/{modality}: {features.shape} → {dst.name}")

        # labels are never perturbed
        labels_name = f"{split}_labels.npy"
        labels_src = data_dir / labels_name
        if labels_src.exists():
            shutil.copy2(str(labels_src), str(out_dir / labels_name))

    # dataset-level metadata travels with the noisy copy
    for meta_name in ["label_map.json", "meta.json"]:
        meta_src = data_dir / meta_name
        if meta_src.exists():
            shutil.copy2(str(meta_src), str(out_dir / meta_name))
|
|
|
|
|
|
def generate_noise_variants(data_dir: str, out_base: str, config: Dict):
    """Produce every noisy dataset described by ``config``.

    When ``config["variants"]`` is a non-empty list, each variant (a dict
    with ``"name"`` and ``"noise"``) is written to ``out_base/<name>``
    together with a ``noise_config.json`` holding the variant definition.
    Otherwise ``config["noise"]`` (empty if absent) is applied once and the
    result is written directly to ``out_base``.
    """
    source_root = Path(data_dir)
    target_root = Path(out_base)

    variants = config.get("variants", [])
    if variants:
        for spec in variants:
            name = spec["name"]
            noise_cfg = spec["noise"]
            variant_dir = target_root / name
            print(f"\n[Variant: {name}]")
            apply_noise_config(source_root, variant_dir, noise_cfg)
            # keep the exact settings next to the data they produced
            with (variant_dir / "noise_config.json").open("w") as handle:
                json.dump(spec, handle, indent=2)

        print(f"\nAll variants saved under {target_root}")
    else:
        # single-variant mode: apply config directly
        apply_noise_config(source_root, target_root, config.get("noise", {}))
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: load the YAML config and generate the noisy data.

    Exits with a usage error when the config file does not contain a YAML
    mapping (for example when it is empty).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True,
                        help="Path to noise_configs.yaml")
    parser.add_argument("--data_dir", required=True,
                        help="Dir with {split}_{modality}.npy files")
    parser.add_argument("--out_dir", default=None,
                        help="Output base dir (default: data_dir + '_noisy')")
    args = parser.parse_args()

    with open(args.config, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # An empty file loads as None and a top-level list as a list; both would
    # otherwise fail later with an unhelpful AttributeError on ``.get``.
    if not isinstance(config, dict):
        parser.error(
            f"{args.config} must contain a YAML mapping, "
            f"got {type(config).__name__}"
        )

    out_dir = args.out_dir or args.data_dir.rstrip("/") + "_noisy"
    generate_noise_variants(args.data_dir, out_dir, config)


if __name__ == "__main__":
    main()
|