chore: initial commit — unified project repo
Merged code repo (CompanionGuard-RL) into single project-level git. Reorganized root: docs/, reference/, experiments/, tmp/active|archives/. Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
242
旧方向信息/scripts/preprocess/generate_noise.py
Normal file
242
旧方向信息/scripts/preprocess/generate_noise.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""
|
||||
P0-4: Multimodal noise generation for robustness experiments.
|
||||
|
||||
Supports three modalities: text, audio, visual.
|
||||
Each modality has configurable noise types and intensity levels.
|
||||
|
||||
Usage:
|
||||
python generate_noise.py --config configs/noise_configs.yaml \
|
||||
--data_dir $ZSY/multimodal_affect/data/iemocap \
|
||||
--out_dir $ZSY/multimodal_affect/data/iemocap_noisy
|
||||
|
||||
Config schema → see configs/noise_configs.yaml
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import yaml
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
# Single module-level generator with a fixed seed (42). Every noise function
# in this file draws from it, so the values produced depend on the order in
# which those functions are called within one run.
RNG = np.random.default_rng(42)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════
|
||||
# TEXT NOISE
|
||||
# ═══════════════════════════════════════════════════════
|
||||
|
||||
def _word_drop(ids: np.ndarray, drop_rate: float) -> np.ndarray:
    """Simulate word deletion by replacing randomly chosen token ids with 0.

    One uniform sample is drawn per element of ``ids``. Elements whose sample
    falls below ``drop_rate`` come back as 0; all others are passed through.
    A new array is returned and ``ids`` itself is not modified.
    """
    dropped = RNG.random(ids.shape) < drop_rate
    survivors = np.logical_not(dropped)
    return np.where(survivors, ids, 0)
|
||||
|
||||
|
||||
def _word_swap(ids: np.ndarray, swap_rate: float) -> np.ndarray:
    """Return a copy of ``ids`` in which neighbouring tokens are randomly exchanged.

    Positions are visited left to right; at each position one uniform sample
    is drawn and, when it is below ``swap_rate``, the token is exchanged with
    its right-hand neighbour. The walk operates on the already-modified copy,
    so a token that was just moved right can be moved again on the next step.
    """
    shuffled = ids.copy()
    last = len(shuffled) - 1
    pos = 0
    while pos < last:
        if RNG.random() < swap_rate:
            held = shuffled[pos]
            shuffled[pos] = shuffled[pos + 1]
            shuffled[pos + 1] = held
        pos += 1
    return shuffled
|
||||
|
||||
|
||||
def _random_replace(ids: np.ndarray, replace_rate: float, vocab_size: int = 30522) -> np.ndarray:
    """Replace randomly chosen non-zero tokens with random vocabulary ids.

    Args:
        ids: Array of token ids. Entries equal to 0 are never replaced.
        replace_rate: Per-token probability of replacement.
        vocab_size: Exclusive upper bound for the replacement ids, which are
            drawn uniformly from ``[1, vocab_size)``.
            NOTE(review): the default 30522 is presumably a tokenizer
            vocabulary size — confirm against the tokenizer in use.

    Returns:
        A new array with the same shape and dtype as ``ids``.
    """
    chosen = (RNG.random(ids.shape) < replace_rate) & (ids != 0)
    substitutes = RNG.integers(1, vocab_size, size=ids.shape)
    replaced = np.where(chosen, substitutes, ids)
    # np.where promotes to a common type with the int64 substitutes (float32
    # input would come back as float64, int32 as int64); cast back so this
    # noise type returns the same dtype as the other text noise functions.
    return replaced.astype(ids.dtype, copy=False)
|
||||
|
||||
|
||||
def add_text_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply text noise to an array of token-id features (N, seq_len).

    Args:
        features: Token-id rows for the row-wise noise types, or embedding
            features (N, dim) when ``type`` is ``"gaussian"``.
        cfg: Noise settings. ``"type"`` selects the noise (default
            ``"word_drop"``); ``"intensity"`` is the per-token rate, or the
            noise scale for ``"gaussian"`` (default 0.1).

    Returns:
        A new array holding the noisy features.

    Raises:
        ValueError: If ``cfg["type"]`` is not a known text noise type.
    """
    noise_type = cfg.get("type", "word_drop")
    intensity = float(cfg.get("intensity", 0.1))

    if noise_type == "gaussian":
        # for embedding features (N, dim) not token ids
        noise = RNG.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise

    if noise_type not in ("word_drop", "word_swap", "random_replace"):
        raise ValueError(f"Unknown text noise type: {noise_type}")

    # np.stack refuses an empty list of rows, so an empty batch is handed
    # back (as a copy) instead of crashing.
    if len(features) == 0:
        return features.copy()

    if noise_type == "word_drop":
        row_op = _word_drop
    elif noise_type == "word_swap":
        row_op = _word_swap
    else:
        row_op = _random_replace
    return np.stack([row_op(row, intensity) for row in features])
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════
|
||||
# AUDIO NOISE
|
||||
# ═══════════════════════════════════════════════════════
|
||||
|
||||
def add_audio_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply noise to audio feature matrix (N, n_mfcc).

    Supported ``cfg["type"]`` values (default ``"gaussian"``):

    * ``"gaussian"``  - additive normal noise, scaled per column by that
      column's standard deviation times ``intensity``.
    * ``"masking"``   - zero whole feature columns; the count is
      ``int(n_columns * intensity)``, at least 1 and at most all columns.
    * ``"time_mask"`` - zero whole rows, counted the same way over rows.
    * ``"scale"``     - multiply each row by a factor drawn uniformly from
      ``[1 - intensity, 1 + intensity)``.

    ``cfg["intensity"]`` defaults to 0.05.

    Returns:
        A new array; ``features`` is not modified.

    Raises:
        ValueError: If ``cfg["type"]`` is not one of the types above.
    """
    noise_type = cfg.get("type", "gaussian")
    intensity = float(cfg.get("intensity", 0.05))

    if noise_type == "gaussian":
        noise = RNG.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise * features.std(axis=0, keepdims=True)

    if noise_type == "masking":
        # mask entire feature dimensions (simulates missing mic)
        masked = features.copy()
        n_dims = masked.shape[1]
        if n_dims == 0:
            # nothing to mask; sampling from an empty axis would raise
            return masked
        # clamp so an intensity above 1 masks everything instead of asking
        # for more columns than exist (which choice(replace=False) rejects)
        n_mask = min(n_dims, max(1, int(n_dims * intensity)))
        dims = RNG.choice(n_dims, n_mask, replace=False)
        masked[:, dims] = 0.0
        return masked

    if noise_type == "time_mask":
        # mask random samples (simulates packet loss for temporal features)
        masked = features.copy()
        n_rows = masked.shape[0]
        if n_rows == 0:
            return masked
        n_mask = min(n_rows, max(1, int(n_rows * intensity)))
        rows = RNG.choice(n_rows, n_mask, replace=False)
        masked[rows, ...] = 0.0
        return masked

    if noise_type == "scale":
        # random amplitude scaling, one factor per row
        scale = 1.0 + intensity * (RNG.random(features.shape[0]) - 0.5) * 2
        # keep float32 input as float32 (the raw factors are float64) so all
        # noise types return the same dtype
        out_dtype = np.result_type(features.dtype, np.float32)
        # one trailing singleton axis per remaining feature axis, so the
        # factor broadcasts along rows for any input rank
        per_row = (-1,) + (1,) * (features.ndim - 1)
        return features * scale.astype(out_dtype, copy=False).reshape(per_row)

    raise ValueError(f"Unknown audio noise type: {noise_type}")
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════
|
||||
# VISUAL NOISE (operates on feature vectors, not pixels)
|
||||
# ═══════════════════════════════════════════════════════
|
||||
|
||||
def add_visual_noise(features: np.ndarray, cfg: Dict) -> np.ndarray:
    """Apply noise to visual feature matrix (N, feat_dim).

    Supported ``cfg["type"]`` values (default ``"gaussian"``):

    * ``"gaussian"``         - additive normal noise scaled by ``intensity``.
    * ``"dropout"``          - zero individual entries whose uniform sample
      is not above ``intensity``; kept entries are not rescaled.
    * ``"occlusion"``        - zero one contiguous block of feature columns
      of length ``int(feat_dim * intensity)`` (at least 1, at most all).
    * ``"missing_modality"`` - zero whole rows; the count is
      ``int(N * intensity)``, at least 1 and at most all rows.

    ``cfg["intensity"]`` defaults to 0.1.

    Returns:
        A new array; ``features`` is not modified.

    Raises:
        ValueError: If ``cfg["type"]`` is not one of the types above.
    """
    noise_type = cfg.get("type", "gaussian")
    intensity = float(cfg.get("intensity", 0.1))

    if noise_type == "gaussian":
        noise = RNG.standard_normal(features.shape).astype(np.float32)
        return features + intensity * noise

    if noise_type == "dropout":
        mask = (RNG.random(features.shape) > intensity).astype(np.float32)
        return features * mask

    if noise_type == "occlusion":
        # zero out a contiguous block of feature dims
        occluded = features.copy()
        n_dims = occluded.shape[1]
        length = min(n_dims, max(1, int(n_dims * intensity)))
        # Choose the start so the whole block fits. Drawing it independently
        # of the length lets the block run off the right edge, which occludes
        # fewer dims than the configured intensity asks for.
        max_start = n_dims - length
        start = int(RNG.integers(0, max_start + 1)) if max_start > 0 else 0
        occluded[:, start:start + length] = 0.0
        return occluded

    if noise_type == "missing_modality":
        # simulate completely missing video frames
        blanked = features.copy()
        n_rows = len(blanked)
        if n_rows == 0:
            # nothing to blank; sampling from an empty axis would raise
            return blanked
        # clamp so an intensity above 1 blanks every row instead of asking
        # for more rows than exist (which choice(replace=False) rejects)
        n_missing = min(n_rows, max(1, int(n_rows * intensity)))
        idx = RNG.choice(n_rows, n_missing, replace=False)
        blanked[idx, ...] = 0.0
        return blanked

    raise ValueError(f"Unknown visual noise type: {noise_type}")
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════
|
||||
# COMBINED MULTIMODAL NOISE
|
||||
# ═══════════════════════════════════════════════════════
|
||||
|
||||
# One entry per modality: (canonical name, accepted config keys, noise function).
# apply_noise_config uses the canonical name to build both the input and the
# output "{split}_{modality}.npy" filename, and looks the config up under the
# accepted keys in the order listed.
MODALITY_SPECS = [
    ("text", ("text",), add_text_noise),
    ("audio", ("audio",), add_audio_noise),
    # Dataset files use *_vision.npy. Older configs used "visual", so keep it
    # as an input alias but always write the canonical "vision" filename.
    ("vision", ("vision", "visual"), add_visual_noise),
]
|
||||
|
||||
|
||||
def _get_modality_cfg(noise_cfg: Dict, aliases: tuple) -> Dict:
|
||||
for name in aliases:
|
||||
if name in noise_cfg:
|
||||
return noise_cfg[name]
|
||||
return noise_cfg.get("default", {})
|
||||
|
||||
|
||||
def apply_noise_config(data_dir: Path, out_dir: Path, noise_cfg: Dict,
                       splits: list = None):
    """Write a noisy copy of every split/modality file found in data_dir.

    For each split (default: train, val, test) and each entry of
    MODALITY_SPECS, ``{split}_{modality}.npy`` is loaded if it exists, cast to
    float32, passed through the modality's noise function when a non-empty
    config is found for it (otherwise left clean), and saved under the same
    filename in ``out_dir``. Missing files are skipped silently.

    ``{split}_labels.npy`` and the metadata files ``label_map.json`` and
    ``meta.json`` are copied over unchanged when present.

    NOTE(review): the float32 cast is applied to every modality, so integer
    token ids are also saved as floats — confirm downstream loaders expect that.
    """
    import shutil

    split_names = ["train", "val", "test"] if splits is None else splits
    out_dir.mkdir(parents=True, exist_ok=True)

    for split in split_names:
        for modality, aliases, fn in MODALITY_SPECS:
            file_name = f"{split}_{modality}.npy"
            src = data_dir / file_name
            if not src.exists():
                continue

            features = np.load(str(src))
            mod_cfg = _get_modality_cfg(noise_cfg, aliases)

            # astype already returns a fresh array, so the clean path needs
            # no further copy
            as_float = features.astype(np.float32)
            noisy = fn(as_float, mod_cfg) if mod_cfg else as_float

            dst = out_dir / file_name
            np.save(str(dst), noisy)
            print(f" {split}/{modality}: {features.shape} → {dst.name}")

        # labels travel with their split, untouched
        labels_name = f"{split}_labels.npy"
        label_src = data_dir / labels_name
        if label_src.exists():
            shutil.copy2(str(label_src), str(out_dir / labels_name))

    # dataset-level metadata is copied once, after all splits
    for meta_file in ["label_map.json", "meta.json"]:
        meta_src = data_dir / meta_file
        if meta_src.exists():
            shutil.copy2(str(meta_src), str(out_dir / meta_file))
|
||||
|
||||
|
||||
def generate_noise_variants(data_dir: str, out_base: str, config: Dict):
    """Generate multiple noise variants as defined in config.

    Args:
        data_dir: Directory holding the clean ``{split}_{modality}.npy`` files.
        out_base: Output directory. In multi-variant mode each variant is
            written to ``out_base/<variant name>``.
        config: Parsed configuration. If it has a non-empty ``"variants"``
            list, each item must provide ``"name"`` and ``"noise"``; the item
            is also dumped to ``noise_config.json`` next to its output.
            Otherwise the top-level ``"noise"`` mapping is applied once,
            directly into ``out_base``.

    Raises:
        KeyError: If a variant lacks ``"name"`` or ``"noise"``.
    """
    data_dir = Path(data_dir)
    out_base = Path(out_base)
    # A YAML key written without a value (e.g. a bare "noise:") parses as
    # None; fall back to empty containers so that reads as "no noise" instead
    # of failing later on a membership test against None.
    config = config or {}

    variants = config.get("variants") or []
    if not variants:
        # single-variant mode: apply config directly
        apply_noise_config(data_dir, out_base, config.get("noise") or {})
        return

    for variant in variants:
        name = variant["name"]
        noise_cfg = variant["noise"] or {}
        out_dir = out_base / name
        print(f"\n[Variant: {name}]")
        apply_noise_config(data_dir, out_dir, noise_cfg)
        # keep the exact settings alongside the data they produced
        with open(out_dir / "noise_config.json", "w", encoding="utf-8") as f:
            json.dump(variant, f, indent=2)

    print(f"\nAll variants saved under {out_base}")
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line, load the YAML config and generate the noisy data."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True,
                        help="Path to noise_configs.yaml")
    parser.add_argument("--data_dir", required=True,
                        help="Dir with {split}_{modality}.npy files")
    parser.add_argument("--out_dir", default=None,
                        help="Output base dir (default: data_dir + '_noisy')")
    args = parser.parse_args()

    with open(args.config, encoding="utf-8") as f:
        # an empty YAML document loads as None; treat it as an empty config
        config = yaml.safe_load(f) or {}

    # strip a trailing slash so the default lands next to data_dir, not inside it
    out_dir = args.out_dir or args.data_dir.rstrip("/") + "_noisy"
    generate_noise_variants(args.data_dir, out_dir, config)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user