chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git.
Reorganized root: docs/, reference/, experiments/, tmp/active|archives/.
Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions

View File

@@ -0,0 +1,266 @@
"""Upload all Phase 1 implementation files to the server."""
import paramiko, warnings
warnings.filterwarnings("ignore")
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect('10.82.3.180', port=20083, username='root', password='m2dGcwyrhI', timeout=30)
sftp = client.open_sftp()
ZSY = '/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy'
PROJ = ZSY + '/multimodal_affect'
files = {}
# ─── src/data/dataset.py ──────────────────────────────────────────────────
# Full text of the remote module src/data/dataset.py, written as adjacent
# string literals (one fragment per remote line) that Python concatenates.
# The leading spaces inside each fragment ARE the indentation of the uploaded
# file, so they must form valid Python blocks (4 spaces per level).
#
# The payload defines:
#   NOISE_VARIANTS     names of the sub-directories looked up under noise_root.
#   MultimodalDataset  loads {split}_text/_audio/_vision/_labels .npy arrays
#                      from data_dir; when load_noisy and noise_root are set it
#                      also loads every variant that has at least one file,
#                      falling back to the clean array for a missing modality.
#   get_dataloader     wraps the dataset in a DataLoader; with distributed=True
#                      a DistributedSampler is used and DataLoader-level
#                      shuffling is turned off.
# NOTE(review): __getitem__ returns only the clean arrays; noisy_variants and
# variant_names are populated but never read inside this payload. Confirm a
# consumer elsewhere uses them.
files['src/data/dataset.py'] = (
    'import os\n'
    'import numpy as np\n'
    'import torch\n'
    'from torch.utils.data import Dataset, DataLoader\n'
    'from torch.utils.data.distributed import DistributedSampler\n'
    '\n'
    'NOISE_VARIANTS = [\n'
    '    "gaussian_light", "gaussian_heavy", "missing_audio",\n'
    '    "missing_visual", "text_word_drop_30", "audio_masking_50",\n'
    '    "realistic_mixed", "audio_time_mask",\n'
    ']\n'
    '\n'
    '\n'
    'class MultimodalDataset(Dataset):\n'
    '    def __init__(self, data_dir, split, load_noisy=False, noise_root=None):\n'
    '        self.split = split\n'
    '        self.text = np.load(f"{data_dir}/{split}_text.npy").astype(np.float32)\n'
    '        self.audio = np.load(f"{data_dir}/{split}_audio.npy").astype(np.float32)\n'
    '        self.vision = np.load(f"{data_dir}/{split}_vision.npy").astype(np.float32)\n'
    '        self.labels = np.load(f"{data_dir}/{split}_labels.npy").astype(np.int64)\n'
    '\n'
    '        self.noisy_variants = {}\n'
    '        if load_noisy and noise_root:\n'
    '            for v in NOISE_VARIANTS:\n'
    '                vd = os.path.join(noise_root, v)\n'
    '                tf = os.path.join(vd, f"{split}_text.npy")\n'
    '                af = os.path.join(vd, f"{split}_audio.npy")\n'
    '                vf = os.path.join(vd, f"{split}_vision.npy")\n'
    '                if os.path.exists(tf) or os.path.exists(af) or os.path.exists(vf):\n'
    '                    self.noisy_variants[v] = {\n'
    '                        "text": np.load(tf).astype(np.float32) if os.path.exists(tf) else self.text,\n'
    '                        "audio": np.load(af).astype(np.float32) if os.path.exists(af) else self.audio,\n'
    '                        "vision": np.load(vf).astype(np.float32) if os.path.exists(vf) else self.vision,\n'
    '                    }\n'
    '        self.variant_names = sorted(self.noisy_variants.keys())\n'
    '\n'
    '    def __len__(self):\n'
    '        return len(self.labels)\n'
    '\n'
    '    def __getitem__(self, idx):\n'
    '        return {\n'
    '            "text": torch.from_numpy(self.text[idx].copy()),\n'
    '            "audio": torch.from_numpy(self.audio[idx].copy()),\n'
    '            "vision": torch.from_numpy(self.vision[idx].copy()),\n'
    '            "labels": torch.tensor(int(self.labels[idx])),\n'
    '        }\n'
    '\n'
    '\n'
    'def get_dataloader(ds, batch_size, shuffle=True, distributed=False,\n'
    '                   num_workers=4, drop_last=True):\n'
    '    sampler = DistributedSampler(ds, shuffle=shuffle) if distributed else None\n'
    '    return DataLoader(\n'
    '        ds, batch_size=batch_size,\n'
    '        shuffle=(shuffle and sampler is None),\n'
    '        sampler=sampler, num_workers=num_workers,\n'
    '        pin_memory=True, drop_last=drop_last,\n'
    '    )\n'
)
# ─── src/models/encoders.py ───────────────────────────────────────────────
# Full text of the remote module src/models/encoders.py, written as adjacent
# string literals (one fragment per remote line). The leading spaces inside
# each fragment are the indentation of the uploaded file and must form valid
# Python blocks (4 spaces per level).
#
# The payload defines:
#   ModalityProjector    Linear -> LayerNorm -> GELU -> Dropout(0.1) -> Linear
#                        -> LayerNorm, mapping in_dim to proj_dim through a
#                        hidden width of max(in_dim * 4, 256).
#   ConfidenceEstimator  Linear -> ReLU -> Dropout(0.1) -> Linear -> Sigmoid,
#                        with the trailing size-1 dimension squeezed away.
#   MultimodalEncoder    one projector and one confidence estimator per
#                        modality; forward returns the three projected
#                        features plus the confidences stacked on dim=1.
files['src/models/encoders.py'] = (
    'import torch\n'
    'import torch.nn as nn\n'
    '\n'
    '\n'
    'class ModalityProjector(nn.Module):\n'
    '    # Project low-dim pre-extracted features to shared proj_dim space\n'
    '    def __init__(self, in_dim: int, proj_dim: int = 1024):\n'
    '        super().__init__()\n'
    '        mid = max(in_dim * 4, 256)\n'
    '        self.net = nn.Sequential(\n'
    '            nn.Linear(in_dim, mid),\n'
    '            nn.LayerNorm(mid),\n'
    '            nn.GELU(),\n'
    '            nn.Dropout(0.1),\n'
    '            nn.Linear(mid, proj_dim),\n'
    '            nn.LayerNorm(proj_dim),\n'
    '        )\n'
    '\n'
    '    def forward(self, x: torch.Tensor) -> torch.Tensor:\n'
    '        return self.net(x)\n'
    '\n'
    '\n'
    'class ConfidenceEstimator(nn.Module):\n'
    '    # Lightweight MLP: proj_dim -> noise-quality confidence scalar in (0, 1)\n'
    '    def __init__(self, proj_dim: int = 1024, hidden: int = 256):\n'
    '        super().__init__()\n'
    '        self.net = nn.Sequential(\n'
    '            nn.Linear(proj_dim, hidden),\n'
    '            nn.ReLU(),\n'
    '            nn.Dropout(0.1),\n'
    '            nn.Linear(hidden, 1),\n'
    '            nn.Sigmoid(),\n'
    '        )\n'
    '\n'
    '    def forward(self, x: torch.Tensor) -> torch.Tensor:\n'
    '        return self.net(x).squeeze(-1)\n'
    '\n'
    '\n'
    'class MultimodalEncoder(nn.Module):\n'
    '    # Three-branch projector + three per-modality confidence estimators\n'
    '    def __init__(self,\n'
    '                 text_dim: int = 300,\n'
    '                 audio_dim: int = 74,\n'
    '                 vision_dim: int = 35,\n'
    '                 proj_dim: int = 1024):\n'
    '        super().__init__()\n'
    '        self.text_proj = ModalityProjector(text_dim, proj_dim)\n'
    '        self.audio_proj = ModalityProjector(audio_dim, proj_dim)\n'
    '        self.vision_proj = ModalityProjector(vision_dim, proj_dim)\n'
    '        self.text_conf = ConfidenceEstimator(proj_dim)\n'
    '        self.audio_conf = ConfidenceEstimator(proj_dim)\n'
    '        self.vision_conf = ConfidenceEstimator(proj_dim)\n'
    '\n'
    '    def forward(self, text, audio, vision):\n'
    '        tf = self.text_proj(text)\n'
    '        af = self.audio_proj(audio)\n'
    '        vf = self.vision_proj(vision)\n'
    '        confs = torch.stack([\n'
    '            self.text_conf(tf),\n'
    '            self.audio_conf(af),\n'
    '            self.vision_conf(vf),\n'
    '        ], dim=1) # (B, 3)\n'
    '        return tf, af, vf, confs\n'
)
# ─── src/models/classifier.py ─────────────────────────────────────────────
# Full text of the remote module src/models/classifier.py, written as adjacent
# string literals (one fragment per remote line). The leading spaces inside
# each fragment are the indentation of the uploaded file and must form valid
# Python blocks (4 spaces per level).
#
# The payload defines EmotionClassifier: an MLP head
# in_dim -> hidden -> hidden // 2 -> num_classes with GELU activations,
# dropout after each hidden layer and LayerNorm after the first one. forward
# returns the output of the final Linear layer (no softmax is applied).
files['src/models/classifier.py'] = (
    'import torch.nn as nn\n'
    '\n'
    '\n'
    'class EmotionClassifier(nn.Module):\n'
    '    def __init__(self, in_dim: int = 1024, num_classes: int = 4,\n'
    '                 hidden: int = 512, dropout: float = 0.3):\n'
    '        super().__init__()\n'
    '        self.net = nn.Sequential(\n'
    '            nn.Linear(in_dim, hidden),\n'
    '            nn.LayerNorm(hidden),\n'
    '            nn.GELU(),\n'
    '            nn.Dropout(dropout),\n'
    '            nn.Linear(hidden, hidden // 2),\n'
    '            nn.GELU(),\n'
    '            nn.Dropout(dropout),\n'
    '            nn.Linear(hidden // 2, num_classes),\n'
    '        )\n'
    '\n'
    '    def forward(self, x):\n'
    '        return self.net(x)\n'
)
# ─── src/rl/fusion_agent.py ───────────────────────────────────────────────
# Full text of the remote module src/rl/fusion_agent.py, written as adjacent
# string literals (one fragment per remote line). The leading spaces inside
# each fragment are the indentation of the uploaded file and must form valid
# Python blocks (4 spaces per level).
#
# The payload defines ModalFusionAgent, an actor-critic module:
#   actor   MLP state_dim -> hidden -> hidden -> 3; softplus(output) + 1e-3 is
#           used as the concentration of a Dirichlet over 3 fusion weights.
#   critic  MLP state_dim -> hidden -> hidden -> 1 (value estimate).
#   get_action_and_value  samples weights with rsample() and returns
#                         (weights, log_prob, value, entropy).
#   evaluate              recomputes (log_prob, value, entropy) for given
#                         weights.
# NOTE(review): evaluate clamps the weights to [1e-6, 1 - 1e-6] before
# log_prob, while get_action_and_value takes log_prob of the unclamped sample.
# The two can disagree for samples near the simplex boundary; confirm this
# asymmetry is intended.
files['src/rl/fusion_agent.py'] = (
    'import torch\n'
    'import torch.nn as nn\n'
    'import torch.nn.functional as F\n'
    'from torch.distributions import Dirichlet\n'
    '\n'
    '\n'
    'class ModalFusionAgent(nn.Module):\n'
    '    # PPO Actor-Critic for RL-adaptive modality fusion\n'
    '    # State s = [conf_text, conf_audio, conf_visual, noise_est] (R^4)\n'
    '    # Action a = fusion weights from Dirichlet distribution (simplex R^3)\n'
    '\n'
    '    def __init__(self, state_dim: int = 4, hidden: int = 128):\n'
    '        super().__init__()\n'
    '        self.actor = nn.Sequential(\n'
    '            nn.Linear(state_dim, hidden), nn.Tanh(),\n'
    '            nn.Linear(hidden, hidden), nn.Tanh(),\n'
    '            nn.Linear(hidden, 3),\n'
    '        )\n'
    '        self.critic = nn.Sequential(\n'
    '            nn.Linear(state_dim, hidden), nn.Tanh(),\n'
    '            nn.Linear(hidden, hidden), nn.Tanh(),\n'
    '            nn.Linear(hidden, 1),\n'
    '        )\n'
    '\n'
    '    def _concentration(self, state: torch.Tensor) -> torch.Tensor:\n'
    '        return F.softplus(self.actor(state)) + 1e-3\n'
    '\n'
    '    def get_action_and_value(self, state: torch.Tensor):\n'
    '        conc = self._concentration(state)\n'
    '        dist = Dirichlet(conc)\n'
    '        weights = dist.rsample()\n'
    '        log_p = dist.log_prob(weights)\n'
    '        value = self.critic(state)\n'
    '        entropy = dist.entropy()\n'
    '        return weights, log_p, value, entropy\n'
    '\n'
    '    def evaluate(self, state: torch.Tensor, weights: torch.Tensor):\n'
    '        # Recompute log-prob and value for stored actions (PPO update)\n'
    '        conc = self._concentration(state)\n'
    '        dist = Dirichlet(conc)\n'
    '        log_p = dist.log_prob(weights.clamp(1e-6, 1 - 1e-6))\n'
    '        value = self.critic(state)\n'
    '        entropy = dist.entropy()\n'
    '        return log_p, value, entropy\n'
)
# ─── src/rl/reward.py ─────────────────────────────────────────────────────
# Full text of the remote module src/rl/reward.py, written as adjacent string
# literals (one fragment per remote line). The leading spaces inside each
# fragment are the indentation of the uploaded file and must form valid
# Python blocks (4 spaces per level).
#
# The payload defines compute_reward, which returns (reward, info) where
#   reward = alpha * (-cross_entropy) + beta * consistency - gamma * instability
#   consistency  = sum over the last dim of L1-normalised weights times
#                  L1-normalised confs
#   instability  = L2 norm of (weights - prev_weights), or zeros when
#                  prev_weights is None
#   info         = dict of python floats: weighted F1 of argmax(logits) vs
#                  labels, plus the means of the three reward components.
# The payload imports sklearn.metrics.f1_score, so scikit-learn must be
# installed on the remote side.
# NOTE(review): prev_weights is unsqueezed on dim 0 and expanded to the shape
# of weights, so it is presumably a single un-batched weight vector. Confirm
# against the caller.
files['src/rl/reward.py'] = (
    'import torch\n'
    'import torch.nn.functional as F\n'
    'from sklearn.metrics import f1_score\n'
    '\n'
    '\n'
    'def compute_reward(logits, labels, confs, weights, prev_weights,\n'
    '                   alpha: float = 1.0,\n'
    '                   beta: float = 0.3,\n'
    '                   gamma: float = 0.1):\n'
    '    # Per-sample reward: R = alpha*(-CE) + beta*Consistency - gamma*Instability\n'
    '    neg_ce = -F.cross_entropy(logits, labels, reduction="none")\n'
    '\n'
    '    w_norm = F.normalize(weights, p=1, dim=-1)\n'
    '    c_norm = F.normalize(confs, p=1, dim=-1)\n'
    '    consistency = (w_norm * c_norm).sum(dim=-1)\n'
    '\n'
    '    if prev_weights is not None:\n'
    '        delta = weights - prev_weights.unsqueeze(0).expand_as(weights)\n'
    '        instability = torch.norm(delta, p=2, dim=-1)\n'
    '    else:\n'
    '        instability = torch.zeros_like(neg_ce)\n'
    '\n'
    '    reward = alpha * neg_ce + beta * consistency - gamma * instability\n'
    '\n'
    '    with torch.no_grad():\n'
    '        wf1 = float(f1_score(\n'
    '            labels.cpu().numpy(),\n'
    '            logits.argmax(-1).cpu().numpy(),\n'
    '            average="weighted", zero_division=0,\n'
    '        ))\n'
    '\n'
    '    info = {\n'
    '        "wf1": wf1,\n'
    '        "consistency": consistency.mean().item(),\n'
    '        "instability": instability.mean().item(),\n'
    '        "neg_ce": neg_ce.mean().item(),\n'
    '    }\n'
    '    return reward, info\n'
)
# Upload
def _ensure_remote_dir(sftp_client, remote_dir, known_dirs):
    """Create ``remote_dir`` and any missing parents on the server.

    Works like ``mkdir -p``: walks up from ``remote_dir`` until an existing
    directory is found, then creates the missing levels top-down.

    Args:
        sftp_client: an open paramiko SFTP client.
        remote_dir: absolute POSIX path of the directory that must exist.
        known_dirs: set of directories already verified or created in this
            run; updated in place so each directory is checked only once.
    """
    import posixpath

    missing = []
    current = remote_dir
    while current not in ("", "/") and current not in known_dirs:
        try:
            sftp_client.stat(current)
        except FileNotFoundError:
            # paramiko reports "no such file" as IOError(ENOENT), which
            # Python 3 surfaces as FileNotFoundError.
            missing.append(current)
            current = posixpath.dirname(current)
        else:
            break
    # Deepest path was recorded first; create from the shallowest level down.
    for path in reversed(missing):
        sftp_client.mkdir(path)
    known_dirs.update(missing)
    known_dirs.add(remote_dir)


try:
    ensured_dirs = set()
    for rel_path, content in files.items():
        remote_path = f"{PROJ}/{rel_path}"
        # sftp.open(..., 'w') does not create parent directories, so make
        # sure e.g. PROJ/src/data exists before writing into it.
        _ensure_remote_dir(sftp, remote_path.rpartition("/")[0], ensured_dirs)
        with sftp.open(remote_path, 'w') as f:
            f.write(content)
        print(f" uploaded: {rel_path}")
finally:
    # Always release the SFTP channel and SSH connection, even when an
    # upload fails part-way through.
    sftp.close()
    client.close()
print("\nAll src files uploaded.")