chore: initial commit — unified project repo

Merged code repo (CompanionGuard-RL) into single project-level git. Reorganized root: docs/, reference/, experiments/, tmp/active|archives/. Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 11:28:42 +08:00
commit bd1f51c496
85 changed files with 20568 additions and 0 deletions
--- a/旧方向信息/scripts/upload_phase1.py
+++ b/旧方向信息/scripts/upload_phase1.py
@@ -0,0 +1,266 @@
+"""Upload all Phase 1 implementation files to the server."""
+import getpass
+import os
+import posixpath
+import warnings
+
+import paramiko
+warnings.filterwarnings("ignore")
+
+client = paramiko.SSHClient()
+client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+client.connect('10.82.3.180', port=20083, username='root', password='m2dGcwyrhI', timeout=30)
+sftp = client.open_sftp()
+
+ZSY  = '/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy'
+PROJ = ZSY + '/multimodal_affect'
+
+files = {}
+
+# ─── src/data/dataset.py ──────────────────────────────────────────────────
+files['src/data/dataset.py'] = (
+'import os\n'
+'import numpy as np\n'
+'import torch\n'
+'from torch.utils.data import Dataset, DataLoader\n'
+'from torch.utils.data.distributed import DistributedSampler\n'
+'\n'
+'NOISE_VARIANTS = [\n'
+'    "gaussian_light", "gaussian_heavy", "missing_audio",\n'
+'    "missing_visual", "text_word_drop_30", "audio_masking_50",\n'
+'    "realistic_mixed", "audio_time_mask",\n'
+']\n'
+'\n'
+'\n'
+'class MultimodalDataset(Dataset):\n'
+'    def __init__(self, data_dir, split, load_noisy=False, noise_root=None):\n'
+'        self.split  = split\n'
+'        self.text   = np.load(f"{data_dir}/{split}_text.npy").astype(np.float32)\n'
+'        self.audio  = np.load(f"{data_dir}/{split}_audio.npy").astype(np.float32)\n'
+'        self.vision = np.load(f"{data_dir}/{split}_vision.npy").astype(np.float32)\n'
+'        self.labels = np.load(f"{data_dir}/{split}_labels.npy").astype(np.int64)\n'
+'\n'
+'        self.noisy_variants = {}\n'
+'        if load_noisy and noise_root:\n'
+'            for v in NOISE_VARIANTS:\n'
+'                vd = os.path.join(noise_root, v)\n'
+'                tf = os.path.join(vd, f"{split}_text.npy")\n'
+'                af = os.path.join(vd, f"{split}_audio.npy")\n'
+'                vf = os.path.join(vd, f"{split}_vision.npy")\n'
+'                if os.path.exists(tf) or os.path.exists(af) or os.path.exists(vf):\n'
+'                    self.noisy_variants[v] = {\n'
+'                        "text": np.load(tf).astype(np.float32) if os.path.exists(tf) else self.text,\n'
+'                        "audio": np.load(af).astype(np.float32) if os.path.exists(af) else self.audio,\n'
+'                        "vision": np.load(vf).astype(np.float32) if os.path.exists(vf) else self.vision,\n'
+'                    }\n'
+'        self.variant_names = sorted(self.noisy_variants.keys())\n'
+'\n'
+'    def __len__(self):\n'
+'        return len(self.labels)\n'
+'\n'
+'    def __getitem__(self, idx):\n'
+'        return {\n'
+'            "text":   torch.from_numpy(self.text[idx].copy()),\n'
+'            "audio":  torch.from_numpy(self.audio[idx].copy()),\n'
+'            "vision": torch.from_numpy(self.vision[idx].copy()),\n'
+'            "labels": torch.tensor(int(self.labels[idx])),\n'
+'        }\n'
+'\n'
+'\n'
+'def get_dataloader(ds, batch_size, shuffle=True, distributed=False,\n'
+'                   num_workers=4, drop_last=True):\n'
+'    sampler = DistributedSampler(ds, shuffle=shuffle) if distributed else None\n'
+'    return DataLoader(\n'
+'        ds, batch_size=batch_size,\n'
+'        shuffle=(shuffle and sampler is None),\n'
+'        sampler=sampler, num_workers=num_workers,\n'
+'        pin_memory=True, drop_last=drop_last,\n'
+'    )\n'
+)
+
+# ─── src/models/encoders.py ───────────────────────────────────────────────
+files['src/models/encoders.py'] = (
+'import torch\n'
+'import torch.nn as nn\n'
+'\n'
+'\n'
+'class ModalityProjector(nn.Module):\n'
+'    # Project low-dim pre-extracted features to shared proj_dim space\n'
+'    def __init__(self, in_dim: int, proj_dim: int = 1024):\n'
+'        super().__init__()\n'
+'        mid = max(in_dim * 4, 256)\n'
+'        self.net = nn.Sequential(\n'
+'            nn.Linear(in_dim, mid),\n'
+'            nn.LayerNorm(mid),\n'
+'            nn.GELU(),\n'
+'            nn.Dropout(0.1),\n'
+'            nn.Linear(mid, proj_dim),\n'
+'            nn.LayerNorm(proj_dim),\n'
+'        )\n'
+'\n'
+'    def forward(self, x: torch.Tensor) -> torch.Tensor:\n'
+'        return self.net(x)\n'
+'\n'
+'\n'
+'class ConfidenceEstimator(nn.Module):\n'
+'    # Lightweight MLP: proj_dim -> noise-quality confidence scalar in (0, 1)\n'
+'    def __init__(self, proj_dim: int = 1024, hidden: int = 256):\n'
+'        super().__init__()\n'
+'        self.net = nn.Sequential(\n'
+'            nn.Linear(proj_dim, hidden),\n'
+'            nn.ReLU(),\n'
+'            nn.Dropout(0.1),\n'
+'            nn.Linear(hidden, 1),\n'
+'            nn.Sigmoid(),\n'
+'        )\n'
+'\n'
+'    def forward(self, x: torch.Tensor) -> torch.Tensor:\n'
+'        return self.net(x).squeeze(-1)\n'
+'\n'
+'\n'
+'class MultimodalEncoder(nn.Module):\n'
+'    # Three-branch projector + three per-modality confidence estimators\n'
+'    def __init__(self,\n'
+'                 text_dim: int = 300,\n'
+'                 audio_dim: int = 74,\n'
+'                 vision_dim: int = 35,\n'
+'                 proj_dim: int = 1024):\n'
+'        super().__init__()\n'
+'        self.text_proj   = ModalityProjector(text_dim,   proj_dim)\n'
+'        self.audio_proj  = ModalityProjector(audio_dim,  proj_dim)\n'
+'        self.vision_proj = ModalityProjector(vision_dim, proj_dim)\n'
+'        self.text_conf   = ConfidenceEstimator(proj_dim)\n'
+'        self.audio_conf  = ConfidenceEstimator(proj_dim)\n'
+'        self.vision_conf = ConfidenceEstimator(proj_dim)\n'
+'\n'
+'    def forward(self, text, audio, vision):\n'
+'        tf = self.text_proj(text)\n'
+'        af = self.audio_proj(audio)\n'
+'        vf = self.vision_proj(vision)\n'
+'        confs = torch.stack([\n'
+'            self.text_conf(tf),\n'
+'            self.audio_conf(af),\n'
+'            self.vision_conf(vf),\n'
+'        ], dim=1)              # (B, 3)\n'
+'        return tf, af, vf, confs\n'
+)
+
+# ─── src/models/classifier.py ─────────────────────────────────────────────
+files['src/models/classifier.py'] = (
+'import torch.nn as nn\n'
+'\n'
+'\n'
+'class EmotionClassifier(nn.Module):\n'
+'    def __init__(self, in_dim: int = 1024, num_classes: int = 4,\n'
+'                 hidden: int = 512, dropout: float = 0.3):\n'
+'        super().__init__()\n'
+'        self.net = nn.Sequential(\n'
+'            nn.Linear(in_dim, hidden),\n'
+'            nn.LayerNorm(hidden),\n'
+'            nn.GELU(),\n'
+'            nn.Dropout(dropout),\n'
+'            nn.Linear(hidden, hidden // 2),\n'
+'            nn.GELU(),\n'
+'            nn.Dropout(dropout),\n'
+'            nn.Linear(hidden // 2, num_classes),\n'
+'        )\n'
+'\n'
+'    def forward(self, x):\n'
+'        return self.net(x)\n'
+)
+
+# ─── src/rl/fusion_agent.py ───────────────────────────────────────────────
+files['src/rl/fusion_agent.py'] = (
+'import torch\n'
+'import torch.nn as nn\n'
+'import torch.nn.functional as F\n'
+'from torch.distributions import Dirichlet\n'
+'\n'
+'\n'
+'class ModalFusionAgent(nn.Module):\n'
+'    # PPO Actor-Critic for RL-adaptive modality fusion\n'
+'    # State  s = [conf_text, conf_audio, conf_visual, noise_est]  (R^4)\n'
+'    # Action a = fusion weights from Dirichlet distribution  (simplex R^3)\n'
+'\n'
+'    def __init__(self, state_dim: int = 4, hidden: int = 128):\n'
+'        super().__init__()\n'
+'        self.actor = nn.Sequential(\n'
+'            nn.Linear(state_dim, hidden), nn.Tanh(),\n'
+'            nn.Linear(hidden, hidden),   nn.Tanh(),\n'
+'            nn.Linear(hidden, 3),\n'
+'        )\n'
+'        self.critic = nn.Sequential(\n'
+'            nn.Linear(state_dim, hidden), nn.Tanh(),\n'
+'            nn.Linear(hidden, hidden),   nn.Tanh(),\n'
+'            nn.Linear(hidden, 1),\n'
+'        )\n'
+'\n'
+'    def _concentration(self, state: torch.Tensor) -> torch.Tensor:\n'
+'        return F.softplus(self.actor(state)) + 1e-3\n'
+'\n'
+'    def get_action_and_value(self, state: torch.Tensor):\n'
+'        conc    = self._concentration(state)\n'
+'        dist    = Dirichlet(conc)\n'
+'        weights = dist.rsample()\n'
+'        log_p   = dist.log_prob(weights)\n'
+'        value   = self.critic(state)\n'
+'        entropy = dist.entropy()\n'
+'        return weights, log_p, value, entropy\n'
+'\n'
+'    def evaluate(self, state: torch.Tensor, weights: torch.Tensor):\n'
+'        # Recompute log-prob and value for stored actions (PPO update)\n'
+'        conc    = self._concentration(state)\n'
+'        dist    = Dirichlet(conc)\n'
+'        log_p   = dist.log_prob(weights.clamp(1e-6, 1 - 1e-6))\n'
+'        value   = self.critic(state)\n'
+'        entropy = dist.entropy()\n'
+'        return log_p, value, entropy\n'
+)
+
+# ─── src/rl/reward.py ─────────────────────────────────────────────────────
+files['src/rl/reward.py'] = (
+'import torch\n'
+'import torch.nn.functional as F\n'
+'from sklearn.metrics import f1_score\n'
+'\n'
+'\n'
+'def compute_reward(logits, labels, confs, weights, prev_weights,\n'
+'                   alpha: float = 1.0,\n'
+'                   beta:  float = 0.3,\n'
+'                   gamma: float = 0.1):\n'
+'    # Per-sample reward: R = alpha*(-CE) + beta*Consistency - gamma*Instability\n'
+'    neg_ce = -F.cross_entropy(logits, labels, reduction="none")\n'
+'\n'
+'    w_norm      = F.normalize(weights, p=1, dim=-1)\n'
+'    c_norm      = F.normalize(confs,   p=1, dim=-1)\n'
+'    consistency = (w_norm * c_norm).sum(dim=-1)\n'
+'\n'
+'    if prev_weights is not None:\n'
+'        delta       = weights - prev_weights.unsqueeze(0).expand_as(weights)\n'
+'        instability = torch.norm(delta, p=2, dim=-1)\n'
+'    else:\n'
+'        instability = torch.zeros_like(neg_ce)\n'
+'\n'
+'    reward = alpha * neg_ce + beta * consistency - gamma * instability\n'
+'\n'
+'    with torch.no_grad():\n'
+'        wf1 = float(f1_score(\n'
+'            labels.cpu().numpy(),\n'
+'            logits.argmax(-1).cpu().numpy(),\n'
+'            average="weighted", zero_division=0,\n'
+'        ))\n'
+'\n'
+'    info = {\n'
+'        "wf1":         wf1,\n'
+'        "consistency": consistency.mean().item(),\n'
+'        "instability": instability.mean().item(),\n'
+'        "neg_ce":      neg_ce.mean().item(),\n'
+'    }\n'
+'    return reward, info\n'
+)
+
+# Upload
+for rel_path, content in files.items():
+    remote_path = f"{PROJ}/{rel_path}"
+    with sftp.open(remote_path, 'w') as f:
+        f.write(content)
+    print(f"  uploaded: {rel_path}")
+
+sftp.close()
+client.close()
+print("\nAll src files uploaded.")