Files
CompanionGuard-RL/旧方向信息/scripts/preprocess/server_unpack_and_extract.sh
zhangsiyuan bd1f51c496 chore: initial commit — unified project repo
Merged code repo (CompanionGuard-RL) into single project-level git.
Reorganized root: docs/, reference/, experiments/, tmp/active|archives/.
Gitignored: data/, checkpoints/, .venv, experiment logs, tmp/archives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-14 11:28:42 +08:00

116 lines
4.5 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# ─────────────────────────────────────────────────────────────────────────────
# server_unpack_and_extract.sh
# Server side: one-shot script that unpacks the raw datasets and then runs
# feature extraction.
# Prerequisite: the data has already been uploaded to
#   $ZSY/multimodal_affect/data/raw/
#
# Directory conventions:
#   IEMOCAP zip:  $ZSY/multimodal_affect/data/raw/IEMOCAP/*.zip
#   MELD tar.gz:  $ZSY/multimodal_affect/data/raw/MELD/MELD.Raw.tar.gz
#   MOSI pkl:     $ZSY/multimodal_affect/data/raw/MOSI/aligned_mosi.pkl
# ─────────────────────────────────────────────────────────────────────────────
set -e

# Best-effort load of a per-machine rc file; its stderr is discarded and a
# missing or failing file is ignored. It may define ZSY (its contents are not
# visible from this script).
source /root/.bashrc_zsy 2>/dev/null || true

# Use the built-in default location when ZSY is unset or empty.
: "${ZSY:=/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy}"

# Paths derived from ZSY that the rest of the script relies on.
PROJ="${ZSY}/multimodal_affect"
RAW="${PROJ}/data/raw"
PY="${ZSY}/envs/multimodal_affect/bin/python"

# Start-up banner: timestamp plus the resolved project root.
banner_rule="=========================================="
printf '%s\n' \
  "$banner_rule" \
  " Unpack & Extract — $(date)" \
  " PROJ=$PROJ" \
  "$banner_rule"
# ── IEMOCAP: unpack zip archives ─────────────────────────────────────────────
# Unzips every $RAW/IEMOCAP/*.zip into $RAW/IEMOCAP_full_release.
#
# An "in progress" marker file is created before extraction starts and removed
# only after every archive has been unpacked. Session1/ appears as soon as the
# first archive starts extracting, so its presence alone cannot distinguish a
# finished extraction from one that was aborted half-way; the marker makes the
# next run redo an interrupted extraction instead of skipping it.
IEMOCAP_RAW=$RAW/IEMOCAP
IEMOCAP_DEST=$RAW/IEMOCAP_full_release
IEMOCAP_MARKER=$IEMOCAP_DEST/.unpack_in_progress

# Collect the archives with a glob rather than by parsing `ls`; an unmatched
# glob stays literal, hence the -e check.
iemocap_zips=()
for zf in "$IEMOCAP_RAW"/*.zip; do
  if [ -e "$zf" ]; then
    iemocap_zips+=("$zf")
  fi
done

if [ -d "$IEMOCAP_DEST/Session1" ] && [ ! -e "$IEMOCAP_MARKER" ]; then
  echo "[skip] IEMOCAP already unpacked at $IEMOCAP_DEST"
elif [ "${#iemocap_zips[@]}" -gt 0 ]; then
  echo "[IEMOCAP] Unzipping..."
  mkdir -p "$IEMOCAP_DEST"
  : > "$IEMOCAP_MARKER"
  for zf in "${iemocap_zips[@]}"; do
    echo " unzip $zf"
    # -o overwrites leftovers of an earlier interrupted run without prompting,
    # so an unattended re-run cannot block on an interactive question.
    if ! unzip -q -o "$zf" -d "$IEMOCAP_DEST"; then
      echo "[IEMOCAP] ERROR: unzip failed for $zf (will be retried on next run)" >&2
      exit 1
    fi
  done
  rm -f "$IEMOCAP_MARKER"
  echo "[IEMOCAP] Unzip done. Sessions:"
  ls "$IEMOCAP_DEST/"
else
  echo "[IEMOCAP] WARNING: no zip files found in $IEMOCAP_RAW"
fi
# ── MELD: unpack tar.gz ──────────────────────────────────────────────────────
# Extracts $RAW/MELD/MELD.Raw.tar.gz inside $RAW/MELD; the skip check expects
# the result to be the directory $RAW/MELD/MELD.Raw.
#
# An "in progress" marker file is created before tar runs and removed only
# after it succeeds. MELD.Raw/ exists as soon as tar starts writing, so its
# presence alone cannot tell a finished extraction from an aborted one; the
# marker makes the next run redo an interrupted extraction instead of
# skipping it.
MELD_RAW=$RAW/MELD
MELD_DEST=$MELD_RAW/MELD.Raw
MELD_MARKER=$MELD_RAW/.MELD.Raw.unpack_in_progress

if [ -d "$MELD_DEST" ] && [ ! -e "$MELD_MARKER" ]; then
  echo "[skip] MELD already unpacked at $MELD_DEST"
elif [ -f "$MELD_RAW/MELD.Raw.tar.gz" ]; then
  echo "[MELD] Extracting tar.gz (~10.8GB, takes a few minutes)..."
  : > "$MELD_MARKER"
  if ! tar -xzf "$MELD_RAW/MELD.Raw.tar.gz" -C "$MELD_RAW"; then
    echo "[MELD] ERROR: tar extraction failed (will be retried on next run)" >&2
    exit 1
  fi
  rm -f "$MELD_MARKER"
  echo "[MELD] Extract done."
  ls "$MELD_RAW/"
else
  echo "[MELD] WARNING: MELD.Raw.tar.gz not found in $MELD_RAW"
  echo " CSV-only mode will be used (no audio features)"
fi
# ── Feature extraction ───────────────────────────────────────────────────────
# The extraction scripts below are invoked by relative path, so everything
# from here on runs from the project root.
cd "$PROJ" || { echo "[error] cannot cd to $PROJ" >&2; exit 1; }
echo ""
echo "=== Feature Extraction ==="

# IEMOCAP: runs only when the unpacked corpus (Session1/) is present.
if [ -d "$IEMOCAP_DEST/Session1" ]; then
  echo "[extract] IEMOCAP..."
  # "$PY" is quoted so an interpreter path containing whitespace (ZSY can be
  # overridden from the environment) is not split into several words.
  "$PY" scripts/preprocess/extract_iemocap.py \
    --data_root "$RAW" \
    --out_dir "$PROJ/data/iemocap"
  echo "[done] IEMOCAP features → $PROJ/data/iemocap"
else
  echo "[skip] IEMOCAP not ready"
fi
# CMU-MOSI: runs only when the aligned pickle has been uploaded.
MOSI_PKL=$RAW/MOSI/aligned_mosi.pkl
if [ -f "$MOSI_PKL" ]; then
  echo "[extract] CMU-MOSI..."
  # "$PY" is quoted so an interpreter path containing whitespace (ZSY can be
  # overridden from the environment) is not split into several words.
  "$PY" scripts/preprocess/extract_mosi.py \
    --data_root "$RAW/MOSI" \
    --out_dir "$PROJ/data/mosi"
  echo "[done] MOSI features → $PROJ/data/mosi"
else
  echo "[skip] MOSI aligned_mosi.pkl not found at $MOSI_PKL"
fi
# MELD: runs when either the unpacked archive directory exists or at least
# one CSV file sits directly in $MELD_RAW.
# The CSV check uses a glob instead of parsing `ls`; an unmatched glob stays
# literal, hence the -e test.
meld_has_csv=false
for meld_csv in "$MELD_RAW"/*.csv; do
  if [ -e "$meld_csv" ]; then
    meld_has_csv=true
    break
  fi
done

if [ -d "$MELD_DEST" ] || [ "$meld_has_csv" = true ]; then
  echo "[extract] MELD..."
  # "$PY" is quoted so an interpreter path containing whitespace (ZSY can be
  # overridden from the environment) is not split into several words.
  "$PY" scripts/preprocess/extract_meld.py \
    --data_root "$MELD_RAW" \
    --out_dir "$PROJ/data/meld"
  echo "[done] MELD features → $PROJ/data/meld"
else
  echo "[skip] MELD data not ready"
fi
# ── Noise generation (runs once the IEMOCAP features are in place) ───────────
# The presence of train_labels.npy is used as the signal that IEMOCAP features
# exist (presumably written by extract_iemocap.py — confirm). When it is
# missing this step is skipped silently.
if [ -f "$PROJ/data/iemocap/train_labels.npy" ]; then
  echo ""
  echo "=== Noise Generation (8 variants) ==="
  # "$PY" is quoted so an interpreter path containing whitespace (ZSY can be
  # overridden from the environment) is not split into several words.
  "$PY" scripts/preprocess/generate_noise.py \
    --config configs/noise_configs.yaml \
    --data_dir "$PROJ/data/iemocap" \
    --out_dir "$PROJ/data/iemocap_noisy"
  echo "[done] Noisy variants → $PROJ/data/iemocap_noisy"
fi
# Closing banner: blank separator line, then the completion timestamp framed
# by two rules.
footer_rule="=========================================="
printf '\n%s\n%s\n%s\n' "$footer_rule" " ALL DONE — $(date)" "$footer_rule"