Files
CompanionGuard-RL/旧方向信息/scripts/preprocess/server_unpack_and_extract.sh

116 lines
4.5 KiB
Bash
Raw Normal View History

#!/bin/bash
# ─────────────────────────────────────────────────────────────────────────────
# server_unpack_and_extract.sh
# 服务器端:解压 + 特征提取一键脚本
# 前提:数据已上传到 $ZSY/multimodal_affect/data/raw/
#
# 目录约定:
# IEMOCAP zip: $ZSY/multimodal_affect/data/raw/IEMOCAP/*.zip
# MELD tar.gz: $ZSY/multimodal_affect/data/raw/MELD/MELD.Raw.tar.gz
# MOSI pkl: $ZSY/multimodal_affect/data/raw/MOSI/aligned_mosi.pkl
# ─────────────────────────────────────────────────────────────────────────────
# Abort on the first command that exits non-zero.
set -o errexit

# Load the optional per-user environment file. Its stderr is discarded and a
# missing or failing file is tolerated, so the script still runs without it.
. /root/.bashrc_zsy 2>/dev/null || :

# Keep a non-empty ZSY from the environment; otherwise use the default root.
: "${ZSY:=/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy}"

# Locations derived from the workspace root.
PROJ="${ZSY}/multimodal_affect"
RAW="${PROJ}/data/raw"
PY="${ZSY}/envs/multimodal_affect/bin/python"

# Start-of-run banner.
banner_rule="=========================================="
echo "$banner_rule"
echo " Unpack & Extract — $(date)"
echo " PROJ=$PROJ"
echo "$banner_rule"
# ── IEMOCAP: unpack zip archives ─────────────────────────────────────────────
# Archives are read from $RAW/IEMOCAP/*.zip and unpacked into
# $RAW/IEMOCAP_full_release. The data set counts as unpacked once
# $IEMOCAP_DEST/Session1 exists.
IEMOCAP_RAW=$RAW/IEMOCAP
IEMOCAP_DEST=$RAW/IEMOCAP_full_release

#######################################
# Unpack every *.zip found in a directory into a destination directory.
# Arguments:
#   $1 - directory containing the zip archives
#   $2 - destination directory (created if missing)
# Outputs:
#   Progress and warnings on stdout; unzip failures on stderr.
# Returns:
#   0 if already unpacked, nothing to unpack, or every archive unpacked;
#   the failing command's exit status otherwise.
#######################################
unpack_iemocap() {
  local src_dir=$1
  local dest_dir=$2
  local zf
  local rc
  local -a archives=()

  if [ -d "$dest_dir/Session1" ]; then
    echo "[skip] IEMOCAP already unpacked at $dest_dir"
    return 0
  fi

  # Glob directly instead of probing with `ls`. An unmatched pattern stays
  # literal, so only entries that really exist are kept.
  for zf in "$src_dir"/*.zip; do
    if [ -e "$zf" ]; then
      archives+=("$zf")
    fi
  done

  if [ "${#archives[@]}" -eq 0 ]; then
    echo "[IEMOCAP] WARNING: no zip files found in $src_dir"
    return 0
  fi

  echo "[IEMOCAP] Unzipping..."
  mkdir -p "$dest_dir" || return
  for zf in "${archives[@]}"; do
    echo " unzip $zf"
    # -o: overwrite leftovers from an earlier aborted run instead of stopping
    # at unzip's interactive "replace?" prompt.
    rc=0
    unzip -q -o "$zf" -d "$dest_dir" || rc=$?
    if [ "$rc" -ne 0 ]; then
      echo "[IEMOCAP] ERROR: unzip failed (exit $rc) for $zf" >&2
      return "$rc"
    fi
  done
  echo "[IEMOCAP] Unzip done. Sessions:"
  ls "$dest_dir/"

  # Both the skip check above and the later feature-extraction step look for
  # Session1 directly under the destination, so report it now if it is absent.
  # NOTE(review): a run that aborts after Session1 was written is treated as
  # fully unpacked on the next run — confirm that is acceptable.
  if [ ! -d "$dest_dir/Session1" ]; then
    echo "[IEMOCAP] WARNING: $dest_dir/Session1 not found after unzip"
  fi
}

unpack_iemocap "$IEMOCAP_RAW" "$IEMOCAP_DEST"
# ── MELD: extract tar.gz ─────────────────────────────────────────────────────
# The archive is read from $RAW/MELD/MELD.Raw.tar.gz and extracted inside
# $RAW/MELD. The data set counts as unpacked once $MELD_RAW/MELD.Raw exists.
MELD_RAW=$RAW/MELD
MELD_DEST=$MELD_RAW/MELD.Raw

#######################################
# Extract MELD.Raw.tar.gz inside the MELD raw-data directory.
# Arguments:
#   $1 - directory holding MELD.Raw.tar.gz (also the extraction target)
#   $2 - directory whose existence marks the archive as unpacked
# Outputs:
#   Progress and warnings on stdout; extraction failures on stderr.
# Returns:
#   0 if already unpacked, archive missing, or extraction succeeded;
#   the tar exit status if extraction fails.
#######################################
unpack_meld() {
  local raw_dir=$1
  local dest_dir=$2
  local archive=$raw_dir/MELD.Raw.tar.gz
  local rc=0

  if [ -d "$dest_dir" ]; then
    echo "[skip] MELD already unpacked at $dest_dir"
    return 0
  fi

  if [ ! -f "$archive" ]; then
    echo "[MELD] WARNING: MELD.Raw.tar.gz not found in $raw_dir"
    echo " CSV-only mode will be used (no audio features)"
    return 0
  fi

  echo "[MELD] Extracting tar.gz (~10.8GB, takes a few minutes)..."
  tar -xzf "$archive" -C "$raw_dir" || rc=$?
  if [ "$rc" -ne 0 ]; then
    # The marker directory did not exist before this extraction (checked
    # above), so anything there now is a partial result. Remove it; otherwise
    # the next run would take the "[skip] already unpacked" path.
    echo "[MELD] ERROR: tar failed (exit $rc); removing partial $dest_dir" >&2
    rm -rf -- "${dest_dir:?}"
    return "$rc"
  fi
  echo "[MELD] Extract done."
  ls "$raw_dir/"
}

unpack_meld "$MELD_RAW" "$MELD_DEST"
# ── Feature extraction ───────────────────────────────────────────────────────
# Runs the per-dataset extractor scripts from the project root. A dataset
# whose raw data is not in place is skipped with a message.
cd "$PROJ"
echo ""
echo "=== Feature Extraction ==="

#######################################
# Run one extractor script with the project interpreter.
# Globals:
#   PY (read) - path of the Python interpreter
# Arguments:
#   $1 - label for the "[extract]" line
#   $2 - dataset name for the "[done]" line
#   $3 - extractor script path
#   $4 - value passed as --data_root
#   $5 - value passed as --out_dir
# Returns:
#   0 on success; the extractor's exit status on failure.
#######################################
run_extractor() {
  local label=$1
  local name=$2
  local script=$3
  local data_root=$4
  local out_dir=$5

  echo "[extract] $label..."
  # "$PY" is quoted so an interpreter path containing whitespace stays one word.
  "$PY" "$script" \
    --data_root "$data_root" \
    --out_dir "$out_dir" || return
  echo "[done] $name features → $out_dir"
}

#######################################
# Tell whether a directory contains at least one *.csv entry.
# Arguments:
#   $1 - directory to inspect
# Returns:
#   0 if a match exists, 1 otherwise.
#######################################
dir_has_csv() {
  local entry
  # An unmatched glob stays literal, so each candidate is checked for existence.
  for entry in "$1"/*.csv; do
    if [ -e "$entry" ]; then
      return 0
    fi
  done
  return 1
}

# IEMOCAP (the extractor is given $RAW itself, not $IEMOCAP_DEST)
if [ -d "$IEMOCAP_DEST/Session1" ]; then
  run_extractor "IEMOCAP" "IEMOCAP" scripts/preprocess/extract_iemocap.py \
    "$RAW" "$PROJ/data/iemocap"
else
  echo "[skip] IEMOCAP not ready"
fi

# MOSI
MOSI_PKL=$RAW/MOSI/aligned_mosi.pkl
if [ -f "$MOSI_PKL" ]; then
  run_extractor "CMU-MOSI" "MOSI" scripts/preprocess/extract_mosi.py \
    "$RAW/MOSI" "$PROJ/data/mosi"
else
  echo "[skip] MOSI aligned_mosi.pkl not found at $MOSI_PKL"
fi

# MELD: runs when either the unpacked archive or loose CSV files are present.
if [ -d "$MELD_DEST" ] || dir_has_csv "$MELD_RAW"; then
  run_extractor "MELD" "MELD" scripts/preprocess/extract_meld.py \
    "$MELD_RAW" "$PROJ/data/meld"
else
  echo "[skip] MELD data not ready"
fi
# ── Noise generation (runs once the IEMOCAP features are in place) ──────────

#######################################
# Generate the noisy IEMOCAP variants when the extracted features exist.
# Globals:
#   PY (read) - path of the Python interpreter
# Arguments:
#   $1 - feature directory; must contain train_labels.npy for the step to run
#   $2 - output directory passed as --out_dir
# Returns:
#   0 when skipped or on success; the generator's exit status on failure.
#######################################
generate_noisy_variants() {
  local data_dir=$1
  local out_dir=$2

  if [ ! -f "$data_dir/train_labels.npy" ]; then
    # Report the skip, as the other sections of this script do.
    echo "[skip] Noise generation: $data_dir/train_labels.npy not found"
    return 0
  fi

  echo ""
  echo "=== Noise Generation (8 variants) ==="
  # "$PY" is quoted so an interpreter path containing whitespace stays one word.
  "$PY" scripts/preprocess/generate_noise.py \
    --config configs/noise_configs.yaml \
    --data_dir "$data_dir" \
    --out_dir "$out_dir" || return
  echo "[done] Noisy variants → $out_dir"
}

generate_noisy_variants "$PROJ/data/iemocap" "$PROJ/data/iemocap_noisy"
# End-of-run banner: a blank line, then the completion time between two rules.
footer_rule="=========================================="
echo ""
echo "$footer_rule"
echo " ALL DONE — $(date)"
echo "$footer_rule"