feat: Module C v5/v6 training complete, ablations, SOTA baselines, paper updates

- Module C: BC+PPO training v5/v6 done; eval results in experiments/eval_intervention_v{5,6}.json
- Reward: v5 label-aligned constrained reward (code/src/rl/reward.py)
- Ablations: Module B (history_r, response_only, full) + Module C (wo_category_reward)
- SOTA baselines: WildGuard and ShieldGemma2b eval scripts and results
- Paper: update sections 05–08 (Module B/C description, experiments table, discussion)
- Docs: add record.md (change log), update state.md and exp.md; retire change.md
- Tools: add html-to-ppt utilities and run_shieldgemma2b.sh
- Configs: add ablation YAML configs for Module B and C
- Cleanup: remove stale reference/ PNG screenshots

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-20 14:24:09 +08:00
parent 6d61a950f1
commit 52ba43f08d
55 changed files with 8239 additions and 1244 deletions

View File

@@ -0,0 +1,114 @@
const fs = require('fs/promises');
const path = require('path');
const { chromium } = require('playwright');
// Command-line arguments: the deck URL to load (any '#fragment' is stripped before use)
// and the directory that receives the slide PNGs and slides.json.
const input = process.argv[2];
const outputDir = process.argv[3];
// Names written, in order, into the '[ name ]' placeholder divs of the slide
// whose data-label starts with '15' (see applyRequestedEdits).
const presenterNames = ['Zhihao Zhao', 'Zipeng Wang', 'Jiuqi Feng', 'Siyuan Zhang'];
/**
 * Strips the fragment from a URL.
 *
 * Everything from the first '#' onward is dropped; a URL that contains no '#'
 * comes back unchanged.
 *
 * @param url URL string, with or without a '#fragment'.
 * @returns The part of `url` that precedes the first '#'.
 */
function baseWithoutHash(url) {
  const [beforeHash] = url.split('#', 1);
  return beforeHash;
}
/**
 * Patches the slide that is currently on screen.
 *
 * The work happens inside the page. The first <section> whose computed style
 * is neither `visibility: hidden` nor `opacity: 0` is treated as the current
 * slide; nothing happens when no section qualifies. Then:
 *   - if its data-label starts with '01', every <div> whose whitespace-collapsed
 *     text equals the "PRESENTED [ Presentation Date ] ..." line is hidden;
 *   - if its data-label starts with '15', the <div>s whose trimmed text is
 *     '[ name ]' are overwritten, in document order, with presenterNames.
 *
 * @param page Playwright page that has the deck loaded.
 */
async function applyRequestedEdits(page) {
  // The callback is shipped to the browser, so it can only rely on its
  // argument and on DOM globals - not on anything from this module's scope.
  await page.evaluate((names) => {
    const isShown = (section) => {
      const { visibility, opacity } = getComputedStyle(section);
      return visibility !== 'hidden' && opacity !== '0';
    };
    const current = Array.from(document.querySelectorAll('section')).find(isShown);
    if (!current) return;

    const label = current.getAttribute('data-label') || '';

    if (label.startsWith('01')) {
      const dateLine = 'PRESENTED [ Presentation Date ] 4-person team walkthrough · ~10 min';
      current.querySelectorAll('div').forEach((div) => {
        const flattened = (div.innerText || '').replace(/\s+/g, ' ').trim();
        if (flattened === dateLine) {
          div.style.display = 'none';
        }
      });
    }

    if (label.startsWith('15')) {
      // Collect every placeholder first, then fill them, so that rewriting one
      // div cannot influence which other divs are recognised as placeholders.
      const slots = [];
      for (const div of Array.from(current.querySelectorAll('div'))) {
        if ((div.innerText || '').trim() === '[ name ]') {
          slots.push(div);
        }
      }
      slots.forEach((div, position) => {
        const name = names[position];
        if (name) {
          div.textContent = name;
        }
      });
    }
  }, presenterNames);
}
// Entry point: renders each slide of the deck to <output-dir>/slide_NN.png and
// records the slide labels in <output-dir>/slides.json.
(async () => {
  if (!(input && outputDir)) {
    throw new Error('Usage: node html_to_ppt_capture.js <file-url> <output-dir>');
  }
  await fs.mkdir(outputDir, { recursive: true });

  // BROWSER_EXE, when set, selects the browser binary Playwright launches.
  const launchOptions = { headless: true };
  const customBrowser = process.env.BROWSER_EXE;
  if (customBrowser) {
    launchOptions.executablePath = customBrowser;
  }
  const browser = await chromium.launch(launchOptions);
  const baseUrl = baseWithoutHash(input);

  // Opens a fresh 1920x1080 page on slide `slideNo`, waits for a <section> to
  // be attached and for document.fonts.ready, then pauses `settleMs` more.
  const openSlide = async (slideNo, settleMs) => {
    const tab = await browser.newPage({
      viewport: { width: 1920, height: 1080 },
      deviceScaleFactor: 1,
    });
    await tab.goto(`${baseUrl}#${slideNo}`, { waitUntil: 'load', timeout: 30000 });
    await tab.waitForSelector('section', { state: 'attached', timeout: 10000 });
    await tab.evaluate(() => document.fonts && document.fonts.ready);
    await tab.waitForTimeout(settleMs);
    return tab;
  };

  // A throw-away page is used only to learn how many <section> slides exist.
  const probe = await openSlide(1, 500);
  const slideCount = await probe.evaluate(() => document.querySelectorAll('section').length);
  await probe.close();

  const labels = [];
  for (let slideNo = 1; slideNo <= slideCount; slideNo += 1) {
    // Every slide gets its own page, loaded directly at '#<slideNo>'.
    const tab = await openSlide(slideNo, 900);
    await applyRequestedEdits(tab);
    await tab.waitForTimeout(100);

    const { label, clip } = await tab.evaluate(() => {
      const shown = Array.from(document.querySelectorAll('section')).find((section) => {
        const { visibility, opacity } = getComputedStyle(section);
        return visibility !== 'hidden' && opacity !== '0';
      });
      // With no visible section, fall back to the first line of body text for
      // the label and to an unclipped screenshot.
      const firstBodyLine = () => document.body.innerText.split('\n')[0] || '';
      if (!shown) {
        return { label: firstBodyLine(), clip: null };
      }
      const box = shown.getBoundingClientRect();
      return {
        label: shown.getAttribute('data-label') || firstBodyLine(),
        clip: {
          x: Math.max(0, Math.round(box.x)),
          y: Math.max(0, Math.round(box.y)),
          width: Math.round(box.width),
          height: Math.round(box.height),
        },
      };
    });

    labels.push({ index: slideNo, label });
    const paddedNo = String(slideNo).padStart(2, '0');
    const file = path.join(outputDir, `slide_${paddedNo}.png`);
    await tab.screenshot({ path: file, fullPage: false, clip: clip === null ? undefined : clip });
    console.log(`captured ${slideNo}/${slideCount}: ${label}`);
    await tab.close();
  }

  const manifestPath = path.join(outputDir, 'slides.json');
  await fs.writeFile(manifestPath, JSON.stringify(labels, null, 2), 'utf8');
  await browser.close();
})().catch((err) => {
  console.error(err);
  process.exit(1);
});

View File

@@ -0,0 +1,89 @@
const { chromium } = require('playwright');
// Command-line argument: the deck URL to check (any '#fragment' is stripped before use).
const input = process.argv[2];
// Names that applyRequestedEdits writes into the '[ name ]' placeholders and
// that the check below expects to find in the text of slide 15.
const presenterNames = ['Zhihao Zhao', 'Zipeng Wang', 'Jiuqi Feng', 'Siyuan Zhang'];
/**
 * Removes the '#fragment' part of a URL.
 *
 * @param url URL string, with or without a fragment.
 * @returns `url` cut just before its first '#', or `url` itself when it has none.
 */
function baseWithoutHash(url) {
  // Leftmost '#' through end of string; [\s\S] also spans line breaks.
  return url.replace(/#[\s\S]*$/, '');
}
/**
 * Applies the deck touch-ups to the slide currently shown in `page`.
 *
 * Executed in the browser: the first <section> that is not `visibility: hidden`
 * and not `opacity: 0` is taken as the active slide (no-op if there is none).
 * On a slide labelled '01...' the "PRESENTED [ Presentation Date ] ..." div is
 * hidden; on a slide labelled '15...' the '[ name ]' placeholder divs receive
 * the presenter names in document order.
 *
 * @param page Playwright page that has the deck loaded.
 */
async function applyRequestedEdits(page) {
  // Everything the callback needs is passed in as `names`; it runs in the
  // page context and cannot see this module's variables.
  await page.evaluate((names) => {
    const allSections = Array.from(document.querySelectorAll('section'));
    const active = allSections.find((candidate) => {
      const computed = getComputedStyle(candidate);
      return !(computed.visibility === 'hidden' || computed.opacity === '0');
    });
    if (!active) {
      return;
    }

    const labelOf = () => active.getAttribute('data-label') || '';
    const textOf = (node) => node.innerText || '';

    if (labelOf().startsWith('01')) {
      Array.from(active.querySelectorAll('div'))
        .forEach((node) => {
          const oneLine = textOf(node).replace(/\s+/g, ' ').trim();
          if (oneLine === 'PRESENTED [ Presentation Date ] 4-person team walkthrough · ~10 min') {
            node.style.display = 'none';
          }
        });
    }

    if (labelOf().startsWith('15')) {
      // Placeholders are gathered before any of them is rewritten.
      const nameSlots = Array.from(active.querySelectorAll('div'))
        .filter((node) => textOf(node).trim() === '[ name ]');
      for (const [position, node] of nameSlots.entries()) {
        if (names[position]) {
          node.textContent = names[position];
        }
      }
    }
  }, presenterNames);
}
// Entry point: loads every slide of the deck, applies the same edits the
// capture script applies, and prints a JSON report of
//   - findings: slides whose visible text still contains placeholder-looking
//     fragments ("[ ... ]", "Presentation Date", "Slide Number", ...);
//   - slide15NamesPresent: for slide 15, whether each presenter name occurs in
//     the slide text (same order as presenterNames).
// The process exits 0 even when findings exist; only errors exit 1.
(async () => {
  if (!input) {
    throw new Error('Usage: node html_to_ppt_check_placeholders.js <file-url>');
  }

  // BROWSER_EXE, when set, selects the browser binary Playwright launches.
  const launchOptions = { headless: true };
  if (process.env.BROWSER_EXE) {
    launchOptions.executablePath = process.env.BROWSER_EXE;
  }
  const browser = await chromium.launch(launchOptions);
  const baseUrl = baseWithoutHash(input);

  // Count the slides instead of assuming exactly 15, so that longer decks are
  // checked to the end and shorter ones are not over-run. One <section> is
  // counted as one slide.
  const probe = await browser.newPage({ viewport: { width: 1920, height: 1080 } });
  await probe.goto(`${baseUrl}#1`, { waitUntil: 'load', timeout: 30000 });
  await probe.waitForSelector('section', { state: 'attached', timeout: 10000 });
  const slideCount = await probe.evaluate(() => document.querySelectorAll('section').length);
  await probe.close();

  // Slide index whose text must contain the presenter names; the report key
  // 'slide15NamesPresent' is tied to this value.
  const namesSlideIndex = 15;

  const findings = [];
  const nameChecks = [];
  for (let i = 1; i <= slideCount; i += 1) {
    // Each slide is loaded in its own page, directly at '#<i>'.
    const page = await browser.newPage({ viewport: { width: 1920, height: 1080 } });
    await page.goto(`${baseUrl}#${i}`, { waitUntil: 'load', timeout: 30000 });
    await page.waitForSelector('section', { state: 'attached', timeout: 10000 });
    await page.waitForTimeout(500);
    await applyRequestedEdits(page);

    const result = await page.evaluate((names) => {
      const visible = Array.from(document.querySelectorAll('section')).find((section) => {
        const style = getComputedStyle(section);
        return style.visibility !== 'hidden' && style.opacity !== '0';
      });
      const label = visible?.getAttribute('data-label') || '';
      const text = visible?.innerText || '';
      // Anything in square brackets, or one of the known template phrases.
      const placeholderMatches = text.match(/\[[^\]]+\]|Presentation Date|Slide Number|placeholder|PRESENTED\s+\[/gi) || [];
      return {
        label,
        placeholderMatches,
        presenterNamesPresent: names.map((name) => text.includes(name)),
      };
    }, presenterNames);

    if (result.placeholderMatches.length) {
      findings.push({ slide: i, label: result.label, matches: result.placeholderMatches });
    }
    if (i === namesSlideIndex) {
      nameChecks.push(...result.presenterNamesPresent);
    }
    await page.close();
  }

  await browser.close();
  console.log(JSON.stringify({ findings, slide15NamesPresent: nameChecks }, null, 2));
})().catch((err) => {
  console.error(err);
  process.exit(1);
});

68
tools/html_to_ppt_make.py Normal file
View File

@@ -0,0 +1,68 @@
import json
import sys
from pathlib import Path
from PIL import Image, ImageOps
from pptx import Presentation
from pptx.util import Inches
def main() -> None:
    """Build a 16:9 PPTX from captured slide images and write a preview montage.

    Command line: ``python html_to_ppt_make.py <image-dir> <output-pptx>``.

    Reads ``slide_<number>.png`` files from ``<image-dir>`` in numeric order and
    places each one, stretched to the full 13.333 x 7.5 inch slide, on its own
    blank-layout slide. If ``<image-dir>/slides.json`` exists, its
    ``index -> label`` entries name the picture shapes; otherwise the file stem
    is used. A contact sheet of thumbnails (5 per row) is saved next to the
    PPTX with the suffix ``.preview.png``.

    Raises:
        SystemExit: on wrong argument count or when no slide PNG is found.
    """
    if len(sys.argv) != 3:
        raise SystemExit("Usage: python html_to_ppt_make.py <image-dir> <output-pptx>")

    image_dir = Path(sys.argv[1])
    output_pptx = Path(sys.argv[2])
    output_pptx.parent.mkdir(parents=True, exist_ok=True)

    labels_path = image_dir / "slides.json"
    labels = {}
    if labels_path.exists():
        labels = {item["index"]: item["label"] for item in json.loads(labels_path.read_text(encoding="utf-8"))}

    prefix = "slide_"

    def slide_number(path: Path) -> int:
        """Numeric part of a ``slide_<number>.png`` file name."""
        return int(path.stem[len(prefix):])

    # Order by slide NUMBER, not by file name: a plain lexicographic sort puts
    # slide_100 before slide_11 once numbering outgrows the zero padding.
    # Files such as slide_old.png, whose suffix is not a number, are ignored
    # instead of crashing the int() conversion.
    image_paths = sorted(
        (p for p in image_dir.glob(f"{prefix}*.png") if p.stem[len(prefix):].isdecimal()),
        key=slide_number,
    )
    if not image_paths:
        raise SystemExit(f"No slide PNGs found in {image_dir}")

    prs = Presentation()
    prs.slide_width = Inches(13.333333)
    prs.slide_height = Inches(7.5)
    # NOTE(review): index 6 is presumably the "Blank" layout of the default
    # python-pptx template - confirm if the template ever changes.
    blank_layout = prs.slide_layouts[6]

    for image_path in image_paths:
        slide_num = slide_number(image_path)
        slide = prs.slides.add_slide(blank_layout)
        picture = slide.shapes.add_picture(
            str(image_path),
            0,
            0,
            width=prs.slide_width,
            height=prs.slide_height,
        )
        picture.name = labels.get(slide_num, image_path.stem)
    prs.save(output_pptx)

    montage_path = output_pptx.with_suffix(".preview.png")
    thumbs = []
    for image_path in image_paths:
        # Close each source file as soon as its pixels have been copied.
        with Image.open(image_path) as source:
            img = source.convert("RGB")
        img.thumbnail((384, 216), Image.Resampling.LANCZOS)
        # 2 px frame: a 384x216 thumbnail becomes one 388x220 montage cell.
        thumbs.append(ImageOps.expand(img, border=2, fill=(30, 30, 30)))

    cols = 5
    rows = (len(thumbs) + cols - 1) // cols  # ceiling division
    montage = Image.new("RGB", (cols * 388, rows * 220), (12, 15, 24))
    for idx, thumb in enumerate(thumbs):
        x = (idx % cols) * 388
        y = (idx // cols) * 220
        montage.paste(thumb, (x, y))
    montage.save(montage_path)

    print(f"pptx={output_pptx}")
    print(f"preview={montage_path}")
    print(f"slides={len(image_paths)}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,43 @@
const { chromium } = require('playwright');
// Command-line argument: URL of the deck to inspect.
const input = process.argv[2];

// Entry point: loads the deck once and prints a JSON description of it - page
// title and URL, the first 500 characters of body text, viewport and scroll
// sizes, and for every <section> its 1-based index, data-label, bounding
// rectangle and computed display / visibility / opacity.
(async () => {
  // Fail with a usage line, like the sibling scripts, rather than letting
  // page.goto() reject an undefined URL with a less helpful error.
  if (!input) {
    throw new Error(`Usage: node ${process.argv[1] || '<script>'} <file-url>`);
  }

  // BROWSER_EXE, when set, selects the browser binary Playwright launches.
  const launchOptions = { headless: true };
  if (process.env.BROWSER_EXE) {
    launchOptions.executablePath = process.env.BROWSER_EXE;
  }
  const browser = await chromium.launch(launchOptions);
  const page = await browser.newPage({ viewport: { width: 1920, height: 1080 }, deviceScaleFactor: 1 });
  await page.goto(input, { waitUntil: 'load', timeout: 30000 });
  await page.waitForTimeout(1500);
  await page.waitForSelector('section', { state: 'attached', timeout: 10000 });

  const info = await page.evaluate(() => {
    const sections = Array.from(document.querySelectorAll('section'));
    return {
      title: document.title,
      url: location.href,
      bodyText: document.body.innerText.slice(0, 500),
      count: sections.length,
      viewport: { w: innerWidth, h: innerHeight },
      scroll: { w: document.documentElement.scrollWidth, h: document.documentElement.scrollHeight },
      sections: sections.map((s, i) => {
        const r = s.getBoundingClientRect();
        const cs = getComputedStyle(s);
        return {
          index: i + 1,
          label: s.getAttribute('data-label') || '',
          rect: { x: r.x, y: r.y, w: r.width, h: r.height },
          display: cs.display,
          visibility: cs.visibility,
          opacity: cs.opacity,
        };
      }),
    };
  });

  console.log(JSON.stringify(info, null, 2));
  await browser.close();
})().catch((err) => {
  console.error(err);
  process.exit(1);
});

View File

@@ -0,0 +1,75 @@
const fs = require('fs/promises');
const path = require('path');
const { chromium } = require('playwright');
const pptxgen = require('pptxgenjs');
// Command-line arguments: the deck URL that carries the #speaker-notes element,
// the directory holding the captured slide_<n>.png images, and the PPTX file to write.
const inputUrl = process.argv[2];
const imageDir = process.argv[3];
const outputPptx = process.argv[4];
/**
 * Reads the speaker notes embedded in the deck.
 *
 * Launches a headless browser (binary overridable through BROWSER_EXE), opens
 * the deck at '#1', and parses the text content of the element with id
 * 'speaker-notes' as JSON.
 *
 * @param url Deck URL; any '#fragment' is replaced by '#1'.
 * @returns The parsed notes array (the caller uses one entry per slide, in slide order).
 * @throws Error if the page or the #speaker-notes element cannot be loaded,
 *   if its content is not valid JSON, or if the JSON is not an array.
 */
async function extractNotes(url) {
  const launchOptions = { headless: true };
  if (process.env.BROWSER_EXE) {
    launchOptions.executablePath = process.env.BROWSER_EXE;
  }
  const browser = await chromium.launch(launchOptions);
  // try/finally: the browser must be shut down even when navigation, the
  // selector wait or JSON parsing throws.
  try {
    const page = await browser.newPage({ viewport: { width: 1920, height: 1080 } });
    const baseUrl = url.includes('#') ? url.slice(0, url.indexOf('#')) : url;
    await page.goto(`${baseUrl}#1`, { waitUntil: 'load', timeout: 30000 });
    await page.waitForSelector('#speaker-notes', { state: 'attached', timeout: 10000 });
    const notes = await page.evaluate(() => JSON.parse(document.getElementById('speaker-notes').textContent));
    // The caller indexes the result and compares its length with the image
    // count; reject anything else here with a message that names the cause.
    if (!Array.isArray(notes)) {
      throw new Error('#speaker-notes must contain a JSON array with one entry per slide');
    }
    return notes;
  } finally {
    await browser.close();
  }
}
// Entry point: builds a wide-layout PPTX with one full-bleed image per slide
// and the matching speaker note attached, and also dumps the notes as JSON
// next to the PPTX.
(async () => {
  if (!inputUrl || !imageDir || !outputPptx) {
    throw new Error('Usage: node html_to_ppt_with_notes.js <file-url> <image-dir> <output-pptx>');
  }
  const notes = await extractNotes(inputUrl);

  // Order images by slide NUMBER. A plain .sort() compares strings, which puts
  // slide_100.png before slide_11.png (and upper-case names before lower-case
  // ones, which the case-insensitive filter lets through), so notes would be
  // attached to the wrong slides.
  const slideNumber = (name) => Number(/\d+/.exec(name)[0]);
  const images = (await fs.readdir(imageDir))
    .filter((name) => /^slide_\d+\.png$/i.test(name))
    .sort((a, b) => slideNumber(a) - slideNumber(b))
    .map((name) => path.join(imageDir, name));

  if (notes.length !== images.length) {
    throw new Error(`Notes count (${notes.length}) does not match image count (${images.length})`);
  }

  await fs.mkdir(path.dirname(outputPptx), { recursive: true });
  // NOTE(review): the notes file name and the title/subject metadata below are
  // hard-coded for one specific deck and do not follow <output-pptx>.
  await fs.writeFile(
    path.join(path.dirname(outputPptx), 'Generative_Image_Dynamics.notes.json'),
    JSON.stringify(notes.map((text, index) => ({ slide: index + 1, text })), null, 2),
    'utf8',
  );

  const pptx = new pptxgen();
  pptx.layout = 'LAYOUT_WIDE';
  pptx.author = 'Codex';
  pptx.subject = 'Generative Image Dynamics — CVPR 2024';
  pptx.title = 'Generative Image Dynamics';
  pptx.company = '';
  pptx.lang = 'en-US';
  pptx.theme = {
    headFontFace: 'Aptos',
    bodyFontFace: 'Aptos',
    lang: 'en-US',
  };

  for (let i = 0; i < images.length; i += 1) {
    const slide = pptx.addSlide();
    // Black background behind the image, which is placed at 13.333 x 7.5 in.
    slide.background = { color: '000000' };
    slide.addImage({ path: images[i], x: 0, y: 0, w: 13.333333, h: 7.5 });
    slide.addNotes(notes[i]);
  }

  await pptx.writeFile({ fileName: outputPptx });
  console.log(`pptx=${outputPptx}`);
  console.log(`slides=${images.length}`);
  console.log(`notes=${notes.length}`);
})().catch((err) => {
  console.error(err);
  process.exit(1);
});

View File

@@ -0,0 +1,45 @@
#!/bin/bash
# Log in to Hugging Face -> download ShieldGemma-2B -> run the evaluation,
# writing all output to a timestamped log file.
#
# Requires: HF_TOKEN in the environment (a Hugging Face access token with
# access to google/shieldgemma-2b).
#
# SECURITY: an access token used to be hard-coded in this script. Tokens must
# never be committed; the previously committed one has to be revoked in the
# Hugging Face account settings.
set -e

# Check the token BEFORE output is redirected, so the error reaches the terminal.
: "${HF_TOKEN:?HF_TOKEN must be set to a Hugging Face access token}"

PROJ=/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/CompanionGuard-RL
MODEL_DIR=/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/shieldgemma-2b
PY=/opt/conda/envs/dlapo-py310-cu128/bin/python
HF=/opt/conda/envs/dlapo-py310-cu128/bin/hf
LOG="$PROJ/experiments/run_shieldgemma2b_$(date +%Y%m%d_%H%M%S).log"
mkdir -p "$PROJ/experiments"

# From here on, all stdout and stderr go to the log file.
exec > "$LOG" 2>&1
echo "=== $(date) START ==="
echo "PROJ=$PROJ"
echo "MODEL_DIR=$MODEL_DIR"

# Proxy (the server has no direct internet access).
export http_proxy=http://127.0.0.1:7890
export https_proxy=http://127.0.0.1:7890

echo ""
echo "--- [1/3] HuggingFace Login ---"
"$HF" auth login --token "$HF_TOKEN"

echo ""
echo "--- [2/3] Downloading google/shieldgemma-2b ---"
"$HF" download google/shieldgemma-2b \
  --local-dir "$MODEL_DIR"

echo ""
echo "--- [3/3] Running evaluation on CompanionRisk-Bench test set ---"
cd "$PROJ"
export PYTHONPATH="$PROJ"
CUDA_VISIBLE_DEVICES=0 "$PY" scripts/eval_sota_baselines.py \
  --model shieldgemma2b \
  --model-path "$MODEL_DIR" \
  --test-data data/processed/CompanionRisk-Bench/test.jsonl \
  --output experiments/eval_sota_shieldgemma2b.json

echo ""
echo "=== $(date) DONE ==="
echo "Result: $PROJ/experiments/eval_sota_shieldgemma2b.json"

View File

@@ -0,0 +1,44 @@
import re
import sys
import zipfile
from pathlib import Path
# Regular expressions, applied by scrub_xml with re.DOTALL, that each match one
# complete <p:sp>...</p:sp> shape element to delete:
#   1. a shape containing a self-closing <p:ph> whose type is "sldNum", "dt" or "ftr";
#   2. a shape containing the text run "Slide Number";
#   3. a shape containing the text run "Date Placeholder";
#   4. a shape containing the text run "Footer".
# The (?:(?!</p:sp>).)*? pieces consume characters lazily but never step over a
# closing </p:sp>, so a match cannot extend past the end of the shape it started in.
PLACEHOLDER_PATTERNS = [
r"<p:sp\b(?:(?!</p:sp>).)*?<p:ph\b[^>]*(?:type=\"sldNum\"|type=\"dt\"|type=\"ftr\")[^>]*/>(?:(?!</p:sp>).)*?</p:sp>",
r"<p:sp\b(?:(?!</p:sp>).)*?<a:t>Slide Number</a:t>(?:(?!</p:sp>).)*?</p:sp>",
r"<p:sp\b(?:(?!</p:sp>).)*?<a:t>Date Placeholder</a:t>(?:(?!</p:sp>).)*?</p:sp>",
r"<p:sp\b(?:(?!</p:sp>).)*?<a:t>Footer</a:t>(?:(?!</p:sp>).)*?</p:sp>",
]
def scrub_xml(xml: str) -> str:
    """Strip placeholder shapes and ``sldNum`` attributes from one XML part.

    Args:
        xml: The XML document as text.

    Returns:
        ``xml`` with every match of each PLACEHOLDER_PATTERNS entry removed
        (matched with ``re.DOTALL`` so shapes may span lines) and with every
        ``sldNum="..."`` attribute, including its leading whitespace, removed.
    """
    cleaned = xml
    for shape_regex in PLACEHOLDER_PATTERNS:
        cleaned = re.compile(shape_regex, re.DOTALL).sub("", cleaned)
    return re.sub(r"\s+sldNum=\"[^\"]*\"", "", cleaned)
def main() -> None:
    """Copy a PPTX while scrubbing placeholder shapes from notes and masters.

    Command line: ``python scrub_pptx_placeholders.py <input.pptx> <output.pptx>``.

    Every archive member is copied to the output. Members ending in ``.xml``
    under ``ppt/notesSlides/``, ``ppt/notesMasters/`` or ``ppt/slideMasters/``
    are decoded as UTF-8, passed through ``scrub_xml`` and re-encoded; all
    other members are copied byte-for-byte.

    Raises:
        SystemExit: on wrong argument count, or when input and output are the
            same file.
    """
    if len(sys.argv) != 3:
        raise SystemExit("Usage: python scrub_pptx_placeholders.py <input.pptx> <output.pptx>")

    src = Path(sys.argv[1])
    dst = Path(sys.argv[2])
    # Opening the output in "w" mode truncates it immediately; if that is the
    # same file as the input, the archive would be destroyed while it is still
    # being read.
    if src.resolve() == dst.resolve():
        raise SystemExit(f"Input and output must be different files: {src}")
    dst.parent.mkdir(parents=True, exist_ok=True)

    scrubbed_dirs = ("ppt/notesSlides/", "ppt/notesMasters/", "ppt/slideMasters/")
    with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout:
        for item in zin.infolist():
            data = zin.read(item.filename)
            if item.filename.endswith(".xml") and item.filename.startswith(scrubbed_dirs):
                text = data.decode("utf-8")
                data = scrub_xml(text).encode("utf-8")
            # Passing the original ZipInfo reuses the member's name and metadata.
            zout.writestr(item, data)


if __name__ == "__main__":
    main()