feat: Module C v5/v6 training complete, ablations, SOTA baselines, paper updates
- Module C: BC+PPO training v5/v6 done; eval results in experiments/eval_intervention_v{5,6}.json
- Reward: v5 label-aligned constrained reward (code/src/rl/reward.py)
- Ablations: Module B (history_r, response_only, full) + Module C (wo_category_reward)
- SOTA baselines: WildGuard and ShieldGemma2b eval scripts and results
- Paper: update sections 05–08 (Module B/C description, experiments table, discussion)
- Docs: add record.md (change log), update state.md and exp.md; retire change.md
- Tools: add html-to-ppt utilities and run_shieldgemma2b.sh
- Configs: add ablation YAML configs for Module B and C
- Cleanup: remove stale reference/ PNG screenshots
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
114
tools/html_to_ppt_capture.js
Normal file
114
tools/html_to_ppt_capture.js
Normal file
@@ -0,0 +1,114 @@
|
||||
const fs = require('fs/promises');
|
||||
const path = require('path');
|
||||
const { chromium } = require('playwright');
|
||||
|
||||
// Command-line arguments: argv[2] is the deck URL to capture and argv[3] is
// the directory that receives the output files. Either may be undefined when
// the script is invoked without enough arguments.
const [, , input, outputDir] = process.argv;

// Names written, in order, over the "[ name ]" placeholders found on the
// slide whose data-label starts with "15".
const presenterNames = ['Zhihao Zhao', 'Zipeng Wang', 'Jiuqi Feng', 'Siyuan Zhang'];
|
||||
|
||||
/**
 * Removes the fragment part of a URL string.
 *
 * @param {string} url - URL that may contain a `#fragment`.
 * @returns {string} Everything before the first `#`, or `url` unchanged when
 *   it contains no `#`.
 */
function baseWithoutHash(url) {
  const fragmentStart = url.indexOf('#');
  if (fragmentStart < 0) {
    return url;
  }
  return url.slice(0, fragmentStart);
}
|
||||
|
||||
/**
 * Applies deck-specific touch-ups to the currently visible slide.
 *
 * The callback runs inside the page via `page.evaluate`. The "visible" slide
 * is the first `<section>` whose computed style is neither
 * `visibility: hidden` nor `opacity: 0`; when no such section exists nothing
 * is changed. Two edits are keyed on the section's `data-label` prefix:
 *   - "01": hide every div whose whitespace-normalised text is exactly the
 *     "PRESENTED [ Presentation Date ] ..." placeholder line.
 *   - "15": overwrite each "[ name ]" placeholder div, in document order,
 *     with the matching entry of `presenterNames`. Placeholders beyond the
 *     end of the list are left untouched.
 *
 * @param {object} page - Playwright page already navigated to a slide.
 * @returns {Promise<void>} Resolves once the in-page edits have run.
 */
async function applyRequestedEdits(page) {
  await page.evaluate((names) => {
    const visible = Array.from(document.querySelectorAll('section')).find((section) => {
      const style = getComputedStyle(section);
      return style.visibility !== 'hidden' && style.opacity !== '0';
    });
    if (!visible) return;

    const label = visible.getAttribute('data-label') || '';
    const divs = Array.from(visible.querySelectorAll('div'));

    if (label.startsWith('01')) {
      for (const el of divs) {
        // innerText may contain line breaks and runs of spaces; collapse them
        // so the comparison does not depend on the slide's layout.
        const text = (el.innerText || '').replace(/\s+/g, ' ').trim();
        if (text === 'PRESENTED [ Presentation Date ] 4-person team walkthrough · ~10 min') {
          el.style.display = 'none';
        }
      }
    }

    if (label.startsWith('15')) {
      const matches = divs.filter((el) => (el.innerText || '').trim() === '[ name ]');
      // A wrapper div whose only text is the placeholder matches together
      // with the div it wraps. Writing textContent on the wrapper would drop
      // its children and consume a name, shifting every later assignment, so
      // only the innermost match of each nest is treated as a placeholder.
      const placeholders = matches.filter(
        (el) => !matches.some((other) => other !== el && el.contains(other)),
      );
      placeholders.forEach((el, index) => {
        if (names[index]) {
          el.textContent = names[index];
        }
      });
    }
  }, presenterNames);
}
|
||||
|
||||
/**
 * Script entry point: renders every slide of an HTML deck to a PNG.
 *
 * Steps:
 *   1. Validate the two CLI arguments and create the output directory.
 *   2. Launch headless Chromium (optionally from the binary named by the
 *      BROWSER_EXE environment variable).
 *   3. Open slide 1 once to count the `<section>` elements in the document.
 *   4. For each slide index, open `<base>#<index>` in a fresh page, apply the
 *      deck-specific edits, and screenshot the visible section to
 *      `slide_NN.png`.
 *   5. Write `slides.json` with the `{ index, label }` of every slide.
 *
 * Any failure is logged and the process exits with status 1. The browser is
 * closed in a `finally` block so it is released on failure as well.
 */
(async () => {
  if (!input || !outputDir) {
    throw new Error('Usage: node html_to_ppt_capture.js <file-url> <output-dir>');
  }

  await fs.mkdir(outputDir, { recursive: true });

  const launchOptions = { headless: true };
  if (process.env.BROWSER_EXE) {
    launchOptions.executablePath = process.env.BROWSER_EXE;
  }

  // Every page uses the same fixed 1920x1080 viewport at scale factor 1.
  const pageOptions = {
    viewport: { width: 1920, height: 1080 },
    deviceScaleFactor: 1,
  };
  const baseUrl = baseWithoutHash(input);

  const browser = await chromium.launch(launchOptions);
  try {
    // Opens a new page on slide `index`, waits for a <section> to be attached
    // and for web fonts to finish loading, then pauses `settleMs` milliseconds.
    const openSlide = async (index, settleMs) => {
      const page = await browser.newPage(pageOptions);
      await page.goto(`${baseUrl}#${index}`, { waitUntil: 'load', timeout: 30000 });
      await page.waitForSelector('section', { state: 'attached', timeout: 10000 });
      await page.evaluate(() => document.fonts && document.fonts.ready);
      await page.waitForTimeout(settleMs);
      return page;
    };

    const firstPage = await openSlide(1, 500);
    const slideCount = await firstPage.evaluate(
      () => document.querySelectorAll('section').length,
    );
    await firstPage.close();

    const labels = [];
    for (let i = 1; i <= slideCount; i += 1) {
      const page = await openSlide(i, 900);
      await applyRequestedEdits(page);
      // Short pause after the DOM edits before measuring and capturing.
      await page.waitForTimeout(100);

      const { label, clip } = await page.evaluate(() => {
        const visible = Array.from(document.querySelectorAll('section')).find((section) => {
          const style = getComputedStyle(section);
          return style.visibility !== 'hidden' && style.opacity !== '0';
        });
        const rect = visible?.getBoundingClientRect();
        const width = rect ? Math.round(rect.width) : 0;
        const height = rect ? Math.round(rect.height) : 0;
        return {
          // Fall back to the first line of body text when the section has no
          // data-label (or no visible section was found).
          label: visible?.getAttribute('data-label') || document.body.innerText.split('\n')[0] || '',
          // A zero-sized clip is rejected by page.screenshot, so report null
          // and let the caller capture the whole viewport instead.
          clip: width > 0 && height > 0
            ? {
              x: Math.max(0, Math.round(rect.x)),
              y: Math.max(0, Math.round(rect.y)),
              width,
              height,
            }
            : null,
        };
      });
      labels.push({ index: i, label });

      const file = path.join(outputDir, `slide_${String(i).padStart(2, '0')}.png`);
      await page.screenshot({ path: file, fullPage: false, clip: clip || undefined });
      console.log(`captured ${i}/${slideCount}: ${label}`);
      await page.close();
    }

    await fs.writeFile(path.join(outputDir, 'slides.json'), JSON.stringify(labels, null, 2), 'utf8');
  } finally {
    // Closing the browser also disposes of any page left open by an error.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user