feat: Module C v5/v6 training complete, ablations, SOTA baselines, paper updates
- Module C: BC+PPO training v5/v6 done; eval results in experiments/eval_intervention_v{5,6}.json
- Reward: v5 label-aligned constrained reward (code/src/rl/reward.py)
- Ablations: Module B (history_r, response_only, full) + Module C (wo_category_reward)
- SOTA baselines: WildGuard and ShieldGemma2b eval scripts and results
- Paper: update sections 05–08 (Module B/C description, experiments table, discussion)
- Docs: add record.md (change log), update state.md and exp.md; retire change.md
- Tools: add html-to-ppt utilities and run_shieldgemma2b.sh
- Configs: add ablation YAML configs for Module B and C
- Cleanup: remove stale reference/ PNG screenshots
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
44
tools/scrub_pptx_placeholders.py
Normal file
44
tools/scrub_pptx_placeholders.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import re
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Regexes that each match one whole ``<p:sp>...</p:sp>`` shape element to be
# dropped.  The tempered ``(?:(?!</p:sp>).)*?`` segments keep a match from
# running past the first closing ``</p:sp>``, so only the shape that actually
# contains the marker is matched.  ``.`` must be able to span newlines, so
# these are meant to be applied with ``re.DOTALL`` (as ``scrub_xml`` does).
PLACEHOLDER_PATTERNS = [
    # Shapes holding a self-closed <p:ph> whose type is sldNum, dt or ftr.
    r"<p:sp\b(?:(?!</p:sp>).)*?<p:ph\b[^>]*(?:type=\"sldNum\"|type=\"dt\"|type=\"ftr\")[^>]*/>(?:(?!</p:sp>).)*?</p:sp>",
    # Shapes containing a text run that is exactly "Slide Number".
    r"<p:sp\b(?:(?!</p:sp>).)*?<a:t>Slide Number</a:t>(?:(?!</p:sp>).)*?</p:sp>",
    # Shapes containing a text run that is exactly "Date Placeholder".
    r"<p:sp\b(?:(?!</p:sp>).)*?<a:t>Date Placeholder</a:t>(?:(?!</p:sp>).)*?</p:sp>",
    # Shapes containing a text run that is exactly "Footer".
    r"<p:sp\b(?:(?!</p:sp>).)*?<a:t>Footer</a:t>(?:(?!</p:sp>).)*?</p:sp>",
]
|
||||
|
||||
|
||||
def scrub_xml(xml: str) -> str:
    """Return *xml* with placeholder shapes and ``sldNum`` attributes removed.

    Every regex in ``PLACEHOLDER_PATTERNS`` is applied in list order and each
    match is replaced with the empty string.  Afterwards any ``sldNum="..."``
    attribute, together with the whitespace in front of it, is stripped.

    Args:
        xml: The text of one XML part.

    Returns:
        The scrubbed text; identical to the input when nothing matches.
    """
    scrubbed = xml
    for shape_regex in PLACEHOLDER_PATTERNS:
        # DOTALL lets "." cross line breaks inside a multi-line shape element.
        scrubbed = re.compile(shape_regex, re.DOTALL).sub("", scrubbed)
    return re.sub(r"\s+sldNum=\"[^\"]*\"", "", scrubbed)
|
||||
|
||||
|
||||
def main() -> None:
    """Copy a .pptx archive, scrubbing placeholder shapes from selected parts.

    Reads the input and output paths from ``sys.argv``.  Every member of the
    input archive is copied to the output archive; members ending in ``.xml``
    that live under ``ppt/notesSlides/``, ``ppt/notesMasters/`` or
    ``ppt/slideMasters/`` are decoded as UTF-8, passed through ``scrub_xml``
    and re-encoded before being written.

    The result is first written to a temporary file next to the destination
    and then moved into place.  This makes it safe to pass the same path as
    input and output, and never leaves a half-written archive at the
    destination if processing fails.

    Raises:
        SystemExit: If the command line does not hold exactly two arguments.
    """
    # Function-scope import keeps this block self-contained.
    import os

    if len(sys.argv) != 3:
        raise SystemExit("Usage: python scrub_pptx_placeholders.py <input.pptx> <output.pptx>")

    src = Path(sys.argv[1])
    dst = Path(sys.argv[2])
    dst.parent.mkdir(parents=True, exist_ok=True)

    scrubbed_dirs = ("ppt/notesSlides/", "ppt/notesMasters/", "ppt/slideMasters/")

    # Opening dst directly in "w" mode would truncate it immediately; when
    # src and dst are the same file that destroys the input before it is read.
    tmp = dst.with_name(f"{dst.name}.{os.getpid()}.tmp")
    try:
        with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zout:
            for item in zin.infolist():
                data = zin.read(item.filename)
                if item.filename.endswith(".xml") and item.filename.startswith(scrubbed_dirs):
                    data = scrub_xml(data.decode("utf-8")).encode("utf-8")
                # Passing the ZipInfo (not just the name) carries the member's
                # original metadata over to the new archive.
                zout.writestr(item, data)
        # Both archives are closed here, so the move also works in place.
        os.replace(tmp, dst)
    finally:
        # After a successful replace the temp path is gone; on failure this
        # removes the partial file.
        if tmp.exists():
            tmp.unlink()
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user