"""Strip slide-number, date and footer placeholders from a .pptx package.

The script copies every member of the source archive into a new archive.
XML parts under ``ppt/notesSlides/``, ``ppt/notesMasters/`` and
``ppt/slideMasters/`` are passed through :func:`scrub_xml` on the way; all
other members are copied unchanged.
"""
import re
import sys
import zipfile
from pathlib import Path

# NOTE(review): the pattern literals in this file had lost everything that
# looked like a markup tag, leaving regexes that no longer compiled
# (unbalanced ")").  They are rebuilt here on the assumption that the removed
# pieces were the PresentationML shape element ``<p:sp>`` / ``</p:sp>`` and
# the placeholder element ``<p:ph ...>`` -- confirm against a known-good copy.

# Only XML parts under these archive directories are rewritten.
_SCRUBBED_PREFIXES = (
    "ppt/notesSlides/",
    "ppt/notesMasters/",
    "ppt/slideMasters/",
)


def _shape_containing(inner: str) -> str:
    """Return a regex matching one whole ``<p:sp>`` element that contains *inner*.

    ``\\b`` after ``<p:sp`` keeps the match from starting at ``<p:spTree>`` or
    ``<p:spPr>``.  The ``(?!</p:sp>)`` lookahead stops the lazy scan at the end
    of the current shape, so a match can never span two shapes.  The pattern
    must be used with ``re.DOTALL`` because shapes may contain newlines.
    """
    return (
        r"<p:sp\b[^>]*>(?:(?!</p:sp>).)*?"
        + inner
        + r"(?:(?!</p:sp>).)*?</p:sp>"
    )


PLACEHOLDER_PATTERNS = [
    # Shapes whose placeholder type is slide number, date or footer.
    _shape_containing(
        r'<p:ph[^>]*(?:type="sldNum"|type="dt"|type="ftr")[^>]*/>'
    ),
    # Shapes identified only by text, e.g. a name like "Footer Placeholder 4".
    _shape_containing(r"Slide Number"),
    _shape_containing(r"Date Placeholder"),
    _shape_containing(r"Footer"),
]


def scrub_xml(xml: str) -> str:
    """Return *xml* with slide-number, date and footer shapes removed.

    Every shape matched by an entry of ``PLACEHOLDER_PATTERNS`` is deleted,
    then any ``sldNum="..."`` attribute (with its leading whitespace) is
    dropped from the remaining markup.
    """
    for pattern in PLACEHOLDER_PATTERNS:
        xml = re.sub(pattern, "", xml, flags=re.DOTALL)
    return re.sub(r'\s+sldNum="[^"]*"', "", xml)


def main() -> None:
    """Copy the archive named by ``argv[1]`` to ``argv[2]``, scrubbing XML parts.

    Raises:
        SystemExit: if the argument count is wrong, or if source and
            destination resolve to the same file.
    """
    if len(sys.argv) != 3:
        # NOTE(review): the argument placeholders were missing from this
        # message; the names below are a best guess.
        raise SystemExit(
            "Usage: python scrub_pptx_placeholders.py <input.pptx> <output.pptx>"
        )

    src = Path(sys.argv[1])
    dst = Path(sys.argv[2])
    # Opening dst in "w" mode truncates it, which would destroy the source
    # archive while it is still being read.
    if src.resolve() == dst.resolve():
        raise SystemExit("Source and destination must be different files")
    dst.parent.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(
        dst, "w", zipfile.ZIP_DEFLATED
    ) as zout:
        for item in zin.infolist():
            data = zin.read(item.filename)
            if item.filename.endswith(".xml") and item.filename.startswith(
                _SCRUBBED_PREFIXES
            ):
                data = scrub_xml(data.decode("utf-8")).encode("utf-8")
            # Passing the original ZipInfo keeps the member's name and metadata.
            zout.writestr(item, data)


if __name__ == "__main__":
    main()