# Python source file: 45 lines, 1.5 KiB
|
|
import re
|
||
|
|
import sys
|
||
|
|
import zipfile
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
# Lazily consumes characters inside a shape; the negative lookahead stops the
# match from running past the shape's closing </p:sp> tag.
_INSIDE_SHAPE = r"(?:(?!</p:sp>).)*?"
# Opening <p:sp ...> tag followed by any amount of in-shape content.
_SHAPE_HEAD = r"<p:sp\b" + _INSIDE_SHAPE
# Any remaining in-shape content up to and including the closing tag.
_SHAPE_TAIL = _INSIDE_SHAPE + r"</p:sp>"

# Regexes that each match one whole <p:sp>...</p:sp> element to delete:
#   * a shape holding a self-closing <p:ph> of type sldNum, dt or ftr;
#   * a shape whose text run is exactly one of the literal labels below.
PLACEHOLDER_PATTERNS = [
    _SHAPE_HEAD
    + r"<p:ph\b[^>]*(?:type=\"sldNum\"|type=\"dt\"|type=\"ftr\")[^>]*/>"
    + _SHAPE_TAIL,
    *(
        _SHAPE_HEAD + "<a:t>" + label + "</a:t>" + _SHAPE_TAIL
        for label in ("Slide Number", "Date Placeholder", "Footer")
    ),
]
|
||
|
|
|
||
|
|
|
||
|
|
def scrub_xml(xml: str) -> str:
    """Return *xml* with placeholder shapes and ``sldNum`` attributes removed.

    Each regex in ``PLACEHOLDER_PATTERNS`` is applied in order and every match
    is replaced by the empty string. ``re.DOTALL`` is used so that ``.`` also
    matches newlines, letting a pattern span a multi-line element. Finally any
    ``sldNum="..."`` attribute, together with the whitespace before it, is
    stripped.
    """
    cleaned = xml
    for shape_regex in PLACEHOLDER_PATTERNS:
        cleaned = re.compile(shape_regex, re.DOTALL).sub("", cleaned)
    # The attribute pattern has no dot in it, so no DOTALL flag is needed.
    return re.compile(r"\s+sldNum=\"[^\"]*\"").sub("", cleaned)
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> None:
    """Copy a .pptx archive, scrubbing placeholder shapes from selected parts.

    Takes exactly two command-line arguments from ``sys.argv``: the input
    archive path and the output archive path. Every member of the input is
    written to the output. Members whose name ends in ``.xml`` and starts with
    ``ppt/notesSlides/``, ``ppt/notesMasters/`` or ``ppt/slideMasters/`` are
    decoded as UTF-8, passed through ``scrub_xml`` and re-encoded before being
    written; all other members are copied unchanged. The output's parent
    directory is created if it does not exist.

    Raises:
        SystemExit: if the number of arguments is wrong, or if the input and
            output paths resolve to the same file.
    """
    if len(sys.argv) != 3:
        raise SystemExit("Usage: python scrub_pptx_placeholders.py <input.pptx> <output.pptx>")

    src = Path(sys.argv[1])
    dst = Path(sys.argv[2])
    # Opening dst in "w" mode truncates it immediately. If it is the same file
    # as src, the input would be wiped before any member could be read.
    if src.resolve() == dst.resolve():
        raise SystemExit("Input and output paths must be different files")
    dst.parent.mkdir(parents=True, exist_ok=True)

    # str.startswith accepts a tuple, so one call covers all scrubbed folders.
    scrubbed_prefixes = ("ppt/notesSlides/", "ppt/notesMasters/", "ppt/slideMasters/")

    with zipfile.ZipFile(src, "r") as zin, zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout:
        for item in zin.infolist():
            data = zin.read(item.filename)
            if item.filename.endswith(".xml") and item.filename.startswith(scrubbed_prefixes):
                data = scrub_xml(data.decode("utf-8")).encode("utf-8")
            # Passing the ZipInfo itself carries the member's name and
            # metadata over to the output archive.
            zout.writestr(item, data)
|
||
|
|
|
||
|
|
|
||
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|