Files
lesson-highlights/temp/debug_slide1.py
T

24 lines
905 B
Python

import html
import os
import re
import zipfile
# Default input deck and output directory used when the script is run directly.
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
out_dir = r"D:\F\NewI\opencode\daily-workspace\temp"

# Archive member that is read from the .pptx (a zip package).
SLIDE1_MEMBER = "ppt/slides/slide1.xml"
# Captures the raw (still XML-escaped) character data of each <a:t> element.
TEXT_RUN_RE = re.compile(r"<a:t[^>]*>([^<]*)</a:t>")
# Number of leading characters of the slide XML copied into the preview file.
XML_PREVIEW_CHARS = 8000


def extract_text_fragments(slide_xml: str) -> list:
    """Return the text of every ``<a:t>`` element in *slide_xml*, in order.

    The regex captures the character data exactly as serialized, so entity
    and character references (``&amp;``, ``&lt;``, ``&#160;`` ...) are decoded
    here; otherwise the report would show markup escapes instead of the text.
    """
    return [html.unescape(raw) for raw in TEXT_RUN_RE.findall(slide_xml)]


def main(ppt_path: str = ppt, output_dir: str = out_dir) -> None:
    """Dump the text fragments and an XML preview of slide 1 of a .pptx file.

    Writes two UTF-8 files into *output_dir*:

    * ``slide1_texts.txt`` - total/meaningful fragment counts followed by one
      numbered line per fragment that is not whitespace-only.
    * ``slide1_xml_preview.txt`` - the first ``XML_PREVIEW_CHARS`` characters
      of the slide XML.

    Args:
        ppt_path: Path of the .pptx file to inspect.
        output_dir: Directory for the two output files; created if missing.

    Raises:
        FileNotFoundError: If *ppt_path* does not exist.
        zipfile.BadZipFile: If *ppt_path* is not a zip archive.
        KeyError: If the archive has no ``ppt/slides/slide1.xml`` member.
    """
    with zipfile.ZipFile(ppt_path, "r") as archive:
        # errors="replace" keeps the dump usable even if the XML has bad bytes.
        content = archive.read(SLIDE1_MEMBER).decode("utf-8", errors="replace")

    all_texts = extract_text_fragments(content)
    meaningful = [t for t in all_texts if t.strip()]

    # The output directory may not exist yet on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)
    slide1_out = os.path.join(output_dir, "slide1_texts.txt")
    xml_out = os.path.join(output_dir, "slide1_xml_preview.txt")

    with open(slide1_out, "w", encoding="utf-8") as f:
        f.write(f"Total fragments: {len(all_texts)}\n")
        f.write(f"Meaningful fragments: {len(meaningful)}\n\n")
        for i, t in enumerate(meaningful):
            f.write(f"[{i}] {t}\n")

    with open(xml_out, "w", encoding="utf-8") as f:
        f.write(content[:XML_PREVIEW_CHARS])

    print("Done")


if __name__ == "__main__":
    main()