24 lines
905 B
Python
24 lines
905 B
Python
import html
import os
import re
import zipfile
|
|
|
|
# Hard-coded input presentation and output locations for this inspection script.
ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
out_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
slide1_out = os.path.join(out_dir, "slide1_texts.txt")
xml_out = os.path.join(out_dir, "slide1_xml_preview.txt")

# Archive member read from the .pptx (a zip file) for the first slide.
SLIDE1_FILE = "ppt/slides/slide1.xml"
# Number of leading characters of the slide XML copied into the preview file.
XML_PREVIEW_CHARS = 8000

# Captures the character data between an <a:t ...> start tag and its </a:t>.
_TEXT_RUN_RE = re.compile(r"<a:t[^>]*>([^<]*)</a:t>")


def extract_texts(content: str):
    """Collect the text of every ``<a:t>`` element found in *content*.

    Args:
        content: Slide XML as a decoded string.

    Returns:
        A ``(all_texts, meaningful)`` tuple of lists of strings.
        ``all_texts`` holds every captured fragment in document order;
        ``meaningful`` keeps only the fragments that are not empty or
        whitespace-only.
    """
    # The regex sees raw XML, so character data still carries escapes such
    # as "&amp;" or "&#x4E2D;"; decode them so the report shows real text.
    all_texts = [html.unescape(raw) for raw in _TEXT_RUN_RE.findall(content)]
    meaningful = [text for text in all_texts if text.strip()]
    return all_texts, meaningful


def format_report(all_texts, meaningful) -> str:
    """Build the text written to the fragment report file.

    Args:
        all_texts: Every extracted fragment (only its length is reported).
        meaningful: Non-blank fragments, listed one per line with an index.

    Returns:
        Two count lines, a blank line, then one ``[index] text`` line per
        meaningful fragment.
    """
    parts = [
        f"Total fragments: {len(all_texts)}\n",
        f"Meaningful fragments: {len(meaningful)}\n\n",
    ]
    parts.extend(f"[{i}] {t}\n" for i, t in enumerate(meaningful))
    return "".join(parts)


def main() -> None:
    """Read slide 1 from the presentation and write the two output files.

    Writes the fragment report to ``slide1_out`` and the first
    ``XML_PREVIEW_CHARS`` characters of the slide XML to ``xml_out``,
    then prints ``Done``.

    Raises:
        FileNotFoundError: If the presentation file does not exist.
        KeyError: If the archive has no ``ppt/slides/slide1.xml`` member.
    """
    with zipfile.ZipFile(ppt, "r") as z:
        # errors="replace" keeps the script going on malformed byte sequences.
        content = z.read(SLIDE1_FILE).decode("utf-8", errors="replace")

    all_texts, meaningful = extract_texts(content)

    # open(..., "w") does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    with open(slide1_out, "w", encoding="utf-8") as f:
        f.write(format_report(all_texts, meaningful))

    with open(xml_out, "w", encoding="utf-8") as f:
        f.write(content[:XML_PREVIEW_CHARS])

    print("Done")


if __name__ == "__main__":
    main()