"""Dump the text runs and a raw XML preview of slide 1 of a .pptx file.

A .pptx is a ZIP archive; the first slide lives at ``ppt/slides/slide1.xml``.
This script writes two files into ``out_dir``:

* ``slide1_texts.txt`` - fragment counts followed by each non-blank text run.
* ``slide1_xml_preview.txt`` - the first 8000 characters of the slide XML.
"""
import html
import os
import re
import zipfile

ppt = r"D:\F\yc\课程上架\福田商圈夜校\课程视频\钢琴演奏入门第一课.pptx"
out_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
slide1_out = os.path.join(out_dir, "slide1_texts.txt")
xml_out = os.path.join(out_dir, "slide1_xml_preview.txt")

SLIDE1_MEMBER = "ppt/slides/slide1.xml"
PREVIEW_CHARS = 8000

# Matches the content of DrawingML text runs (<a:t>...</a:t>).  The tag name
# must be followed by whitespace or '>' so that sibling elements whose names
# merely start with "a:t" (a:tbl, a:tr, a:tc, a:tab, ...) are not picked up.
TEXT_RUN_RE = re.compile(r"<a:t(?:\s[^>]*)?>([^<]*)</a:t>")


def extract_text_runs(xml_text):
    """Return the content of every ``<a:t>`` element in *xml_text*, in order.

    XML character/entity references (``&amp;``, ``&#20013;`` ...) are decoded
    so the result is the text as it appears on the slide.  Blank runs are
    kept; callers filter them if they only want visible text.
    """
    return [html.unescape(raw) for raw in TEXT_RUN_RE.findall(xml_text)]


def format_report(all_texts):
    """Build the text report: two count lines, a blank line, numbered runs.

    Only runs that contain something other than whitespace are listed, and
    they are numbered from 0 in document order.
    """
    meaningful = [t for t in all_texts if t.strip()]
    lines = [
        f"Total fragments: {len(all_texts)}\n",
        f"Meaningful fragments: {len(meaningful)}\n\n",
    ]
    lines.extend(f"[{i}] {t}\n" for i, t in enumerate(meaningful))
    return "".join(lines)


def main():
    """Read slide 1 from ``ppt`` and write the report and the XML preview."""
    with zipfile.ZipFile(ppt, "r") as z:
        # errors="replace" keeps the dump going even if the part contains
        # bytes that are not valid UTF-8.
        content = z.read(SLIDE1_MEMBER).decode("utf-8", errors="replace")

    # Create the output directory only after the input was read successfully.
    os.makedirs(out_dir, exist_ok=True)

    with open(slide1_out, "w", encoding="utf-8") as f:
        f.write(format_report(extract_text_runs(content)))

    with open(xml_out, "w", encoding="utf-8") as f:
        f.write(content[:PREVIEW_CHARS])

    print("Done")


if __name__ == "__main__":
    main()