Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
安全的PPTX读取脚本 - 不依赖markitdown,避免Google Vision API问题
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from pptx import Presentation
|
||||
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
||||
|
||||
|
||||
def extract_text_from_shape(shape):
    """Return the text carried by a shape, or an empty string.

    Args:
        shape: Any object; shapes that hold text expose it through a
            ``text`` attribute.

    Returns:
        The value of ``shape.text`` when the attribute exists, otherwise
        ``""``.
    """
    # Shapes without a ``text`` attribute simply contribute nothing.
    return getattr(shape, "text", "")
|
||||
|
||||
|
||||
def extract_text_from_slide(slide):
    """Collect every non-blank piece of text found on a slide.

    The title (when the slide has a title placeholder with non-blank text)
    is emitted first, prefixed with ``标题: ``. The text of every other
    shape that exposes text follows, in the order the shapes are iterated.

    Args:
        slide: A slide object exposing ``shapes`` (iterable, with a
            ``title`` attribute that is ``None`` when there is no title
            placeholder).

    Returns:
        A list of strings; empty when the slide carries no text.
    """
    texts = []

    # Title first, with its label.
    title_shape = slide.shapes.title
    if title_shape is not None:
        title = title_shape.text
        if title.strip():
            texts.append(f"标题: {title}")

    # shape_id is unique among the shapes of one slide, so it identifies the
    # title placeholder even though iteration yields a fresh proxy object.
    title_id = getattr(title_shape, "shape_id", None)

    for shape in slide.shapes:
        # The title placeholder is itself a member of slide.shapes; skip it
        # here so its text is not reported a second time.
        if title_id is not None and getattr(shape, "shape_id", None) == title_id:
            continue

        # Text boxes, placeholders and any other text-bearing shape are all
        # handled the same way: take the text if present and non-blank.
        text = extract_text_from_shape(shape)
        if text and text.strip():
            texts.append(text)

    return texts
|
||||
|
||||
|
||||
def read_pptx_safe(file_path):
    """Read a PPTX file and return its text, one section per slide.

    Each slide that yields text produces a ``--- 幻灯片 N ---`` header line
    (N is the 1-based slide number), followed by its text entries and an
    empty separator line. Slides without text are omitted.

    Args:
        file_path: Path of the presentation to open.

    Returns:
        The joined text, or ``None`` if anything raised while opening or
        reading the presentation (the error is printed to stderr).
    """
    try:
        lines = []
        for number, slide in enumerate(Presentation(file_path).slides, start=1):
            slide_texts = extract_text_from_slide(slide)
            if not slide_texts:
                continue
            lines += [f"--- 幻灯片 {number} ---", *slide_texts, ""]
        return "\n".join(lines)
    except Exception as err:
        # Top-level boundary: report the problem and signal failure with None.
        print(f"Error reading PPTX: {err}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: extract the text of one PPTX file.

    Expects exactly one command-line argument, the path of the
    presentation. The extracted text is written to ``temp/pptx_output.txt``
    (UTF-8, relative to the current working directory) and then echoed to
    stdout.

    Exits with status 1 on wrong usage, when the path does not exist, or
    when no content could be extracted.
    """
    if len(sys.argv) != 2:
        print("Usage: python pptx_reader_safe.py <presentation.pptx>", file=sys.stderr)
        sys.exit(1)

    file_path = sys.argv[1]
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    content = read_pptx_safe(file_path)
    if not content:
        print("Failed to extract content", file=sys.stderr)
        sys.exit(1)

    # Save to a UTF-8 file first so the full text survives even when the
    # terminal cannot render it.
    output_path = "temp/pptx_output.txt"
    os.makedirs("temp", exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"Content extracted to: {output_path}")

    echoed = "\n" + content
    try:
        print(echoed)
    except UnicodeEncodeError:
        # stdout's encoding cannot represent some characters. The file
        # written above already holds the complete text, so echo a degraded
        # copy with replacement characters instead of crashing.
        encoding = sys.stdout.encoding or "utf-8"
        print(echoed.encode(encoding, errors="replace").decode(encoding))
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user