#!/usr/bin/env python3
"""
安全的PPTX读取脚本 - 不依赖markitdown，避免Google Vision API问题
"""

import sys
import os
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE


def extract_text_from_shape(shape):
    """Return the text carried by *shape*, or an empty string.

    Shapes that expose no ``text`` attribute yield ``""`` instead of
    raising.
    """
    return getattr(shape, "text", "")


def extract_text_from_slide(slide):
    """Collect the non-blank text of a slide, title first.

    The title shape (``slide.shapes.title``) is reported exactly once,
    prefixed with ``标题: ``. Every other shape that exposes a ``text``
    attribute contributes its text verbatim, in the order ``slide.shapes``
    yields the shapes. Shapes without a ``text`` attribute are ignored.

    Args:
        slide: Slide-like object whose ``shapes`` attribute is iterable and
            has a ``title`` attribute (the title shape, or a falsy value
            when the slide has none).

    Returns:
        A list of strings; empty when the slide holds no non-blank text.
    """
    texts = []

    # Title goes first, with its label.
    title_shape = slide.shapes.title
    if title_shape:
        title = title_shape.text
        if title.strip():
            texts.append(f"标题: {title}")

    for shape in slide.shapes:
        # The title shape is itself a member of slide.shapes; skip it so its
        # text is not emitted a second time without the label. Equality is
        # used rather than identity because shape objects may be distinct
        # proxies for the same underlying shape.
        if title_shape and shape == title_shape:
            continue
        # Text boxes, placeholders and any other text-bearing shape are all
        # handled the same way, so no shape-type dispatch is needed.
        text = getattr(shape, "text", "")
        if text and text.strip():
            texts.append(text)

    return texts


def read_pptx_safe(file_path):
    """Read a PPTX file and return its text as one newline-joined string.

    Each slide that yields text is rendered as a ``--- 幻灯片 N ---`` header
    (N is the 1-based slide number), followed by the slide's text entries
    and an empty line. Slides without text are left out entirely.

    Args:
        file_path: Path of the presentation to open.

    Returns:
        The combined text, or ``None`` when anything goes wrong while
        opening or walking the presentation (the error is reported on
        stderr rather than raised).
    """
    try:
        lines = []
        deck = Presentation(file_path)
        for number, slide in enumerate(deck.slides, start=1):
            slide_texts = extract_text_from_slide(slide)
            if not slide_texts:
                continue
            lines.append(f"--- 幻灯片 {number} ---")
            lines.extend(slide_texts)
            lines.append("")
        return "\n".join(lines)
    except Exception as err:
        # Deliberately broad: this function is the "safe" boundary and must
        # never propagate a parsing error to its caller.
        print(f"Error reading PPTX: {err}", file=sys.stderr)
        return None


def main():
    """Command-line entry point: extract the text of one PPTX file.

    Expects exactly one argument, the path of the presentation. On success
    the extracted text is written to ``temp/pptx_output.txt`` (UTF-8,
    relative to the current working directory) and then echoed to stdout.

    Exits with status 1 when the argument count is wrong, the file does not
    exist, or no content was extracted (a read failure or an empty result).
    """
    if len(sys.argv) != 2:
        print("Usage: python pptx_reader_safe.py <presentation.pptx>", file=sys.stderr)
        sys.exit(1)

    file_path = sys.argv[1]
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    content = read_pptx_safe(file_path)
    if not content:
        print("Failed to extract content", file=sys.stderr)
        sys.exit(1)

    # Save to a file first so the result survives terminal encoding problems.
    output_path = "temp/pptx_output.txt"
    os.makedirs("temp", exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"Content extracted to: {output_path}")

    try:
        print("\n" + content)
    except UnicodeEncodeError:
        # stdout cannot represent some characters of the extracted text.
        # The file above is already complete, so report the problem instead
        # of crashing with a traceback.
        print(
            f"Terminal cannot display the extracted text; see {output_path}",
            file=sys.stderr,
        )


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()