From 6a5ec9c04f6538336574d4aeb600be9f4e006bcb Mon Sep 17 00:00:00 2001
From: hmo <hmo@yoin.fun>
Date: Mon, 4 May 2026 00:17:27 +0800
Subject: [PATCH] feat: add _find_title_in_transcript for single-title
 re-matching

---
 src/core/ppt_parser.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/core/ppt_parser.py b/src/core/ppt_parser.py
index 0a927d3..5ea03e1 100644
--- a/src/core/ppt_parser.py
+++ b/src/core/ppt_parser.py
@@ -963,6 +963,30 @@ class PPTParser:
             logger.warning(f"LLM提取片段失败: {e}")
             return None
 
+    def _find_title_in_transcript(self, title: str, corrected_segments: List[Dict]) -> Optional[Tuple[float, float]]:
+        """
+        Search the transcript for the title and return the time span of the first match.
+
+        Matching rule: case-sensitive substring test (a segment matches when its text contains title).
+
+        Args:
+            title: Knowledge-point title to look for.
+            corrected_segments: The segments list from corrected_transcript.json;
+                              each element has the form {start, end, text}.
+
+        Returns:
+            A (start, end) timestamp tuple, or None when an input is empty or nothing matches.
+        """
+        if not title or not corrected_segments:  # an empty title would match every segment
+            return None
+
+        # Scan segments in list order; the first one whose text contains title wins.
+        for seg in corrected_segments:
+            text = seg.get('text', '')
+            if title in text:
+                return (seg['start'], seg['end'])  # KeyError if a matched segment lacks either key
+        return None
+
     # ==================== 主流程 ====================
 
     def run(self) -> dict: