def add_clip_by_title(self, new_title: str):
    """Locate ``new_title`` in the corrected transcript and add it as a clip.

    The time span for the title is looked up in
    ``<inter_dir>/corrected_transcript.json``. If the span overlaps an
    existing matched clip, the new clip is appended and the clip list is
    passed through ``PPTParser._merge_overlapping_clips``; otherwise the new
    clip is appended and the list is re-sorted by start time. The config is
    saved whenever ``self.clips`` was modified.

    Args:
        new_title: Title of the new knowledge point to search for.

    Returns:
        tuple: ``(clip_index, matched)``. ``matched`` is False (and
        ``clip_index`` is None) when the transcript file is missing or the
        title cannot be found in it. Otherwise ``matched`` is True and
        ``clip_index`` is the position in ``self.clips`` of the clip that
        now represents the new title; it is None only if no such clip can
        be located after merging.
    """
    # 1. Load corrected_transcript.json. Opening directly (instead of
    #    checking os.path.exists first) avoids a check-then-use race.
    transcript_path = os.path.join(self.inter_dir, 'corrected_transcript.json')
    try:
        with open(transcript_path, 'r', encoding='utf-8') as f:
            corrected_segments = json.load(f)
    except FileNotFoundError:
        logger.warning("corrected_transcript.json not found")
        return None, False

    # 2. Match the title against the transcript. The parser is created via
    #    __new__ so that PPTParser.__init__ is not run; only inter_dir is
    #    set on it before its private helpers are called.
    from .ppt_parser import PPTParser
    parser = PPTParser.__new__(PPTParser)
    parser.inter_dir = self.inter_dir
    result = parser._find_title_in_transcript(new_title, corrected_segments)

    if result is None:
        # No match: leave self.clips untouched (the GUI can show the
        # title as unmatched).
        logger.info(f"标题 '{new_title}' 在转录中未找到匹配")
        return None, False

    start, end = result

    # 3. Build the new clip.
    new_clip = {
        'title': new_title,
        'start': start,
        'end': end,
        'matched': True,
    }

    def _is_matched(clip):
        # Clips explicitly flagged matched=False are ignored for overlap
        # purposes; a missing flag counts as matched.
        return clip.get('matched', True) is not False

    # 4. Decide between merging and plain insertion. This must be evaluated
    #    before new_clip is appended, or it would overlap with itself.
    overlaps_existing = any(
        _is_matched(clip) and start < clip['end'] and end > clip['start']
        for clip in self.clips
    )

    self.clips.append(new_clip)
    if overlaps_existing:
        self.clips = parser._merge_overlapping_clips(self.clips)
    else:
        self.clips = sorted(self.clips, key=lambda c: c['start'])

    def _first_index(predicate):
        return next(
            (i for i, c in enumerate(self.clips) if predicate(c)),
            None,
        )

    # 5. Find the clip that represents the new title. An exact
    #    title/start match is tried first (always present on the
    #    no-overlap path).
    new_clip_index = _first_index(
        lambda c: c.get('title') == new_title and c.get('start') == start
    )
    if new_clip_index is None:
        # After a merge the surviving clip may carry a different title or
        # start, so fall back to the matched clip whose span contains the
        # new one.
        # NOTE(review): assumes _merge_overlapping_clips keeps a clip
        # covering the union of the merged spans - confirm.
        new_clip_index = _first_index(
            lambda c: _is_matched(c)
            and c.get('start') is not None
            and c.get('end') is not None
            and c['start'] <= start
            and end <= c['end']
        )
    if new_clip_index is None:
        logger.warning(
            "Could not locate clip for title '%s' after merging", new_title
        )

    # 6. Persist the updated clip list. This now always runs once
    #    self.clips has been modified, so memory and disk stay in sync.
    self._save_config()
    return new_clip_index, True