feat: add Pipeline.add_clip_by_title

This commit is contained in:
hmo
2026-05-04 00:22:55 +08:00
parent 440f481599
commit fc76ded3e4
+69
View File
@@ -584,6 +584,75 @@ class Pipeline:
# 保存更新后的 config # 保存更新后的 config
self._save_config() self._save_config()
def add_clip_by_title(self, new_title):
    """
    Locate a new title in the corrected transcript and add it as a clip.

    The title is matched against ``corrected_transcript.json`` in
    ``self.inter_dir``. If the matched time span overlaps an existing
    matched clip, the clip list is passed through the parser's
    ``_merge_overlapping_clips``; otherwise the new clip is simply added.
    The clip list is then ordered by ``start`` and the config is saved.

    Args:
        new_title: Title of the new knowledge point to look up.

    Returns:
        tuple: ``(clip_index, matched)``.
            * ``(None, False)`` when the transcript file is missing or the
              title cannot be found in it; ``self.clips`` is left untouched.
            * ``(index, True)`` on success, where ``index`` is the position
              in ``self.clips`` of the new clip, or of the clip it was
              merged into.
            * ``(None, True)`` only if the clip was added but could not be
              located afterwards (a warning is logged).
    """
    # 1. Load corrected_transcript.json
    transcript_path = os.path.join(self.inter_dir, 'corrected_transcript.json')
    if not os.path.exists(transcript_path):
        logger.warning("corrected_transcript.json not found")
        return None, False
    with open(transcript_path, 'r', encoding='utf-8') as f:
        corrected_segments = json.load(f)

    # 2. Match the title against the transcript.
    # The parser is created via __new__, so PPTParser.__init__ is not run;
    # only inter_dir is set on it before its helpers are called.
    # NOTE(review): presumably this avoids constructor arguments/side
    # effects -- confirm the two helpers need nothing else from __init__.
    from .ppt_parser import PPTParser
    parser = PPTParser.__new__(PPTParser)
    parser.inter_dir = self.inter_dir
    result = parser._find_title_in_transcript(new_title, corrected_segments)
    if result is None:
        # No match: do not add to clips (the GUI can show the unmatched state).
        logger.info(f"标题 '{new_title}' 在转录中未找到匹配")
        return None, False
    start, end = result

    # 3. Build the new clip.
    new_clip = {
        'title': new_title,
        'start': start,
        'end': end,
        'matched': True,
    }

    def _is_matched(clip):
        # A clip counts as matched unless it is explicitly flagged False.
        return clip.get('matched', True) is not False

    def _first_index(predicate):
        # Index of the first clip satisfying predicate, or None.
        return next(
            (i for i, c in enumerate(self.clips) if predicate(c)),
            None,
        )

    # 4. Does the new span overlap any existing matched clip?
    has_overlap = any(
        _is_matched(clip) and start < clip['end'] and end > clip['start']
        for clip in self.clips
    )

    self.clips.append(new_clip)
    if has_overlap:
        # Overlap: let the parser merge overlapping clips.
        self.clips = parser._merge_overlapping_clips(self.clips)
    # Keep clips ordered by start time (sorted() is stable).
    self.clips = sorted(self.clips, key=lambda c: c['start'])

    # Find the index of the new clip. After a merge the clip may have been
    # absorbed into another one (different title and/or start), so an exact
    # lookup alone could come up empty; fall back to the matched clip that
    # covers the new span, then to any matched clip overlapping it.
    new_clip_index = _first_index(
        lambda c: c['title'] == new_title and c['start'] == start
    )
    if new_clip_index is None:
        new_clip_index = _first_index(
            lambda c: _is_matched(c)
            and c['start'] <= start and end <= c['end']
        )
    if new_clip_index is None:
        new_clip_index = _first_index(
            lambda c: _is_matched(c)
            and start < c['end'] and end > c['start']
        )
    if new_clip_index is None:
        logger.warning(
            "Clip for title '%s' was added but could not be located "
            "after merging", new_title
        )

    # 5. Persist the updated clip list.
    self._save_config()
    return new_clip_index, True
# ==================== 主流程 ==================== # ==================== 主流程 ====================
def run(self) -> str: def run(self) -> str: