feat: add Pipeline.reextract_clip for single-title re-matching

This commit is contained in:
hmo
2026-05-04 00:19:37 +08:00
parent 6a5ec9c04f
commit 0fbf8757fa
+77
View File
@@ -482,6 +482,83 @@ class Pipeline:
self.step_callback('burning')
return final_path
# ==================== 辅助方法 ====================
def _save_config(self) -> None:
"""将 self.clips 等配置写回 generated_config.yaml。"""
import yaml
config_path = os.path.join(self.output_dir, 'generated_config.yaml')
# 保留原有配置,只更新 clips 和 video_params
saved_config = {}
if os.path.exists(config_path):
try:
with open(config_path, 'r', encoding='utf-8') as f:
saved_config = yaml.safe_load(f) or {}
except Exception:
pass
saved_config['clips'] = self.clips
# 同步 video_params
if 'video_params' not in saved_config:
saved_config['video_params'] = self.config.get('video_params', {})
with open(config_path, 'w', encoding='utf-8') as f:
yaml.dump(saved_config, f, allow_unicode=True, default_flow_style=False)
logger.info(f"配置已保存: {config_path}")
def reextract_clip(self, clip_index: int, new_title: str) -> bool:
"""
用新标题重新匹配单个 clip 的时间段。
Args:
clip_index: clip 在 self.clips 中的索引
new_title: 新的标题文字
Returns:
bool: 是否匹配成功(匹配到了返回 True,匹配不到返回 False)
"""
# 1. 加载 corrected_transcript.json
transcript_path = os.path.join(self.inter_dir, 'corrected_transcript.json')
if not os.path.exists(transcript_path):
logger.warning(f"corrected_transcript.json not found: {transcript_path}")
return False
with open(transcript_path, 'r', encoding='utf-8') as f:
corrected_segments = json.load(f)
# 2. 调用 PPTParser._find_title_in_transcript 匹配新标题
from .ppt_parser import PPTParser
# 用 __new__ 绕过 __init__,只设置 inter_dir
parser = PPTParser.__new__(PPTParser)
parser.inter_dir = self.inter_dir
result = parser._find_title_in_transcript(new_title, corrected_segments)
if result is None:
# 匹配不到:标记为 unmatched,不参与烧录
self.clips[clip_index]['matched'] = False
self.clips[clip_index]['title'] = new_title
clip_json = os.path.join(self.inter_dir, f'clip{clip_index + 1}.json')
if os.path.exists(clip_json):
os.remove(clip_json)
self._save_config()
return False
start, end = result
self.clips[clip_index]['title'] = new_title
self.clips[clip_index]['start'] = start
self.clips[clip_index]['end'] = end
self.clips[clip_index]['matched'] = True
# 3. 删除对应 json(触发重新生成)
clip_json = os.path.join(self.inter_dir, f'clip{clip_index + 1}.json')
if os.path.exists(clip_json):
os.remove(clip_json)
# 4. 重新合并重叠片段
self.clips = parser._merge_overlapping_clips(self.clips)
# 5. 保存更新后的 config
self._save_config()
return True
# ==================== 主流程 ====================
def run(self) -> str: