04db423416
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
322 lines
10 KiB
Python
322 lines
10 KiB
Python
"""
|
|
视频内容分析框架
|
|
用于分析心理学/恋爱技巧类视频内容
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
|
|
class VideoContentAnalyzer:
    """Keyword-driven analyzer for a video transcript.

    The analyzer classifies the video from its title and then scans the
    transcript sentence by sentence (sentences are delimited by the Chinese
    full stop "。") for fixed keyword lists. All results are accumulated in
    the ``analysis`` dict, which can be printed or saved as JSON.
    """

    # Category name -> keywords looked up in the (lower-cased) title.
    # Insertion order determines the order of the detected categories.
    _CATEGORY_KEYWORDS = {
        "relationship_psychology": ("爱", "恋爱", "女人", "男人", "感情", "关系"),
        "self_improvement": ("方法", "技巧", "提升", "改变"),
        "controversial": ("脏", "强行", "套路", "操控"),
        "educational": ("心理学", "心理", "科学", "研究"),
    }

    # Concepts reported when they occur anywhere in the transcript.
    _CONCEPT_KEYWORDS = (
        "吸引力",
        "价值",
        "框架",
        "需求感",
        "投资",
        "服从性",
        "筛选",
        "推拉",
        "冷读",
        "心锚",
    )

    # Phrases that mark a sentence as stating a principle.
    _PRINCIPLE_INDICATORS = (
        "原则是",
        "核心是",
        "关键在于",
        "最重要的是",
        "本质是",
        "根本在于",
    )

    # Words that mark a sentence as describing a technique.
    _TECHNIQUE_INDICATORS = ("方法", "技巧", "步骤", "操作", "做法", "策略", "战术")

    # Words that mark a sentence as a psychological insight.
    _INSIGHT_INDICATORS = (
        "心理学",
        "心理",
        "潜意识",
        "认知",
        "情绪",
        "动机",
        "需求",
        "人性",
    )

    # Words that mark a sentence as controversial.
    _CONTROVERSIAL_INDICATORS = (
        "脏",
        "强行",
        "操控",
        "套路",
        "欺骗",
        "利用",
        "不道德",
        "争议",
    )

    # Words that mark a sentence as touching on ethics.
    _ETHICAL_INDICATORS = (
        "尊重",
        "真诚",
        "诚实",
        "道德",
        "伦理",
        "责任",
        "伤害",
        "欺骗",
    )

    # Words that make a medium-length sentence quotable.
    _QUOTE_KEYWORDS = (
        "爱",
        "感情",
        "关系",
        "心理",
        "方法",
        "技巧",
        "价值",
        "吸引",
    )

    # Transcripts are split into sentences on the Chinese full stop only.
    _SENTENCE_DELIMITER = "。"

    def __init__(self, video_title):
        """Create an analyzer with an empty analysis for ``video_title``.

        Args:
            video_title: Title of the video; also used to derive the category.
        """
        self.video_title = video_title
        self.analysis = {
            "title": video_title,
            # Relies on self.video_title having been assigned just above.
            "category": self._determine_category(),
            "key_concepts": [],
            "core_principles": [],
            "practical_techniques": [],
            "psychological_insights": [],
            "controversial_points": [],
            "ethical_considerations": [],
            "key_quotes": [],
            "summary": "",
        }

    def _determine_category(self):
        """Return the list of categories whose keywords occur in the title.

        Returns:
            Category names in declaration order, or ``["unknown"]`` when no
            keyword matches.
        """
        title_lower = self.video_title.lower()
        detected = [
            category
            for category, keywords in self._CATEGORY_KEYWORDS.items()
            if any(keyword in title_lower for keyword in keywords)
        ]
        return detected or ["unknown"]

    def analyze_transcript(self, transcript_text):
        """Run every extraction step on the transcript.

        Prints a short progress header, fills all fields of ``self.analysis``
        and finally builds the summary (which depends on the earlier steps).

        Args:
            transcript_text: Full transcript as a single string.

        Returns:
            The populated ``self.analysis`` dict.
        """
        print(f"分析视频: {self.video_title}")
        print(f"类别: {', '.join(self.analysis['category'])}")
        print(f"转录长度: {len(transcript_text)} 字符")

        self._extract_key_concepts(transcript_text)
        self._extract_core_principles(transcript_text)
        self._extract_practical_techniques(transcript_text)
        self._extract_psychological_insights(transcript_text)
        self._identify_controversial_points(transcript_text)
        self._analyze_ethical_considerations(transcript_text)
        self._extract_key_quotes(transcript_text)
        # The summary reads the fields filled above, so it must run last.
        self._generate_summary(transcript_text)

        return self.analysis

    @classmethod
    def _matching_sentences(cls, text, indicators, limit=None, max_raw_length=None):
        """Collect stripped sentences that contain at least one indicator.

        Args:
            text: Transcript to scan.
            indicators: Keywords; a sentence matches if it contains any of them.
            limit: Keep only the first ``limit`` matches (``None`` keeps all).
            max_raw_length: When given, sentences whose unstripped length is
                not below this value are skipped.

        Returns:
            Matching sentences in transcript order, whitespace-stripped.
        """
        matches = []
        for sentence in text.split(cls._SENTENCE_DELIMITER):
            if max_raw_length is not None and len(sentence) >= max_raw_length:
                continue
            if any(indicator in sentence for indicator in indicators):
                matches.append(sentence.strip())
        return matches if limit is None else matches[:limit]

    def _extract_key_concepts(self, text):
        """Record which known concept keywords occur in the transcript."""
        # Plain substring lookup; a real NLP pipeline could replace this.
        self.analysis["key_concepts"] = [
            concept for concept in self._CONCEPT_KEYWORDS if concept in text
        ]

    def _extract_core_principles(self, text):
        """Record up to 5 sentences that state a principle."""
        self.analysis["core_principles"] = self._matching_sentences(
            text, self._PRINCIPLE_INDICATORS, limit=5
        )

    def _extract_practical_techniques(self, text):
        """Record up to 10 short sentences that describe a technique."""
        # Sentences of 100+ characters are skipped to avoid overly long items.
        self.analysis["practical_techniques"] = self._matching_sentences(
            text, self._TECHNIQUE_INDICATORS, limit=10, max_raw_length=100
        )

    def _extract_psychological_insights(self, text):
        """Record up to 8 sentences that mention psychological terms."""
        self.analysis["psychological_insights"] = self._matching_sentences(
            text, self._INSIGHT_INDICATORS, limit=8
        )

    def _identify_controversial_points(self, text):
        """Record every sentence that contains a controversial keyword."""
        self.analysis["controversial_points"] = self._matching_sentences(
            text, self._CONTROVERSIAL_INDICATORS
        )

    def _analyze_ethical_considerations(self, text):
        """Record every sentence that contains an ethics-related keyword."""
        self.analysis["ethical_considerations"] = self._matching_sentences(
            text, self._ETHICAL_INDICATORS
        )

    def _extract_key_quotes(self, text):
        """Record up to 5 medium-length sentences containing important words."""
        quotes = []
        for raw_sentence in text.split(self._SENTENCE_DELIMITER):
            sentence = raw_sentence.strip()
            # Only sentences of moderate length (21..149 characters) qualify.
            if not 20 < len(sentence) < 150:
                continue
            if any(word in sentence for word in self._QUOTE_KEYWORDS):
                quotes.append(sentence)
        self.analysis["key_quotes"] = quotes[:5]

    def _generate_summary(self, text):
        """Build a one-sentence summary from the already extracted fields.

        Clauses are joined with a full-width comma so the sentence stays
        well-formed even when no key concept was found; if nothing was
        extracted at all, a fallback clause is used instead of an empty
        statement.

        Args:
            text: Transcript text (currently unused; kept for interface
                symmetry with the other steps).
        """
        result = self.analysis
        clauses = []

        if result["key_concepts"]:
            clauses.append(
                f"主要探讨了关于{', '.join(result['key_concepts'][:3])}等概念"
            )
        if result["practical_techniques"]:
            clauses.append(f"提出了{len(result['practical_techniques'])}个实用技巧")
        if result["controversial_points"]:
            clauses.append("其中包含一些具有争议性的观点")
        if result["ethical_considerations"]:
            clauses.append("同时也涉及伦理考量")
        if not clauses:
            clauses.append("暂未提取到明确的内容要点")

        result["summary"] = f"视频《{self.video_title}》" + ",".join(clauses) + "。"

    def save_analysis(self, output_path):
        """Write the analysis as pretty-printed UTF-8 JSON.

        Missing parent directories of ``output_path`` are created first.

        Args:
            output_path: Destination file path (``str`` or path-like).
        """
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("w", encoding="utf-8") as f:
            json.dump(self.analysis, f, ensure_ascii=False, indent=2)

        print(f"分析结果已保存到: {output_path}")

    def print_analysis(self):
        """Print a human-readable report of the current analysis."""
        report = self.analysis

        print("\n" + "=" * 60)
        print("视频内容分析报告")
        print("=" * 60)

        print(f"\n📺 视频标题: {report['title']}")
        print(f"📂 类别: {', '.join(report['category'])}")
        print(f"📝 总结: {report['summary']}")

        print(f"\n🔑 关键概念 ({len(report['key_concepts'])}个):")
        for concept in report["key_concepts"]:
            print(f" • {concept}")

        print(f"\n🎯 核心原则 ({len(report['core_principles'])}个):")
        for i, principle in enumerate(report["core_principles"], 1):
            print(f" {i}. {principle}")

        # Only the first five techniques are listed; the rest are counted.
        print(f"\n🛠️ 实用技巧 ({len(report['practical_techniques'])}个):")
        for i, technique in enumerate(report["practical_techniques"][:5], 1):
            print(f" {i}. {technique}")
        if len(report["practical_techniques"]) > 5:
            print(f" ... 还有{len(report['practical_techniques']) - 5}个技巧")

        # Only the first three insights are listed.
        print(f"\n🧠 心理学洞察 ({len(report['psychological_insights'])}个):")
        for i, insight in enumerate(report["psychological_insights"][:3], 1):
            print(f" {i}. {insight}")

        if report["controversial_points"]:
            print(f"\n⚠️ 争议点 ({len(report['controversial_points'])}个):")
            for i, point in enumerate(report["controversial_points"], 1):
                print(f" {i}. {point}")

        if report["ethical_considerations"]:
            print(f"\n⚖️ 伦理考量 ({len(report['ethical_considerations'])}个):")
            for i, consideration in enumerate(report["ethical_considerations"], 1):
                print(f" {i}. {consideration}")

        if report["key_quotes"]:
            print(f"\n💬 关键引述 ({len(report['key_quotes'])}个):")
            for i, quote in enumerate(report["key_quotes"], 1):
                print(f' {i}. "{quote}"')
|
|
|
|
|
|
# Usage example
if __name__ == "__main__":
    demo_analyzer = VideoContentAnalyzer("一个很'脏'的方法,让你喜欢的女人强行爱上你!")

    # A real run would load the transcript text and feed it to the analyzer:
    #   transcript = "转录文本内容..."
    #   analysis = demo_analyzer.analyze_transcript(transcript)
    #
    # and then persist and display the result:
    #   demo_analyzer.save_analysis("video_analysis.json")
    #   demo_analyzer.print_analysis()

    print("分析框架已创建,等待转录文本...")
|