Initial commit: skills library

- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,321 @@
+"""
+视频内容分析框架
+用于分析心理学/恋爱技巧类视频内容
+"""
+
+import json
+from pathlib import Path
+
+
+class VideoContentAnalyzer:
+    """Keyword-based analyzer for transcripts of psychology / dating-advice videos.
+
+    The video is categorized from its title at construction time.
+    ``analyze_transcript`` then scans a transcript against fixed keyword
+    lists and stores everything it finds in ``self.analysis``, a plain dict
+    that ``save_analysis`` writes out as JSON.
+    """
+
+    # Transcripts are split into sentences on the Chinese full stop only.
+    _SENTENCE_DELIMITER = "。"
+
+    # Category name -> title keywords; a category is detected as soon as one
+    # of its keywords occurs in the (lower-cased) title.
+    _CATEGORY_KEYWORDS = {
+        "relationship_psychology": ["爱", "恋爱", "女人", "男人", "感情", "关系"],
+        "self_improvement": ["方法", "技巧", "提升", "改变"],
+        "controversial": ["脏", "强行", "套路", "操控"],
+        "educational": ["心理学", "心理", "科学", "研究"],
+    }
+
+    # Terms reported as "key concepts" when they occur anywhere in the text.
+    _CONCEPT_KEYWORDS = (
+        "吸引力",
+        "价值",
+        "框架",
+        "需求感",
+        "投资",
+        "服从性",
+        "筛选",
+        "推拉",
+        "冷读",
+        "心锚",
+    )
+
+    # Phrases that mark a sentence as belonging to the respective section.
+    _PRINCIPLE_INDICATORS = (
+        "原则是",
+        "核心是",
+        "关键在于",
+        "最重要的是",
+        "本质是",
+        "根本在于",
+    )
+    _TECHNIQUE_INDICATORS = ("方法", "技巧", "步骤", "操作", "做法", "策略", "战术")
+    _INSIGHT_INDICATORS = (
+        "心理学",
+        "心理",
+        "潜意识",
+        "认知",
+        "情绪",
+        "动机",
+        "需求",
+        "人性",
+    )
+    _CONTROVERSIAL_INDICATORS = (
+        "脏",
+        "强行",
+        "操控",
+        "套路",
+        "欺骗",
+        "利用",
+        "不道德",
+        "争议",
+    )
+    _ETHICAL_INDICATORS = (
+        "尊重",
+        "真诚",
+        "诚实",
+        "道德",
+        "伦理",
+        "责任",
+        "伤害",
+        "欺骗",
+    )
+    _QUOTE_KEYWORDS = ("爱", "感情", "关系", "心理", "方法", "技巧", "价值", "吸引")
+
+    def __init__(self, video_title):
+        """Create an analyzer for the video called *video_title*.
+
+        The category list is derived from the title immediately; all other
+        fields of ``self.analysis`` start empty and are filled in by
+        ``analyze_transcript``.
+        """
+        self.video_title = video_title
+        self.analysis = {
+            "title": video_title,
+            "category": self._determine_category(),
+            "key_concepts": [],
+            "core_principles": [],
+            "practical_techniques": [],
+            "psychological_insights": [],
+            "controversial_points": [],
+            "ethical_considerations": [],
+            "key_quotes": [],
+            "summary": "",
+        }
+
+    def _determine_category(self):
+        """Return the categories whose keywords occur in the title.
+
+        Categories are listed in the order of ``_CATEGORY_KEYWORDS``; when
+        none matches, ``["unknown"]`` is returned.
+        """
+        title_lower = self.video_title.lower()
+        detected = [
+            category
+            for category, keywords in self._CATEGORY_KEYWORDS.items()
+            if any(keyword in title_lower for keyword in keywords)
+        ]
+        return detected or ["unknown"]
+
+    def _sentences_containing(self, text, keywords):
+        """Return the raw (unstripped) sentences of *text* containing any keyword."""
+        return [
+            sentence
+            for sentence in text.split(self._SENTENCE_DELIMITER)
+            if any(keyword in sentence for keyword in keywords)
+        ]
+
+    def analyze_transcript(self, transcript_text):
+        """Run every extraction step on *transcript_text*.
+
+        Args:
+            transcript_text: The transcript as a string. ``None`` is treated
+                as an empty transcript instead of raising.
+
+        Returns:
+            ``self.analysis``, updated in place.
+        """
+        if transcript_text is None:
+            transcript_text = ""
+
+        print(f"分析视频: {self.video_title}")
+        print(f"类别: {', '.join(self.analysis['category'])}")
+        print(f"转录长度: {len(transcript_text)} 字符")
+
+        # The summary must run last: it reads the results of earlier steps.
+        steps = (
+            self._extract_key_concepts,
+            self._extract_core_principles,
+            self._extract_practical_techniques,
+            self._extract_psychological_insights,
+            self._identify_controversial_points,
+            self._analyze_ethical_considerations,
+            self._extract_key_quotes,
+            self._generate_summary,
+        )
+        for step in steps:
+            step(transcript_text)
+
+        return self.analysis
+
+    def _extract_key_concepts(self, text):
+        """Store the concept keywords that occur anywhere in *text*.
+
+        This is a plain substring check; no NLP is involved.
+        """
+        self.analysis["key_concepts"] = [
+            concept for concept in self._CONCEPT_KEYWORDS if concept in text
+        ]
+
+    def _extract_core_principles(self, text):
+        """Store up to 5 sentences that contain a principle indicator."""
+        matches = self._sentences_containing(text, self._PRINCIPLE_INDICATORS)
+        self.analysis["core_principles"] = [s.strip() for s in matches][:5]
+
+    def _extract_practical_techniques(self, text):
+        """Store up to 10 short sentences that contain a technique indicator.
+
+        Sentences of 100 characters or more (measured before stripping) are
+        skipped to avoid overly long entries.
+        """
+        matches = self._sentences_containing(text, self._TECHNIQUE_INDICATORS)
+        techniques = [s.strip() for s in matches if len(s) < 100]
+        self.analysis["practical_techniques"] = techniques[:10]
+
+    def _extract_psychological_insights(self, text):
+        """Store up to 8 sentences that contain a psychology indicator."""
+        matches = self._sentences_containing(text, self._INSIGHT_INDICATORS)
+        self.analysis["psychological_insights"] = [s.strip() for s in matches][:8]
+
+    def _identify_controversial_points(self, text):
+        """Store every sentence that contains a controversy indicator."""
+        matches = self._sentences_containing(text, self._CONTROVERSIAL_INDICATORS)
+        self.analysis["controversial_points"] = [s.strip() for s in matches]
+
+    def _analyze_ethical_considerations(self, text):
+        """Store every sentence that contains an ethics indicator."""
+        matches = self._sentences_containing(text, self._ETHICAL_INDICATORS)
+        self.analysis["ethical_considerations"] = [s.strip() for s in matches]
+
+    def _extract_key_quotes(self, text):
+        """Store up to 5 medium-length sentences that mention an important keyword.
+
+        A sentence qualifies when its stripped length is strictly between
+        20 and 150 characters.
+        """
+        quotes = []
+        for sentence in text.split(self._SENTENCE_DELIMITER):
+            sentence = sentence.strip()
+            if 20 < len(sentence) < 150 and any(
+                word in sentence for word in self._QUOTE_KEYWORDS
+            ):
+                quotes.append(sentence)
+
+        self.analysis["key_quotes"] = quotes[:5]
+
+    def _generate_summary(self, text):
+        """Build a one-sentence summary from the sections already extracted.
+
+        *text* is accepted for symmetry with the other steps but is not read.
+        Clauses are joined with a comma, so a missing leading clause no longer
+        produces output such as "主要探讨了，提出了…".
+        """
+        clauses = []
+
+        concepts = self.analysis["key_concepts"]
+        if concepts:
+            clauses.append(f"主要探讨了关于{', '.join(concepts[:3])}等概念")
+
+        techniques = self.analysis["practical_techniques"]
+        if techniques:
+            clauses.append(f"提出了{len(techniques)}个实用技巧")
+
+        if self.analysis["controversial_points"]:
+            clauses.append("其中包含一些具有争议性的观点")
+
+        if self.analysis["ethical_considerations"]:
+            clauses.append("同时也涉及伦理考量")
+
+        if not clauses:
+            # Nothing was extracted; say so instead of emitting a dangling verb.
+            clauses.append("未提取到明确的要点")
+
+        self.analysis["summary"] = (
+            f"视频《{self.video_title}》" + "，".join(clauses) + "。"
+        )
+
+    def save_analysis(self, output_path):
+        """Write ``self.analysis`` to *output_path* as UTF-8 encoded, indented JSON."""
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(self.analysis, f, ensure_ascii=False, indent=2)
+
+        print(f"分析结果已保存到: {output_path}")
+
+    @staticmethod
+    def _print_numbered(items, template="  {}. {}"):
+        """Print *items* as a list numbered from 1 using *template*."""
+        for index, item in enumerate(items, 1):
+            print(template.format(index, item))
+
+    def print_analysis(self):
+        """Print a human-readable report of ``self.analysis`` to stdout.
+
+        Techniques are capped at 5 entries (with a "more" line) and insights
+        at 3; the controversy, ethics and quote sections are printed only
+        when they are non-empty.
+        """
+        report = self.analysis
+
+        print("\n" + "=" * 60)
+        print("视频内容分析报告")
+        print("=" * 60)
+
+        print(f"\n📺 视频标题: {report['title']}")
+        print(f"📂 类别: {', '.join(report['category'])}")
+        print(f"📝 总结: {report['summary']}")
+
+        print(f"\n🔑 关键概念 ({len(report['key_concepts'])}个):")
+        for concept in report["key_concepts"]:
+            print(f"  • {concept}")
+
+        print(f"\n🎯 核心原则 ({len(report['core_principles'])}个):")
+        self._print_numbered(report["core_principles"])
+
+        techniques = report["practical_techniques"]
+        print(f"\n🛠️ 实用技巧 ({len(techniques)}个):")
+        self._print_numbered(techniques[:5])
+        if len(techniques) > 5:
+            print(f"  ... 还有{len(techniques) - 5}个技巧")
+
+        print(f"\n🧠 心理学洞察 ({len(report['psychological_insights'])}个):")
+        self._print_numbered(report["psychological_insights"][:3])
+
+        if report["controversial_points"]:
+            print(f"\n⚠️ 争议点 ({len(report['controversial_points'])}个):")
+            self._print_numbered(report["controversial_points"])
+
+        if report["ethical_considerations"]:
+            print(f"\n⚖️ 伦理考量 ({len(report['ethical_considerations'])}个):")
+            self._print_numbered(report["ethical_considerations"])
+
+        if report["key_quotes"]:
+            print(f"\n💬 关键引述 ({len(report['key_quotes'])}个):")
+            self._print_numbered(report["key_quotes"], '  {}. "{}"')
+
+
+# Usage example
+if __name__ == "__main__":
+    analyzer = VideoContentAnalyzer("一个很'脏'的方法，让你喜欢的女人强行爱上你！")
+
+    # Once a transcript is available, load it and run the analysis:
+    # transcript = "转录文本内容..."
+    # analysis = analyzer.analyze_transcript(transcript)
+
+    # Then persist and display the result:
+    # analyzer.save_analysis("video_analysis.json")
+    # analyzer.print_analysis()
+
+    print("分析框架已创建，等待转录文本...")
@@ -0,0 +1,155 @@
+"""
+快速转录方案：如果本地Whisper太慢，尝试其他方法
+"""
+
+import os
+import subprocess
+from pathlib import Path
+import json
+
+
+def extract_audio_from_video(video_path):
+    """Extract the audio track of a video into a WAV file using ffmpeg.
+
+    The audio is written next to the video (same name, ``.wav`` suffix) as
+    16-bit PCM, 16 kHz, mono. An existing file at that location is
+    overwritten because ffmpeg is invoked with ``-y``.
+
+    Args:
+        video_path: Location of the video, as ``str`` or ``pathlib.Path``.
+
+    Returns:
+        The ``Path`` of the WAV file, or ``None`` when ffmpeg could not be
+        started or exited with a non-zero status.
+    """
+    video_path = Path(video_path)
+    audio_path = video_path.with_suffix(".wav")
+
+    print(f"提取音频: {video_path.name} → {audio_path.name}")
+
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        str(video_path),
+        "-vn",
+        "-acodec",
+        "pcm_s16le",
+        "-ar",
+        "16000",
+        "-ac",
+        "1",
+        str(audio_path),
+    ]
+
+    try:
+        # Decode ffmpeg's output leniently: with the locale default codec a
+        # non-ASCII file name in the log can raise UnicodeDecodeError.
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+        )
+    except OSError as exc:
+        # Typically FileNotFoundError: the ffmpeg executable is not on PATH.
+        print(f"音频提取失败: {exc}")
+        return None
+
+    if result.returncode != 0:
+        print(f"音频提取失败: {result.stderr}")
+        return None
+
+    print(f"✅ 音频提取完成: {audio_path.stat().st_size / 1024 / 1024:.2f} MB")
+    return audio_path
+
+
+def check_whisper_availability():
+    """Report whether the ``whisper`` package can be imported.
+
+    Prints a status line and returns ``True`` when the import succeeds,
+    ``False`` when it raises ``ImportError``.
+    """
+    try:
+        import whisper  # noqa: F401 -- imported only to probe availability
+    except ImportError:
+        print("❌ Whisper未安装")
+        return False
+    else:
+        print("✅ Whisper已安装")
+        return True
+
+
+def transcribe_with_whisper(audio_path, model_name="tiny", language="zh"):
+    """Transcribe an audio file with a locally installed Whisper model.
+
+    This is a best-effort call: any failure (Whisper not installed, model
+    cannot be loaded, audio cannot be read, ...) is reported on stdout and
+    turned into a ``None`` result instead of an exception.
+
+    Args:
+        audio_path: Location of the audio file (``str`` or ``pathlib.Path``).
+        model_name: Whisper model to load. Defaults to ``"tiny"``, the
+            smallest model, to keep transcription fast.
+        language: Language code passed to ``model.transcribe``.
+
+    Returns:
+        Whatever ``model.transcribe`` returns, or ``None`` on failure.
+    """
+    try:
+        import whisper
+
+        print("加载Whisper模型...")
+        model = whisper.load_model(model_name)
+
+        print("开始转录...")
+        return model.transcribe(str(audio_path), language=language)
+    except Exception as e:  # deliberate catch-all: see docstring
+        print(f"Whisper转录失败: {e}")
+        return None
+
+
+def save_transcription(result, video_path):
+    """Write a transcription result next to the video and print a preview.
+
+    Two files are produced, named after the video: ``<name>.txt`` with the
+    plain text and ``<name>.json`` with the complete result.
+
+    Args:
+        result: Transcription result; must provide the text under the
+            ``"text"`` key and be JSON-serializable. A falsy value means
+            "nothing to save".
+        video_path: Location of the video (``str`` or ``pathlib.Path``).
+
+    Returns:
+        ``True`` when both files were written; ``False`` when *result* is
+        falsy or a file could not be written. Callers branch on this value,
+        so I/O errors are reported here rather than raised.
+    """
+    if not result:
+        return False
+
+    video_path = Path(video_path)
+    text = result["text"]
+
+    try:
+        # Plain text
+        txt_path = video_path.with_suffix(".txt")
+        with open(txt_path, "w", encoding="utf-8") as f:
+            f.write(text)
+
+        print(f"✅ 转录文本保存到: {txt_path.name}")
+        print(f"文本长度: {len(text)} 字符")
+
+        # Complete result (JSON)
+        json_path = video_path.with_suffix(".json")
+        with open(json_path, "w", encoding="utf-8") as f:
+            json.dump(result, f, ensure_ascii=False, indent=2)
+
+        print(f"✅ 完整结果保存到: {json_path.name}")
+    except OSError as exc:
+        print(f"❌ 写入文件失败: {exc}")
+        return False
+
+    # Preview, with an ellipsis only when the text was actually truncated
+    print("\n=== 转录预览（前500字符）===")
+    preview = text[:500]
+    print(preview + "..." if len(text) > 500 else preview)
+
+    return True
+
+
+def main(video_dir=None):
+    """Transcribe the first ``*.mp4`` file found in *video_dir*.
+
+    Steps: pick the first video (in sorted order), stop early when a
+    ``.txt`` transcript already exists, otherwise extract the audio,
+    transcribe it with Whisper and save the result. The temporary WAV file
+    is removed on every exit path once it has been created.
+
+    Args:
+        video_dir: Directory to search (``str`` or ``pathlib.Path``).
+            Defaults to the original hard-coded workspace directory.
+    """
+    print("=== 视频转录程序 ===")
+
+    # Locate the video; sorting makes the choice deterministic.
+    if video_dir is None:
+        video_dir = r"D:\F\NewI\opencode\daily-workspace\temp"
+    video_files = sorted(Path(video_dir).glob("*.mp4"))
+
+    if not video_files:
+        print("❌ 未找到mp4文件")
+        return
+
+    video_path = video_files[0]
+    print(f"处理视频: {video_path.name}")
+    print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB")
+
+    # Reuse an existing transcript instead of transcribing again.
+    txt_path = video_path.with_suffix(".txt")
+    if txt_path.exists():
+        print(f"✅ 已有转录文件: {txt_path.name}")
+        with open(txt_path, "r", encoding="utf-8") as f:
+            text = f.read()
+        print(f"文本长度: {len(text)} 字符")
+        print("\n=== 现有转录预览 ===")
+        print(text[:500] + "..." if len(text) > 500 else text)
+        return
+
+    # Check Whisper first: extracting audio is pointless without it and
+    # would leave a stray WAV file behind.
+    if not check_whisper_availability():
+        print("请安装Whisper: pip install openai-whisper")
+        return
+
+    # Extract the audio track.
+    audio_path = extract_audio_from_video(video_path)
+    if not audio_path:
+        print("❌ 无法提取音频")
+        return
+
+    try:
+        # Transcribe.
+        result = transcribe_with_whisper(audio_path)
+        if not result:
+            print("❌ 转录失败")
+            return
+
+        # Save the result.
+        if save_transcription(result, video_path):
+            print("\n✅ 转录完成！")
+        else:
+            print("❌ 保存转录结果失败")
+    finally:
+        # Remove the temporary audio file whether or not transcription worked.
+        if audio_path.exists():
+            audio_path.unlink()
+            print("临时音频文件已删除")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,43 @@
+import whisper
+import os
+from pathlib import Path
+
+# Video location: the first *.mp4 (in sorted order) of the working directory
+video_dir = Path(r"D:\F\NewI\opencode\daily-workspace\temp")
+video_files = sorted(video_dir.glob("*.mp4"))
+
+if not video_files:
+    print("未找到mp4文件")
+    # Raise SystemExit directly: the bare ``exit`` helper is installed by the
+    # ``site`` module and is not guaranteed to exist in every interpreter.
+    raise SystemExit(1)
+
+video_file = video_files[0]
+print(f"找到视频文件: {video_file.name}")
+print(f"文件大小: {video_file.stat().st_size / 1024 / 1024:.2f} MB")
+
+# Transcribe with Whisper
+print("\n加载Whisper模型...")
+model = whisper.load_model("base")  # "base" model: comparatively fast
+
+print("开始转录...")
+result = model.transcribe(str(video_file), language="zh")
+
+# Save the plain text next to the video
+output_file = video_file.with_suffix(".txt")
+with open(output_file, "w", encoding="utf-8") as f:
+    f.write(result["text"])
+
+print(f"\n✅ 转录完成！保存到: {output_file.name}")
+print(f"转录文本长度: {len(result['text'])} 字符")
+
+# Preview of the first 500 characters; the ellipsis is shown only when the
+# text was actually truncated.
+print("\n=== 转录预览（前500字符）===")
+preview = result["text"][:500]
+print(preview + "..." if len(result["text"]) > 500 else preview)
+
+# When segment information is present, save the complete result as well
+if "segments" in result:
+    import json
+
+    json_file = video_file.with_suffix(".json")
+    with open(json_file, "w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+    print(f"详细分段信息保存到: {json_file.name}")
@@ -0,0 +1,108 @@
+import os
+import subprocess
+import json
+import re
+from pathlib import Path
+
+import importlib
+import sys
+
+# Location of the video to process
+video_path = Path(
+    r'D:\F\NewI\opencode\daily-workspace\temp\一个很"脏"的方法，让你喜欢的女人强行爱上你！.mp4'
+)
+
+print(f"处理视频: {video_path.name}")
+print(f"文件大小: {video_path.stat().st_size / 1024 / 1024:.2f} MB")
+
+# 1. Extract the audio track (16-bit PCM, 16 kHz, mono) next to the video
+audio_path = video_path.with_suffix(".wav")
+print(f"\n1. 提取音频到: {audio_path.name}")
+
+ffmpeg_cmd = [
+    "ffmpeg",
+    "-y",
+    "-i",
+    str(video_path),
+    "-vn",
+    "-acodec",
+    "pcm_s16le",
+    "-ar",
+    "16000",
+    "-ac",
+    "1",
+    str(audio_path),
+]
+
+print(f"运行命令: {' '.join(ffmpeg_cmd[:4])}...")
+# Decode ffmpeg's output leniently: the file name is non-ASCII and the locale
+# default codec may not be able to decode the log.
+result = subprocess.run(
+    ffmpeg_cmd,
+    capture_output=True,
+    text=True,
+    encoding="utf-8",
+    errors="replace",
+)
+
+if result.returncode != 0:
+    print(f"提取音频失败: {result.stderr}")
+    # ``exit`` comes from the ``site`` module and may be missing; SystemExit
+    # is always available.
+    raise SystemExit(1)
+
+print("✅ 音频提取完成")
+
+# 2. Make sure FunASR is installed
+print("\n2. 检查FunASR安装...")
+try:
+    import funasr
+
+    print("✅ FunASR已安装")
+except ImportError:
+    print("❌ FunASR未安装，正在安装...")
+    # Run pip through the current interpreter so the packages land in the
+    # environment this script is running in, and check that it succeeded.
+    install = subprocess.run(
+        [sys.executable, "-m", "pip", "install", "funasr", "modelscope"],
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+    )
+    if install.returncode != 0:
+        print(f"❌ FunASR安装失败: {install.stderr}")
+        raise SystemExit(1)
+    # Let the import system notice packages installed after start-up.
+    importlib.invalidate_caches()
+    print("✅ FunASR安装完成")
+
+# 3. Transcribe the audio
+print("\n3. 开始转录...")
+try:
+    from funasr import AutoModel
+
+    # Load the model
+    print("加载Paraformer模型...")
+    model = AutoModel(
+        model="paraformer-zh",
+        vad_model="fsmn-vad",
+        punc_model="ct-punc",
+        disable_update=True,
+    )
+
+    # Transcribe
+    print("转录中...")
+    result = model.generate(
+        input=str(audio_path), batch_size_s=300, timestamp_granularity="sentence"
+    )
+
+    # Save the raw result as JSON next to the video
+    output_path = video_path.with_suffix(".json")
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+
+    print(f"✅ 转录完成，保存到: {output_path.name}")
+
+    # Print a short digest of the result
+    print("\n转录摘要:")
+    if isinstance(result, list) and result:
+        full_text = "".join(item["text"] for item in result if "text" in item)
+
+        print(f"总字符数: {len(full_text)}")
+        print(f"句子数: {len(result)}")
+        print("\n前3句:")
+        for i, item in enumerate(result[:3], 1):
+            if "text" in item:
+                print(f"  {i}. {item['text'][:100]}...")
+
+except Exception as e:
+    # Best effort: report the failure and still fall through to the cleanup.
+    print(f"❌ 转录失败: {e}")
+    import traceback
+
+    traceback.print_exc()
+
+# 4. Remove the temporary audio file
+print("\n4. 清理临时文件...")
+if audio_path.exists():
+    audio_path.unlink()
+    print("✅ 临时音频文件已删除")
+
+print("\n✅ 处理完成！")