Initial commit: skills library

- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,86 @@
+import pandas as pd
+import chardet
+import os
+
+
+def check_file_format(file_path):
+    """检测文件格式和编码"""
+    print(f"检查文件: {file_path}")
+
+    # 检查文件扩展名
+    ext = os.path.splitext(file_path)[1].lower()
+    print(f"文件扩展名: {ext}")
+
+    if ext in [".xls", ".xlsx"]:
+        print("检测到Excel文件，尝试读取...")
+        try:
+            # 首先尝试读取二进制内容来判断格式
+            with open(file_path, "rb") as f:
+                header = f.read(512)
+
+            # 检查是否是二进制格式（.xls）
+            if b"\x09\x08\x10\x00\x00\x06\x05\x00" in header or b"Workbook" in header:
+                print("确认是.xls (二进制) 格式")
+
+                # 尝试用xlrd读取
+                try:
+                    import xlrd
+
+                    workbook = xlrd.open_workbook(file_path, encoding_override="gbk")
+                    print(f"工作表数量: {len(workbook.sheets())}")
+                    for i, sheet in enumerate(workbook.sheets()):
+                        print(
+                            f"  表{i}: {sheet.name} ({sheet.nrows}行, {sheet.ncols}列)"
+                        )
+                except:
+                    print("使用xlrd读取失败")
+
+            elif ext == ".xlsx":
+                print("检测到.xlsx格式")
+                try:
+                    df = pd.read_excel(file_path, sheet_name=None)
+                    print(f"工作表数量: {len(df.keys())}")
+                    for sheet_name, sheet_df in df.items():
+                        print(
+                            f"  表: {sheet_name} ({len(sheet_df)}行, {len(sheet_df.columns)}列)"
+                        )
+                except Exception as e:
+                    print(f"读取.xlsx失败: {e}")
+
+        except Exception as e:
+            print(f"检测Excel文件失败: {e}")
+
+    else:
+        # 对于文本文件，检测编码
+        try:
+            with open(file_path, "rb") as f:
+                raw_data = f.read(10000)  # 读取前10KB用于检测
+                encoding_result = chardet.detect(raw_data)
+                print(
+                    f"检测到编码: {encoding_result['encoding']} (置信度: {encoding_result['confidence']:.2f})"
+                )
+
+                # 尝试以检测到的编码读取前几行
+                try:
+                    decoded_content = raw_data.decode(encoding_result["encoding"])
+                    lines = decoded_content.split("\n")[:10]  # 前10行
+                    print("前几行内容:")
+                    for i, line in enumerate(lines):
+                        if line.strip():
+                            print(
+                                f"  {i + 1}: {line[:100]}{'...' if len(line) > 100 else ''}"
+                            )
+                except Exception as e:
+                    print(f"解码失败: {e}")
+
+        except Exception as e:
+            print(f"检测文本文件失败: {e}")
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) > 1:
+        check_file_format(sys.argv[1])
+    else:
+        print("用法: python check_file_format.py <file_path>")
@@ -0,0 +1,38 @@
+import pandas as pd
+import sys
+import os
+
+
+def convert_xls_to_xlsx(xls_file, xlsx_file=None):
+    """将.xls文件转换为.xlsx文件"""
+    if not xlsx_file:
+        xlsx_file = os.path.splitext(xls_file)[0] + ".xlsx"
+
+    try:
+        # 尝试使用xlrd读取.xls文件
+        df = pd.read_excel(xls_file, engine="xlrd")
+
+        # 保存为.xlsx格式
+        df.to_excel(xlsx_file, index=False)
+
+        print(f"成功转换: {xls_file} -> {xlsx_file}")
+        print(f"数据形状: {df.shape}")
+        print("前几行预览:")
+        print(df.head())
+
+        return True
+
+    except Exception as e:
+        print(f"转换失败: {e}")
+        return False
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("用法: python convert_xls_to_xlsx.py <input.xls> [output.xlsx]")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2] if len(sys.argv) > 2 else None
+
+    convert_xls_to_xlsx(input_file, output_file)
@@ -0,0 +1,65 @@
+import pandas as pd
+import sys
+
+
+def parse_holdings_correct(file_path):
+    """修正版持仓解析器 - 支持.csv和.xls格式"""
+    try:
+        # 尝试检测文件类型并用相应方式读取
+        if file_path.lower().endswith(".csv") or "\t" in open(file_path, "rb").read(
+            100
+        ).decode("utf-8", errors="ignore"):
+            # 尝试作为CSV读取（制表符分隔）
+            try:
+                df = pd.read_csv(file_path, encoding="utf-8", sep="\t")
+                print("成功以UTF-8制表符分隔方式读取")
+            except:
+                try:
+                    df = pd.read_csv(file_path, encoding="gbk", sep="\t")
+                    print("成功以GBK制表符分隔方式读取")
+                except:
+                    df = pd.read_csv(file_path, encoding="gb2312", sep="\t")
+                    print("成功以GB2312制表符分隔方式读取")
+        elif file_path.lower().endswith(".xls"):
+            # 使用xlrd读取xls文件
+            try:
+                df = pd.read_excel(file_path, engine="xlrd", encoding="gbk")
+                print("成功以.xls格式读取")
+            except:
+                # 尝试作为制表符分隔的文本文件读取
+                df = pd.read_csv(file_path, sep="\t", encoding="gbk")
+                print("成功以制表符分隔文本格式读取.xls文件")
+        elif file_path.lower().endswith(".xlsx"):
+            df = pd.read_excel(file_path, engine="openpyxl")
+            print("成功以.xlsx格式读取")
+        else:
+            # 尝试作为普通CSV读取
+            try:
+                df = pd.read_csv(file_path, encoding="utf-8")
+                print("成功以UTF-8 CSV格式读取")
+            except:
+                df = pd.read_csv(file_path, encoding="gbk")
+                print("成功以GBK CSV格式读取")
+
+        print(f"数据形状: {df.shape}")
+        print("列名:")
+        for i, col in enumerate(df.columns):
+            print(f"  {i}: {col}")
+
+        print("\n前5行数据:")
+        print(df.head())
+
+        return df
+
+    except Exception as e:
+        print(f"解析失败: {e}")
+        return None
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("用法: python parse_holdings_correct.py <file_path>")
+        sys.exit(1)
+
+    file_path = sys.argv[1]
+    parse_holdings_correct(file_path)