Initial commit: skills library

- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
This commit is contained in:
hmo
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,86 @@
import pandas as pd
import chardet
import os
# Signature that opens every OLE2 compound file, the container of binary .xls workbooks.
_OLE2_SIGNATURE = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
# BIFF8 BOF record; kept from the original check for workbooks stored as a raw stream.
_BIFF8_BOF = b"\x09\x08\x10\x00\x00\x06\x05\x00"


def _report_text_file(file_path):
    """Print the chardet-detected encoding and a preview of the first lines.

    Only the first 10000 bytes are sampled. All failures are reported on
    stdout instead of being raised.
    """
    import codecs

    sample_limit = 10000
    try:
        with open(file_path, "rb") as f:
            raw_data = f.read(sample_limit)
        encoding_result = chardet.detect(raw_data)
        print(
            f"检测到编码: {encoding_result['encoding']} (置信度: {encoding_result['confidence']:.2f})"
        )
        try:
            # The sample may stop in the middle of a multi-byte character.
            # An incremental decoder with final=False tolerates only that
            # trailing fragment and is still strict about everything else.
            # A None encoding (nothing detected) raises here and is reported.
            decoder = codecs.getincrementaldecoder(encoding_result["encoding"])()
            decoded_content = decoder.decode(
                raw_data, final=len(raw_data) < sample_limit
            )
            lines = decoded_content.split("\n")[:10]
            print("前几行内容:")
            for i, line in enumerate(lines):
                if line.strip():
                    print(
                        f" {i + 1}: {line[:100]}{'...' if len(line) > 100 else ''}"
                    )
        except Exception as e:
            print(f"解码失败: {e}")
    except Exception as e:
        print(f"检测文本文件失败: {e}")


def check_file_format(file_path):
    """Print a diagnostic report about a file's format and encoding.

    Files whose extension is ``.xls`` or ``.xlsx`` are inspected as Excel
    workbooks: the first 512 bytes are sniffed, binary workbooks are listed
    sheet by sheet through xlrd, and ``.xlsx`` files through pandas. An
    Excel-named file whose content is not a binary workbook is reported as
    such and then inspected as text. Every other extension is inspected as
    text directly.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        None. All findings and all errors are printed to stdout.
    """
    print(f"检查文件: {file_path}")
    ext = os.path.splitext(file_path)[1].lower()
    print(f"文件扩展名: {ext}")

    if ext not in (".xls", ".xlsx"):
        _report_text_file(file_path)
        return

    print("检测到Excel文件,尝试读取...")
    try:
        with open(file_path, "rb") as f:
            header = f.read(512)
        is_binary_xls = (
            header.startswith(_OLE2_SIGNATURE)
            or _BIFF8_BOF in header
            or b"Workbook" in header
        )
        if is_binary_xls:
            print("确认是.xls (二进制) 格式")
            try:
                # Imported lazily so a missing xlrd is reported, not fatal.
                import xlrd

                workbook = xlrd.open_workbook(file_path, encoding_override="gbk")
                sheets = workbook.sheets()
                print(f"工作表数量: {len(sheets)}")
                for i, sheet in enumerate(sheets):
                    print(
                        f"{i}: {sheet.name} ({sheet.nrows}行, {sheet.ncols}列)"
                    )
            except Exception as e:
                print(f"使用xlrd读取失败: {e}")
        elif ext == ".xlsx":
            print("检测到.xlsx格式")
            try:
                # sheet_name=None returns a dict of every sheet.
                df = pd.read_excel(file_path, sheet_name=None)
                print(f"工作表数量: {len(df.keys())}")
                for sheet_name, sheet_df in df.items():
                    print(
                        f" 表: {sheet_name} ({len(sheet_df)}行, {len(sheet_df.columns)}列)"
                    )
            except Exception as e:
                print(f"读取.xlsx失败: {e}")
        else:
            # An .xls name without a binary workbook signature: inspect it
            # as text instead of ending the report silently.
            print("文件内容不是Excel二进制格式,按文本文件检测")
            _report_text_file(file_path)
    except Exception as e:
        print(f"检测Excel文件失败: {e}")
# Script entry point: inspect the file named by the first CLI argument,
# or print the usage line when no argument was given.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        check_file_format(cli_args[0])
    else:
        print("用法: python check_file_format.py <file_path>")
@@ -0,0 +1,38 @@
import pandas as pd
import sys
import os
def convert_xls_to_xlsx(xls_file, xlsx_file=None):
    """Convert an .xls workbook into an .xlsx file.

    The input is loaded with ``pd.read_excel`` using the xlrd engine and the
    resulting frame is written with ``DataFrame.to_excel`` without the index
    column. ``read_excel`` is called without ``sheet_name``, so only the
    sheet pandas selects by default is converted.

    Args:
        xls_file: Path of the workbook to read.
        xlsx_file: Destination path. When falsy, the input path with its
            extension replaced by ``.xlsx`` is used.

    Returns:
        True when reading, writing and the preview all succeeded; False when
        any step raised (the reason is printed).
    """
    target_path = xlsx_file or os.path.splitext(xls_file)[0] + ".xlsx"

    try:
        frame = pd.read_excel(xls_file, engine="xlrd")
        frame.to_excel(target_path, index=False)

        print(f"成功转换: {xls_file} -> {target_path}")
        print(f"数据形状: {frame.shape}")
        print("前几行预览:")
        print(frame.head())
    except Exception as err:
        print(f"转换失败: {err}")
        return False

    return True
# Script entry point: the first CLI argument is the input workbook, the
# optional second one the output path. Exits with status 1 when no input
# was given.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python convert_xls_to_xlsx.py <input.xls> [output.xlsx]")
        sys.exit(1)

    input_file = cli_args[0]
    output_file = cli_args[1] if len(cli_args) > 1 else None
    convert_xls_to_xlsx(input_file, output_file)
@@ -0,0 +1,65 @@
import pandas as pd
import sys
def _read_csv_with_fallback(file_path, attempts, **read_kwargs):
    """Return the first frame that parses, trying each (encoding, message) pair in order."""
    last_error = None
    for encoding, success_message in attempts:
        try:
            df = pd.read_csv(file_path, encoding=encoding, **read_kwargs)
        except Exception as exc:
            # Wrong encoding or unparsable content: remember why and move on.
            last_error = exc
            continue
        print(success_message)
        return df
    # Every attempt failed; surface the reason of the last one.
    raise last_error


def parse_holdings_correct(file_path):
    """Load a holdings export into a DataFrame, choosing a reader by content.

    Reader selection:
      * ``.csv`` files, and any non-binary file with a tab in its first
        100 bytes, are read as tab-separated text (UTF-8, then GBK, then
        GB2312).
      * ``.xls`` files are read with the xlrd engine. A file that is not a
        binary workbook falls back to tab-separated GBK text.
      * ``.xlsx`` files are read with the openpyxl engine.
      * Anything else is read as a default-delimited CSV (UTF-8, then GBK).

    The shape, column names and first five rows are printed after a
    successful read.

    Args:
        file_path: Path of the file to parse.

    Returns:
        The parsed DataFrame, or None when parsing failed (the reason is
        printed).
    """
    try:
        lower_path = file_path.lower()
        is_csv = lower_path.endswith(".csv")

        # Sniff the header only for non-.csv paths, as before, and close the
        # handle deterministically.
        head = b""
        if not is_csv:
            with open(file_path, "rb") as fh:
                head = fh.read(100)

        # OLE2 (.xls) and ZIP (.xlsx) containers are binary. Their headers can
        # contain a 0x09 byte that must not be mistaken for a tab separator.
        is_binary_excel = head.startswith(
            (b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", b"PK\x03\x04")
        )
        looks_tab_separated = not is_binary_excel and b"\t" in head

        if is_csv or looks_tab_separated:
            df = _read_csv_with_fallback(
                file_path,
                (
                    ("utf-8", "成功以UTF-8制表符分隔方式读取"),
                    ("gbk", "成功以GBK制表符分隔方式读取"),
                    ("gb2312", "成功以GB2312制表符分隔方式读取"),
                ),
                sep="\t",
            )
        elif lower_path.endswith(".xls"):
            try:
                # read_excel takes no `encoding` argument; passing one made
                # this call fail unconditionally.
                df = pd.read_excel(file_path, engine="xlrd")
                print("成功以.xls格式读取")
            except Exception:
                if is_binary_excel:
                    # A real workbook that xlrd cannot read is not text;
                    # report the real error instead of parsing garbage.
                    raise
                df = pd.read_csv(file_path, sep="\t", encoding="gbk")
                print("成功以制表符分隔文本格式读取.xls文件")
        elif lower_path.endswith(".xlsx"):
            df = pd.read_excel(file_path, engine="openpyxl")
            print("成功以.xlsx格式读取")
        else:
            df = _read_csv_with_fallback(
                file_path,
                (
                    ("utf-8", "成功以UTF-8 CSV格式读取"),
                    ("gbk", "成功以GBK CSV格式读取"),
                ),
            )

        print(f"数据形状: {df.shape}")
        print("列名:")
        for i, col in enumerate(df.columns):
            print(f" {i}: {col}")
        print("\n前5行数据:")
        print(df.head())
        return df
    except Exception as e:
        print(f"解析失败: {e}")
        return None
# Script entry point: parse the file named by the first CLI argument.
# Exits with status 1 when no argument was given.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python parse_holdings_correct.py <file_path>")
        sys.exit(1)

    file_path = cli_args[0]
    parse_holdings_correct(file_path)