Initial commit: skills library

- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
This commit is contained in:
hmo
2026-04-26 19:27:40 +08:00
commit 04db423416
861 changed files with 210414 additions and 0 deletions
@@ -0,0 +1,86 @@
import pandas as pd
import chardet
import os
# Number of leading bytes sampled from an Excel file to sniff its container format.
_EXCEL_HEADER_BYTES = 512
# Number of leading bytes sampled from a text file for encoding detection and preview.
_TEXT_SAMPLE_BYTES = 10000
# Signature that opens an OLE2 compound document, the usual container of .xls workbooks.
_OLE2_MAGIC = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
# BIFF8 beginning-of-file record as it appears in a bare (non-OLE2) workbook stream.
_BIFF8_BOF = b"\x09\x08\x10\x00\x00\x06\x05\x00"


def _inspect_excel(file_path, ext):
    """Sniff an Excel file's container format and print its worksheet summary.

    Args:
        file_path: Path of the file to inspect.
        ext: Lower-cased file extension, either ".xls" or ".xlsx".

    Raises:
        OSError: If the file cannot be opened; the caller reports it.
    """
    with open(file_path, "rb") as f:
        header = f.read(_EXCEL_HEADER_BYTES)

    # The decision is made on the file's leading bytes, not its extension, so
    # a binary workbook saved under the wrong extension is still sent to xlrd.
    # NOTE: the OLE2 signature identifies the container only; if the content is
    # not a readable workbook, xlrd fails and that failure is reported below.
    looks_like_xls = (
        header.startswith(_OLE2_MAGIC)
        or _BIFF8_BOF in header
        or b"Workbook" in header
    )

    if looks_like_xls:
        print("确认是.xls (二进制) 格式")
        try:
            # Imported lazily so the rest of the tool works without xlrd.
            import xlrd

            workbook = xlrd.open_workbook(file_path, encoding_override="gbk")
            sheets = workbook.sheets()
            print(f"工作表数量: {len(sheets)}")
            for i, sheet in enumerate(sheets):
                print(f"{i}: {sheet.name} ({sheet.nrows}行, {sheet.ncols}列)")
        except Exception as e:
            # Covers a missing xlrd install as well as unreadable workbooks;
            # the reason is shown instead of being silently discarded.
            print(f"使用xlrd读取失败: {e}")
    elif ext == ".xlsx":
        print("检测到.xlsx格式")
        try:
            # sheet_name=None loads every sheet into a {name: DataFrame} dict.
            frames_by_sheet = pd.read_excel(file_path, sheet_name=None)
            print(f"工作表数量: {len(frames_by_sheet)}")
            for sheet_name, sheet_df in frames_by_sheet.items():
                print(
                    f" 表: {sheet_name} ({len(sheet_df)}行, {len(sheet_df.columns)}列)"
                )
        except Exception as e:
            print(f"读取.xlsx失败: {e}")
    else:
        # A ".xls" name whose bytes match no known binary signature (for
        # example HTML or tab-separated text exported with that extension).
        print("扩展名为.xls,但文件头不是已知的二进制Excel格式")


def _inspect_text(file_path):
    """Detect a text file's encoding and print a preview of its first lines.

    Args:
        file_path: Path of the file to inspect.

    Raises:
        OSError: If the file cannot be opened; the caller reports it.
    """
    import codecs

    with open(file_path, "rb") as f:
        raw_data = f.read(_TEXT_SAMPLE_BYTES)

    encoding_result = chardet.detect(raw_data)
    encoding = encoding_result["encoding"]
    print(f"检测到编码: {encoding} (置信度: {encoding_result['confidence']:.2f})")

    if encoding is None:
        # Nothing to decode with; say so rather than surfacing a TypeError.
        print("解码失败: 未检测到编码")
        return

    try:
        # When the sample filled the whole buffer it may stop in the middle of
        # a multi-byte character. An incremental decoder with final=False
        # holds back such an incomplete tail instead of raising, while a
        # sample that is the entire file is still decoded strictly.
        sample_is_whole_file = len(raw_data) < _TEXT_SAMPLE_BYTES
        decoder = codecs.getincrementaldecoder(encoding)()
        decoded_content = decoder.decode(raw_data, final=sample_is_whole_file)

        lines = decoded_content.split("\n")[:10]
        print("前几行内容:")
        for i, line in enumerate(lines):
            if line.strip():
                print(
                    f" {i + 1}: {line[:100]}{'...' if len(line) > 100 else ''}"
                )
    except Exception as e:
        print(f"解码失败: {e}")


def check_file_format(file_path) -> None:
    """Print a diagnostic report of a file's format and encoding.

    Files ending in ".xls" or ".xlsx" are sniffed by their leading bytes and
    their worksheets are listed. Any other file is treated as text: its
    encoding is detected with chardet and its first non-blank lines among the
    first ten are previewed. All failures are printed, never raised.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        None. The report is written to standard output.
    """
    print(f"检查文件: {file_path}")

    ext = os.path.splitext(file_path)[1].lower()
    print(f"文件扩展名: {ext}")

    if ext in (".xls", ".xlsx"):
        print("检测到Excel文件,尝试读取...")
        try:
            _inspect_excel(file_path, ext)
        except Exception as e:
            print(f"检测Excel文件失败: {e}")
    else:
        try:
            _inspect_text(file_path)
        except Exception as e:
            print(f"检测文本文件失败: {e}")
# Script entry point: inspect the file named by the first command-line
# argument, or print a usage hint when no argument was supplied.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python check_file_format.py <file_path>")
    else:
        check_file_format(cli_args[0])