Files
skills/multi-source-stock-query/tools/parse_holdings_correct.py
T
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

66 lines
2.3 KiB
Python

import pandas as pd
import sys
def _has_tab_in_head(file_path, sample_size=100):
    """Return True if the first *sample_size* bytes of the file contain a tab."""
    # The context manager closes the handle even if read() raises.
    with open(file_path, "rb") as handle:
        return b"\t" in handle.read(sample_size)


def _read_delimited_text(file_path, sep, label):
    """Read a delimited text file, trying UTF-8, then GBK, then GB2312.

    Prints which encoding succeeded (``label`` describes the format in the
    message). Re-raises the last decoding/parsing error if every encoding
    fails.
    """
    last_error = None
    for encoding in ("utf-8", "gbk", "gb2312"):
        try:
            df = pd.read_csv(file_path, encoding=encoding, sep=sep)
        except (UnicodeError, pd.errors.ParserError) as exc:
            # Wrong encoding guess: remember the error and try the next one.
            last_error = exc
            continue
        print(f"成功以{encoding.upper()}{label}读取")
        return df
    raise last_error


def parse_holdings_correct(file_path):
    """Parse a holdings export into a DataFrame and print a short preview.

    Supported inputs:
      * ``.xlsx`` workbooks (read with openpyxl);
      * ``.xls`` workbooks (read with xlrd), including files that carry an
        ``.xls`` extension but are really tab-separated text;
      * any other file, read as tab-separated text when a tab appears in
        its first 100 bytes and as comma-separated text otherwise.

    Text files are decoded as UTF-8, then GBK, then GB2312.

    Args:
        file_path: Path of the file to parse (``str`` or path-like).

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` if the file could not
        be read (the error is printed, not raised).
    """
    try:
        lowered = str(file_path).lower()

        # Decide by extension BEFORE sniffing for tabs: binary workbooks
        # routinely contain the byte 0x09 near the start (the OLE2 header of
        # an .xls stores its sector shift as 0x0009), so a tab sniff would
        # misclassify them as tab-separated text.
        if lowered.endswith(".xlsx"):
            df = pd.read_excel(file_path, engine="openpyxl")
            print("成功以.xlsx格式读取")
        elif lowered.endswith(".xls"):
            try:
                # read_excel has no ``encoding`` parameter; passing one
                # raises TypeError and would make this branch always fail.
                df = pd.read_excel(file_path, engine="xlrd")
                print("成功以.xls格式读取")
            except Exception as excel_error:
                # Deliberately broad: xlrd may be missing (ImportError) or may
                # reject the file; some exports are tab-separated text that
                # merely carries an .xls extension.
                try:
                    df = _read_delimited_text(file_path, "\t", "制表符分隔方式")
                except Exception:
                    # Not text either: the Excel failure is the useful error.
                    raise excel_error
        elif _has_tab_in_head(file_path):
            df = _read_delimited_text(file_path, "\t", "制表符分隔方式")
        else:
            df = _read_delimited_text(file_path, ",", " CSV格式")

        print(f"数据形状: {df.shape}")
        print("列名:")
        for i, col in enumerate(df.columns):
            print(f" {i}: {col}")
        print("\n前5行数据:")
        print(df.head())
        return df
    except Exception as e:
        # Top-level boundary: callers rely on None (not an exception) on failure.
        print(f"解析失败: {e}")
        return None
if __name__ == "__main__":
    # Command-line entry point: the holdings file path is the first argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        # No path supplied: show usage and exit with a failure status.
        print("用法: python parse_holdings_correct.py <file_path>")
        sys.exit(1)
    file_path = cli_args[0]
    parse_holdings_correct(file_path)