04db423416
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
66 lines
2.3 KiB
Python
66 lines
2.3 KiB
Python
import pandas as pd
|
|
import sys
|
|
|
|
|
|
# Leading bytes that identify binary Excel containers regardless of extension.
_OLE2_MAGIC = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"  # legacy .xls (OLE2 compound file)
_ZIP_MAGIC = b"PK\x03\x04"  # .xlsx (ZIP archive)

# Number of leading bytes inspected to pick the container type and delimiter.
_SNIFF_BYTES = 1024

# Encodings tried in order for text files, paired with the label used in messages.
_TEXT_ENCODINGS = (("utf-8", "UTF-8"), ("gbk", "GBK"), ("gb2312", "GB2312"))


def _read_delimited(file_path, sep):
    """Read a delimited text file, trying each known encoding in turn.

    Only decoding failures trigger the next encoding; any other error
    (missing file, malformed rows, empty file) propagates immediately.
    Raises the last decoding error when no encoding fits.
    """
    last_error = None
    for encoding, label in _TEXT_ENCODINGS:
        try:
            df = pd.read_csv(file_path, encoding=encoding, sep=sep)
        except UnicodeError as err:
            last_error = err
            continue
        if sep == "\t":
            print(f"成功以{label}制表符分隔方式读取")
        else:
            print(f"成功以{label} CSV格式读取")
        return df
    raise last_error


def parse_holdings_correct(file_path: str) -> "pd.DataFrame | None":
    """Parse a holdings export into a DataFrame and print a short preview.

    The file format is chosen from the file's leading bytes first and its
    extension second, because exports are frequently mislabeled (for example
    tab-separated text saved with an ``.xls`` extension):

    * OLE2 signature -> legacy Excel, read with ``xlrd``.
    * ZIP signature -> ``.xlsx`` workbook, read with ``openpyxl``.
    * a tab in the sniffed bytes -> tab-separated text.
    * ``.xlsx`` extension -> ``openpyxl``.
    * anything else -> comma-separated text.

    Text files are decoded as UTF-8, then GBK, then GB2312.

    Args:
        file_path: Path of the file to parse.

    Returns:
        The parsed DataFrame, or ``None`` when the file cannot be read or
        parsed (the error is printed instead of raised).
    """
    try:
        # Read the sniffing sample inside a context manager so the handle is
        # always closed.
        with open(file_path, "rb") as handle:
            head = handle.read(_SNIFF_BYTES)

        lowered = str(file_path).lower()

        if head.startswith(_OLE2_MAGIC):
            # The OLE2 header itself contains a 0x09 byte (sector shift), so
            # this check must run before the tab sniff below.
            # read_excel takes no ``encoding`` argument; xlrd reads the
            # codepage from the workbook.
            df = pd.read_excel(file_path, engine="xlrd")
            print("成功以.xls格式读取")
        elif head.startswith(_ZIP_MAGIC):
            df = pd.read_excel(file_path, engine="openpyxl")
            print("成功以.xlsx格式读取")
        elif b"\t" in head:
            df = _read_delimited(file_path, "\t")
        elif lowered.endswith(".xlsx"):
            df = pd.read_excel(file_path, engine="openpyxl")
            print("成功以.xlsx格式读取")
        else:
            df = _read_delimited(file_path, ",")

        print(f"数据形状: {df.shape}")
        print("列名:")
        for i, col in enumerate(df.columns):
            print(f" {i}: {col}")

        print("\n前5行数据:")
        print(df.head())

        return df

    except Exception as e:
        # Top-level boundary: callers rely on None (not an exception) on failure.
        print(f"解析失败: {e}")
        return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: the first positional argument is the path of
    # the holdings file to parse; with no argument, show usage and exit 1.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("用法: python parse_holdings_correct.py <file_path>")
        sys.exit(1)

    parse_holdings_correct(cli_args[0])
|