fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
358 lines
9.9 KiB
Python
358 lines
9.9 KiB
Python
# -*- coding:utf-8 -*-
|
|
|
|
"""
|
|
获取股票分类数据接口
|
|
Created on 2015/02/01
|
|
@author: Jimmy Liu
|
|
@group : waditu
|
|
@contact: jimmysoa@sina.cn
|
|
"""
|
|
|
|
import pandas as pd
|
|
from tushare.stock import cons as ct
|
|
from tushare.stock import ref_vars as rv
|
|
import json
|
|
import re
|
|
import time
|
|
import tushare.stock.fundamental as fd
|
|
from tushare.util.netbase import Client
|
|
|
|
try:
|
|
from urllib.request import urlopen, Request
|
|
except ImportError:
|
|
from urllib2 import urlopen, Request
|
|
|
|
|
|
def get_industry_classified(standard='sina'):
    """
    Fetch the industry classification table.

    Parameters
    ----------
    standard : str
        'sw' selects the Shenwan industry dataset; any other value
        (the default is 'sina') selects the Sina industry dataset.

    Returns
    -------
    DataFrame
        The CSV published under ``ct.TSDATA_CLASS``. Documented columns
        (not verifiable from this function alone):
        code   : stock code, read with object dtype so it stays text
        name   : stock name
        c_name : industry name
    """
    dataset = 'industry_sw' if standard == 'sw' else 'industry'
    url = ct.TSDATA_CLASS % (ct.P_TYPE['http'], ct.DOMAINS['oss'], dataset)
    return pd.read_csv(url, dtype={'code': object})
|
|
|
|
|
|
def get_concept_classified():
    """
    Fetch the concept classification table.

    Returns
    -------
    DataFrame
        The 'concept' CSV published under ``ct.TSDATA_CLASS``. Documented
        columns (not verifiable from this function alone):
        code   : stock code, read with object dtype so it stays text
        name   : stock name
        c_name : concept name
    """
    url = ct.TSDATA_CLASS % (ct.P_TYPE['http'], ct.DOMAINS['oss'], 'concept')
    return pd.read_csv(url, dtype={'code': object})
|
|
|
|
|
|
def concetps(path='d:\\cpt.csv'):
    """
    Download every Sina concept board with its member stocks and save it as CSV.

    The board index is fetched with ``_get_type_data``; for each board the
    member list is fetched with ``_get_detail`` and tagged with the board name
    in a ``c_name`` column. All member lists are concatenated and written
    to ``path`` without the row index.

    Parameters
    ----------
    path : str
        Destination CSV file. Defaults to the historical hard-coded
        location ``d:\\cpt.csv`` so existing callers are unaffected.

    Returns
    -------
    None
        Nothing is written when the board index cannot be fetched or when
        no member list was collected.
    """
    ct._write_head()
    index_url = ct.SINA_CONCEPTS_INDEX_URL % (ct.P_TYPE['http'],
                                              ct.DOMAINS['sf'], ct.PAGES['cpt'])
    boards = _get_type_data(index_url)
    if boards is None:
        # _get_type_data prints the error and yields None on failure;
        # there is nothing to iterate in that case.
        return

    frames = []
    for row in boards.values:
        tag, board_name = row[0], row[1]
        members = _get_detail(tag)
        if members is not None:
            members['c_name'] = board_name
            frames.append(members)

    if not frames:
        # pd.concat rejects an empty list and a list has no to_csv.
        return
    pd.concat(frames, ignore_index=True).to_csv(path, index=False)
|
|
|
|
|
|
|
|
def get_concepts(src='dfcf'):
    """
    Fetch the concept-board quote list.

    Parameters
    ----------
    src : str
        Kept for interface compatibility; this function does not read it.

    Returns
    -------
    DataFrame
        One row per entry of the JSON array returned by the service, with
        the columns below. The meanings are inferred from the column names
        only - confirm against the upstream feed:
        concept  : comma-separated field 2
        change   : field 3
        up       : first '|'-separated part of field 6
        down     : third '|'-separated part of field 6
        top_code : field 7
        top_name : field 9
    """
    url = ct.ET_CONCEPTS_INDEX_URL % (ct.P_TYPE['http'],
                                      ct.DOMAINS['dfcf'], _random(15))
    payload = Client(url, ref='').gvalue()
    if ct.PY3:
        # On Python 3 the client hands back bytes.
        payload = payload.decode('utf-8')

    records = []
    for line in json.loads(payload):
        fields = line.split(',')
        counts = fields[6].split('|')
        records.append([fields[2], fields[3], counts[0], counts[2],
                        fields[7], fields[9]])

    header = ['concept', 'change', 'up', 'down', 'top_code', 'top_name']
    return pd.DataFrame(records, columns=header)
|
|
|
|
|
|
def get_area_classified():
    """
    Fetch the region classification table.

    Returns
    -------
    DataFrame
        The ``name`` and ``area`` columns of ``fd.get_stock_basics()`` plus
        its former index as a leading column (documented as the stock
        code), sorted by ``area`` and renumbered from 0.
    """
    basics = fd.get_stock_basics()
    # Move the index into a regular column next to name/area.
    by_area = basics[['name', 'area']].reset_index()
    return by_area.sort_values('area').reset_index(drop=True)
|
|
|
|
|
|
def get_gem_classified():
    """
    Fetch the stocks listed on the Growth Enterprise Market (ChiNext) board.

    Returns
    -------
    DataFrame
        The ``ct.FOR_CLASSIFY_COLS`` columns (documented as code and name)
        of every stock whose code starts with '3', sorted by code and
        renumbered from 0.
    """
    basics = fd.get_stock_basics()
    basics.reset_index(inplace=True)
    board = basics[ct.FOR_CLASSIFY_COLS]
    starts_with_3 = board['code'].str[0] == '3'
    selected = board.loc[starts_with_3]
    return selected.sort_values('code').reset_index(drop=True)
|
|
|
|
|
|
def get_sme_classified():
    """
    Fetch the stocks listed on the Small and Medium Enterprise board.

    Returns
    -------
    DataFrame
        The ``ct.FOR_CLASSIFY_COLS`` columns (documented as code and name)
        of every stock whose code starts with '002', sorted by code and
        renumbered from 0.
    """
    basics = fd.get_stock_basics()
    basics.reset_index(inplace=True)
    board = basics[ct.FOR_CLASSIFY_COLS]
    has_002_prefix = board['code'].str[0:3] == '002'
    selected = board.loc[has_002_prefix]
    return selected.sort_values('code').reset_index(drop=True)
|
|
|
|
def get_st_classified():
    """
    Fetch the stocks on the risk-warning board.

    Returns
    -------
    DataFrame
        The ``ct.FOR_CLASSIFY_COLS`` columns (documented as code and name)
        of every stock whose name contains 'ST', sorted by code and
        renumbered from 0.
    """
    basics = fd.get_stock_basics()
    basics.reset_index(inplace=True)
    board = basics[ct.FOR_CLASSIFY_COLS]
    name_has_st = board['name'].str.contains('ST')
    selected = board.loc[name_has_st]
    return selected.sort_values('code').reset_index(drop=True)
|
|
|
|
|
|
def _get_detail(tag, retry_count=3, pause=0.001):
    """
    Download all stocks listed under one Sina classification tag.

    Pages are requested one after another, starting at page 1, until a page
    comes back with fewer than 100 rows; that page is treated as the last.

    Parameters
    ----------
    tag : str
        Classification tag substituted into ``ct.SINA_DATA_DETAIL_URL``.
    retry_count : int
        Maximum number of download attempts per page.
    pause : float
        Seconds to sleep before every attempt.

    Returns
    -------
    DataFrame
        Columns ``code`` and ``name`` of every downloaded page, stacked in
        page order; the per-page row labels are kept as they are.

    Raises
    ------
    IOError
        When a page could not be downloaded and decoded within
        ``retry_count`` attempts. Previously this surfaced as an unbound
        ``text`` on the first page, or silently re-parsed the previous
        page's text on later pages.
    """
    from io import StringIO

    num_limit = 100
    # Wraps whatever sits between a comma and the next colon in double
    # quotes, i.e. quotes the bare keys of the response.
    key_pattern = re.compile(r'\,(.*?)\:')
    pages = []
    p = 0
    while True:
        p += 1
        text = None
        for _ in range(retry_count):
            time.sleep(pause)
            try:
                ct._write_console()
                request = Request(ct.SINA_DATA_DETAIL_URL % (ct.P_TYPE['http'],
                                                             ct.DOMAINS['vsf'],
                                                             ct.PAGES['jv'],
                                                             p, tag))
                text = urlopen(request, timeout=10).read().decode('gbk')
            except Exception:
                # Best-effort retry: any download/decode failure just
                # triggers the next attempt.
                continue
            else:
                break
        if text is None:
            raise IOError(ct.NETWORK_URL_ERROR_MSG)

        text = key_pattern.sub(r',"\1":', text)
        # The key pattern cannot reach the first key of each object
        # (it follows '{', not ','), so 'symbol' is patched separately.
        text = text.replace('"{symbol', '{"symbol')
        text = text.replace('{symbol', '{"symbol"')

        # Hand read_json a file-like object; passing the literal JSON string
        # directly is deprecated in recent pandas releases.
        page = pd.read_json(StringIO(text), dtype={'code': object})
        page = pd.DataFrame(page, columns=ct.THE_FIELDS)[['code', 'name']]
        pages.append(page)
        if page.shape[0] < num_limit:
            return pd.concat(pages)
|
|
|
|
|
|
def _get_type_data(url):
    """
    Download a Sina classification index and return it as a two-column table.

    The response is decoded as GBK and is expected to look like
    ``<something> = <JSON object>``; every value of that object is a
    comma-separated string whose first two fields become ``tag`` and ``name``.

    Parameters
    ----------
    url : str
        Address of the index to download.

    Returns
    -------
    DataFrame or None
        Columns ``tag`` and ``name``, one row per value of the JSON object.
        On any error the message is printed and None is returned, so
        callers must check for None.
    """
    try:
        request = Request(url)
        raw = urlopen(request, timeout=10).read().decode('GBK')
        # Split on the first '=' only: an '=' inside the JSON payload must
        # not cut the payload short.
        payload = json.loads(raw.split('=', 1)[1])
        rows = []
        for entry in payload.values():
            fields = entry.split(',')
            rows.append([fields[0], fields[1]])
        return pd.DataFrame(rows, columns=['tag', 'name'])
    except Exception as er:
        print(str(er))
        return None
|
|
|
|
|
|
def get_hs300s():
    """
    Fetch the current CSI 300 (HS300) constituents and their weights.

    Returns
    -------
    DataFrame or None
        Spreadsheet columns 0, 4, 5 and 8 renamed to
        ``ct.FOR_CLASSIFY_W_COLS`` (documented as code, name, date, weight),
        with ``code`` converted to text and left-padded with zeros to six
        characters. On any error the message is printed and None is returned.
    """
    def pad_code(value):
        return str(value).zfill(6)

    try:
        url = ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['http'], ct.DOMAINS['idx'],
                                           ct.PAGES['hs300w'])
        weights = pd.read_excel(url, usecols=[0, 4, 5, 8])
        weights.columns = ct.FOR_CLASSIFY_W_COLS
        weights['code'] = weights['code'].map(pad_code)
        return weights
    except Exception as er:
        print(str(er))
|
|
|
|
|
|
def get_sz50s():
    """
    Fetch the SSE 50 constituents.

    Returns
    -------
    DataFrame or None
        Spreadsheet columns 0, 4 and 5 renamed to ``ct.FOR_CLASSIFY_B_COLS``
        (documented as date, code, name), with ``code`` converted to text
        and left-padded with zeros to six characters. On any error the
        message is printed and None is returned.
    """
    try:
        url = ct.SZ_CLASSIFY_URL_FTP % (ct.P_TYPE['http'], ct.DOMAINS['idx'],
                                        ct.PAGES['sz50b'])
        # `usecols` replaces the removed `parse_cols` keyword and matches
        # get_hs300s / get_zz500s; with `parse_cols` current pandas raises
        # TypeError, which the handler below would swallow.
        df = pd.read_excel(url, usecols=[0, 4, 5])
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        return df
    except Exception as er:
        print(str(er))
|
|
|
|
|
|
def get_zz500s():
    """
    Fetch the CSI 500 (ZZ500) constituents and their weights.

    Returns
    -------
    DataFrame or None
        Spreadsheet columns 0, 4, 5 and 8 renamed to
        ``ct.FOR_CLASSIFY_W_COLS`` (documented as date, code, name, weight),
        with ``code`` converted to text and left-padded with zeros to six
        characters. On any error the message is printed and None is returned.
    """
    def pad_code(value):
        return str(value).zfill(6)

    try:
        # The HS300 URL template is reused with the ZZ500 page name.
        url = ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['http'], ct.DOMAINS['idx'],
                                           ct.PAGES['zz500wt'])
        weights = pd.read_excel(url, usecols=[0, 4, 5, 8])
        weights.columns = ct.FOR_CLASSIFY_W_COLS
        weights['code'] = weights['code'].map(pad_code)
        return weights
    except Exception as er:
        print(str(er))
|
|
|
|
|
|
def get_terminated():
    """
    Fetch the list of stocks whose listing has been terminated.

    Returns
    -------
    DataFrame or None
        The ``result`` entries of the service response, restricted to
        ``rv.TERMINATED_T_COLS`` and renamed to ``rv.TERMINATED_COLS``
        (documented as code, name, oDate = listing date, tDate = termination
        date). On any error the message is printed and None is returned.
    """
    try:
        referer = ct.SSEQ_CQ_REF_URL % (ct.P_TYPE['http'], ct.DOMAINS['sse'])
        url = rv.TERMINATED_URL % (ct.P_TYPE['http'], ct.DOMAINS['sseq'],
                                   ct.PAGES['ssecq'], _random(5), _random())
        body = Client(url, ref=referer, cookie=rv.MAR_SH_COOKIESTR).gvalue()
        if ct.PY3:
            body = body.decode('utf-8')
        # Drop the first 19 characters and the last one before parsing -
        # presumably the JSONP callback wrapper; confirm against the service.
        parsed = json.loads(body[19:-1])
        table = pd.DataFrame(parsed['result'], columns=rv.TERMINATED_T_COLS)
        table.columns = rv.TERMINATED_COLS
        return table
    except Exception as er:
        print(str(er))
|
|
|
|
|
|
def get_suspended():
    """
    Fetch the list of stocks whose listing has been suspended.

    Returns
    -------
    DataFrame or None
        The ``result`` entries of the service response, restricted to
        ``rv.TERMINATED_T_COLS`` and renamed to ``rv.TERMINATED_COLS``
        (documented as code, name, oDate = listing date, tDate; the column
        set is shared with get_terminated, so tDate presumably holds the
        suspension date here - confirm). On any error the message is
        printed and None is returned.
    """
    try:
        referer = ct.SSEQ_CQ_REF_URL % (ct.P_TYPE['http'], ct.DOMAINS['sse'])
        url = rv.SUSPENDED_URL % (ct.P_TYPE['http'], ct.DOMAINS['sseq'],
                                  ct.PAGES['ssecq'], _random(5), _random())
        body = Client(url, ref=referer, cookie=rv.MAR_SH_COOKIESTR).gvalue()
        if ct.PY3:
            body = body.decode('utf-8')
        # Drop the first 19 characters and the last one before parsing -
        # presumably the JSONP callback wrapper; confirm against the service.
        parsed = json.loads(body[19:-1])
        table = pd.DataFrame(parsed['result'], columns=rv.TERMINATED_T_COLS)
        table.columns = rv.TERMINATED_COLS
        return table
    except Exception as er:
        print(str(er))
|
|
|
|
|
|
|
|
def _random(n=13):
|
|
from random import randint
|
|
start = 10**(n-1)
|
|
end = (10**n)-1
|
|
return str(randint(start, end))
|
|
|
|
|