Files
MoFin/venv/lib/python3.12/site-packages/akshare/futures/requests_fun.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

90 lines
2.8 KiB
Python

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2023/9/15 19:00
Desc: 请求网站内容的函数: 在链接失败后可重复 20 次
"""
import time
from io import StringIO
from typing import Dict
import pandas as pd
import requests
def requests_link(
    url: str,
    encoding: str = "utf-8",
    method: str = "get",
    data: Dict = None,
    headers: Dict = None,
):
    """
    Fetch a URL with requests, retrying when the request itself fails.

    At most 20 attempts are made, with a 5 second pause between attempts.

    :param url: address of the page to fetch
    :param encoding: encoding assigned to the response, e.g. "utf-8", "gbk", "gb2312"
    :param method: HTTP method, either "get" or "post"
    :param data: payload sent with a "post" request; not used for "get"
    :param headers: HTTP request headers
    :return: the requests response with its encoding set, or None if every attempt failed
    :raises ValueError: if method is neither "get" nor "post"
    """
    # An unsupported method can never succeed, so fail immediately instead of
    # burning through every retry (and its sleep) before giving up.
    if method not in ("get", "post"):
        raise ValueError("请提供正确的请求方式")

    max_attempts = 20
    retry_delay = 5  # seconds to wait between attempts
    for attempt in range(1, max_attempts + 1):
        try:
            if method == "get":
                r = requests.get(url, timeout=20, headers=headers)
            else:
                r = requests.post(url, timeout=20, data=data, headers=headers)
        except requests.exceptions.RequestException as e:
            # Only request-level failures are retried; KeyboardInterrupt and
            # programming errors are allowed to propagate.
            print(f"{str(attempt)}次链接失败, 最多尝试 20 次", e)
            if attempt < max_attempts:
                time.sleep(retry_delay)
        else:
            r.encoding = encoding
            return r
    return None
def pandas_read_html_link(
    url: str,
    encoding: str = "utf-8",
    method: str = "get",
    data: Dict = None,
    headers: Dict = None,
):
    """
    Fetch a URL and extract its HTML tables with pandas.read_html, retrying
    when the request itself fails.

    At most 20 attempts are made, with a 5 second pause between attempts.
    Errors raised while parsing the page are not retried and propagate to
    the caller.

    :param url: address of the page to fetch
    :param encoding: encoding assigned to the response, e.g. "utf-8", "gbk", "gb2312"
    :param method: HTTP method, either "get" or "post"
    :param data: payload sent with a "post" request; not used for "get"
    :param headers: HTTP request headers
    :return: the result of pandas.read_html on the page text, or None if every attempt failed
    :raises ValueError: if method is neither "get" nor "post"
    """
    # An unsupported method can never succeed, so reject it before any request.
    if method not in ("get", "post"):
        raise ValueError("请提供正确的请求方式")

    max_attempts = 20
    retry_delay = 5  # seconds to wait between attempts
    for attempt in range(1, max_attempts + 1):
        try:
            if method == "get":
                r = requests.get(url, timeout=20, headers=headers)
            else:
                r = requests.post(url, timeout=20, data=data, headers=headers)
        except requests.exceptions.RequestException as e:
            # Retry on any request-level failure (timeouts, connection errors, ...),
            # not only on timeouts.
            print(f"{str(attempt)}次链接失败, 最多尝试20次", e)
            if attempt < max_attempts:
                time.sleep(retry_delay)
            continue
        r.encoding = encoding
        # Parsing happens outside the try block so that parser errors are
        # reported to the caller instead of being treated as a link failure.
        return pd.read_html(StringIO(r.text), encoding=encoding)
    return None