fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
485 lines
13 KiB
Python
485 lines
13 KiB
Python
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2024/2/20 14:00
Desc: Movie box office data
https://ys.endata.cn/BoxOffice/Movie
"""
|
|
|
|
import datetime
|
|
import json
|
|
import os
|
|
|
|
import pandas as pd
|
|
import requests
|
|
import py_mini_racer
|
|
|
|
|
|
def _get_js_path(name: str = "", module_file: str = "") -> str:
    """
    Build the path of a file stored in a ``movie`` directory.

    The result is ``<grandparent directory of module_file>/movie/<name>``,
    where the grandparent directory is made absolute first.

    :param name: file name
    :type name: str
    :param module_file: path of the module the lookup is relative to
        (callers in this file pass ``__file__``)
    :type module_file: str
    :return: path of the requested file
    :rtype: str
    """
    # Go two levels up from the module file, then back down into "movie".
    module_folder = os.path.abspath(os.path.dirname(os.path.dirname(module_file)))
    return os.path.join(module_folder, "movie", name)
|
|
|
|
|
|
def _get_file_content(file_name: str = "jm.js"):
    """
    Read a text file located through ``_get_js_path`` and return its content.

    :param file_name: name of the file to read
    :type file_name: str
    :return: file content
    :rtype: str
    :raises OSError: if the file cannot be opened
    """
    setting_file_path = _get_js_path(file_name, __file__)
    # Decode explicitly instead of relying on the platform's default locale
    # encoding. NOTE(review): assumes the JS asset is UTF-8 (or plain ASCII).
    with open(setting_file_path, encoding="utf-8") as f:
        return f.read()
|
|
|
|
|
|
def get_current_week(date: str = "20201019") -> datetime.date:
    """
    Return the Monday of the week that contains ``date``.

    :param date: the date of interest, formatted as ``YYYYMMDD``
    :type date: str
    :return: Monday of the week containing ``date`` (``date`` itself when it
        already is a Monday)
    :rtype: datetime.date
    :raises ValueError: if ``date`` does not match ``%Y%m%d``
    """
    day = datetime.datetime.strptime(date, "%Y%m%d").date()
    # weekday() is 0 for Monday, so subtracting it lands on that week's Monday
    # without stepping back one day at a time.
    return day - datetime.timedelta(days=day.weekday())
|
|
|
|
|
|
def decrypt(origin_data: str = "") -> str:
    """
    Decrypt a string by running the bundled ``jm.js`` script.

    The script is loaded with ``_get_file_content``, evaluated in a fresh
    MiniRacer context, and its ``webInstace.shell`` function is called with
    ``origin_data``.

    :param origin_data: the string before decryption
    :type origin_data: str
    :return: the decrypted string
    :rtype: str
    """
    file_data = _get_file_content(file_name="jm.js")
    ctx = py_mini_racer.MiniRacer()
    ctx.eval(file_data)
    # "webInstace" (sic) is the identifier called in the JS context; the
    # spelling must be kept exactly as is.
    return ctx.call("webInstace.shell", origin_data)
|
|
|
|
|
|
def movie_boxoffice_realtime() -> pd.DataFrame:
    """
    Movie box office - real-time box office.

    https://ys.endata.cn/BoxOffice/Movie

    Posts today's date to the endata API, decrypts the response with
    ``decrypt`` and keeps the first seven columns of ``Data.Table1``.

    :return: real-time box office data
    :rtype: pandas.DataFrame
    """
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        "showDate": "",
        # The API expects the date as YYYY-MM-DD.
        "tdate": datetime.datetime.today().strftime("%Y-%m-%d"),
        "MethodName": "BoxOffice_GetHourBoxOffice",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table1"])
    # Only the first seven columns are used; they are renamed positionally.
    temp_df = temp_df.iloc[:, :7]
    temp_df.columns = [
        "排序",
        "_",
        "影片名称",
        "实时票房",
        "累计票房",
        "上映天数",
        "票房占比",
    ]
    temp_df = temp_df[
        ["排序", "影片名称", "实时票房", "票房占比", "上映天数", "累计票房"]
    ]
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_daily(date: str = "20240219") -> pd.DataFrame:
    """
    Movie box office - single-day box office.

    https://www.endata.com.cn/BoxOffice/BO/Day/index.html

    :param date: date to query, formatted as ``YYYYMMDD``; per the original
        note, only data up to the day before the current date can be requested
    :type date: str
    :return: daily box office
    :rtype: pandas.DataFrame
    :raises ValueError: if ``date`` does not match ``%Y%m%d``
    """
    current_day = datetime.datetime.strptime(date, "%Y%m%d")
    previous_day = current_day - datetime.timedelta(days=1)
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        # The request sends the given date as "sdate" and the day before it
        # as "edate", both as YYYY-MM-DD.
        "sdate": current_day.strftime("%Y-%m-%d"),
        "edate": previous_day.strftime("%Y-%m-%d"),
        "MethodName": "BoxOffice_GetDayBoxOffice",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # Columns are renamed positionally; "_" marks columns that are dropped.
    temp_df.columns = [
        "排序",
        "_",
        "影片名称",
        "_",
        "累计票房",
        "平均票价",
        "上映天数",
        "场均人次",
        "_",
        "_",
        "_",
        "_",
        "_",
        "单日票房",
        "环比变化",
        "_",
        "口碑指数",
    ]
    temp_df = temp_df[
        [
            "排序",
            "影片名称",
            "单日票房",
            "环比变化",
            "累计票房",
            "平均票价",
            "场均人次",
            "口碑指数",
            "上映天数",
        ]
    ]
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_weekly(date: str = "20240218") -> pd.DataFrame:
    """
    Movie box office - single-week box office.

    https://www.endata.com.cn/BoxOffice/BO/Week/oneWeek.html

    :param date: a date formatted as ``YYYYMMDD``; the request is made for
        the Monday of the week containing it (per the original note, data is
        only available for complete weeks)
    :type date: str
    :return: weekly box office
    :rtype: pandas.DataFrame
    :raises ValueError: if ``date`` does not match ``%Y%m%d``
    """
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        "sdate": get_current_week(date=date).strftime("%Y-%m-%d"),
        "MethodName": "BoxOffice_GetWeekInfoData",
    }
    r = requests.post(url, data=payload)
    # Set the encoding explicitly, as every other endpoint wrapper in this
    # module does.
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # Columns are renamed positionally; "_" marks columns that are dropped.
    temp_df.columns = [
        "排序",
        "_",
        "影片名称",
        "单周票房",
        "累计票房",
        "_",
        "上映天数",
        "平均票价",
        "场均人次",
        "环比变化",
        "_",
        "_",
        "_",
        "排名变化",
        "口碑指数",
    ]
    # Take an explicit copy so the column assignments below operate on an
    # independent frame.
    temp_df = temp_df[
        [
            "排序",
            "影片名称",
            "排名变化",
            "单周票房",
            "环比变化",
            "累计票房",
            "平均票价",
            "场均人次",
            "口碑指数",
            "上映天数",
        ]
    ].copy()
    # Values that cannot be parsed as numbers become NaN.
    for column in ("单周票房", "环比变化", "累计票房"):
        temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_monthly(date: str = "20240218") -> pd.DataFrame:
    """
    Movie box office - single-month box office.

    https://www.endata.com.cn/BoxOffice/BO/Month/oneMonth.html

    :param date: a date formatted as ``YYYYMMDD``; the request is made for
        the month it falls in
    :type date: str
    :return: monthly box office
    :rtype: pandas.DataFrame
    """
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        # The request always uses the first day of the month.
        "startTime": f"{date[:4]}-{date[4:6]}-01",
        "MethodName": "BoxOffice_GetMonthBox",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # Columns are renamed positionally; "_" marks columns that are dropped.
    temp_df.columns = [
        "排序",
        "_",
        "影片名称",
        "月内天数",
        "单月票房",
        "平均票价",
        "场均人次",
        "月度占比",
        "上映日期",
        "_",
        "口碑指数",
    ]
    # Take an explicit copy so the column assignment below operates on an
    # independent frame.
    temp_df = temp_df[
        [
            "排序",
            "影片名称",
            "单月票房",
            "月度占比",
            "平均票价",
            "场均人次",
            "上映日期",
            "口碑指数",
            "月内天数",
        ]
    ].copy()
    # Unparseable release dates become NaT instead of raising.
    temp_df["上映日期"] = pd.to_datetime(temp_df["上映日期"], errors="coerce").dt.date
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_yearly(date: str = "20240218") -> pd.DataFrame:
    """
    Movie box office - yearly box office.

    https://www.endata.com.cn/BoxOffice/BO/Year/index.html

    :param date: a date formatted as ``YYYYMMDD``; only its year (the first
        four characters) is sent to the API
    :type date: str
    :return: yearly box office
    :rtype: pandas.DataFrame
    """
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        "year": f"{date[:4]}",
        "MethodName": "BoxOffice_GetYearInfoData",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # reset_index() prepends the row index as a new first column; that column
    # is named "排序" below and then overwritten with a 1-based rank.
    temp_df.reset_index(inplace=True)
    temp_df.columns = [
        "排序",
        "_",
        "影片名称",
        "类型",
        "总票房",
        "平均票价",
        "场均人次",
        "国家及地区",
        "上映日期",
        "_",
    ]
    temp_df["排序"] = range(1, len(temp_df) + 1)
    # Take an explicit copy so the column assignment below operates on an
    # independent frame.
    temp_df = temp_df[
        [
            "排序",
            "影片名称",
            "类型",
            "总票房",
            "平均票价",
            "场均人次",
            "国家及地区",
            "上映日期",
        ]
    ].copy()
    # Unparseable release dates become NaT instead of raising.
    temp_df["上映日期"] = pd.to_datetime(temp_df["上映日期"], errors="coerce").dt.date
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_yearly_first_week(date: str = "20201018") -> pd.DataFrame:
    """
    Movie box office - yearly box office - first-week box office of the year.

    https://www.endata.com.cn/BoxOffice/BO/Year/firstWeek.html

    :param date: a date formatted as ``YYYYMMDD``; only its year (the first
        four characters) is sent to the API
    :type date: str
    :return: first-week box office for the year
    :rtype: pandas.DataFrame
    """
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        "year": f"{date[:4]}",
        "MethodName": "BoxOffice_getYearInfo_fData",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # reset_index() prepends the row index as a new first column; that column
    # is named "排序" below and then overwritten with a 1-based rank.
    temp_df.reset_index(inplace=True)
    temp_df.columns = [
        "排序",
        "_",
        "_",
        "影片名称",
        "首周票房",
        "场均人次",
        "上映日期",
        "首周天数",
        "类型",
        "国家及地区",
        "_",
        "占总票房比重",
    ]
    temp_df["排序"] = range(1, len(temp_df) + 1)
    # Take an explicit copy so the column assignment below operates on an
    # independent frame.
    temp_df = temp_df[
        [
            "排序",
            "影片名称",
            "类型",
            "首周票房",
            "占总票房比重",
            "场均人次",
            "国家及地区",
            "上映日期",
            "首周天数",
        ]
    ].copy()
    # Unparseable release dates become NaT instead of raising.
    temp_df["上映日期"] = pd.to_datetime(temp_df["上映日期"], errors="coerce").dt.date
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_cinema_daily(date: str = "20240219") -> pd.DataFrame:
    """
    Movie box office - cinema box office - daily ranking.

    https://www.endata.com.cn/BoxOffice/BO/Cinema/day.html

    :param date: date to query, passed to the API unchanged; per the original
        note, data is available up to the day before the current date
    :type date: str
    :return: daily cinema box office ranking (rows 1 to 100 are requested)
    :rtype: pandas.DataFrame
    """
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        "rowNum1": "1",
        "rowNum2": "100",
        "date": date,
        "MethodName": "BoxOffice_GetCinemaDayBoxOffice",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # Columns are renamed positionally; "_" marks columns that are dropped.
    temp_df.columns = [
        "排序",
        "_",
        "影院名称",
        "单日票房",
        "单日场次",
        "_",
        "_",
        "场均票价",
        "场均人次",
        "上座率",
    ]
    temp_df = temp_df[
        ["排序", "影院名称", "单日票房", "单日场次", "场均人次", "场均票价", "上座率"]
    ]
    return temp_df
|
|
|
|
|
|
def movie_boxoffice_cinema_weekly(date: str = "20240219") -> pd.DataFrame:
    """
    Movie box office - cinema box office - weekly ranking.

    https://www.endata.com.cn/BoxOffice/BO/Cinema/week.html

    :param date: a date formatted as ``YYYYMMDD``; per the original note, data
        is returned for the last complete week before this date
    :type date: str
    :return: weekly cinema box office ranking (rows 1 to 100 are requested)
    :rtype: pandas.DataFrame
    :raises ValueError: if ``date`` cannot be parsed as an ISO date after
        being split into year, month and day
    """
    iso_week = datetime.date.fromisoformat(
        f"{date[:4]}-{date[4:6]}-{date[6:]}"
    ).isocalendar()[1]
    # NOTE(review): the offsets below are hard-coded and only the ISO week
    # number is used, so the year of ``date`` does not influence the ID.
    # Presumably they were calibrated against one specific year of the API's
    # week numbering - confirm before relying on other years.
    date_id = iso_week - 1 - 41 + 1128
    url = "https://www.endata.com.cn/API/GetData.ashx"
    payload = {
        "dateID": str(date_id),
        "rowNum1": "1",
        "rowNum2": "100",
        "MethodName": "BoxOffice_GetCinemaWeekBoxOffice",
    }
    r = requests.post(url, data=payload)
    r.encoding = "utf8"
    # The response body is passed through decrypt() before JSON parsing.
    data_json = json.loads(decrypt(r.text))
    temp_df = pd.DataFrame(data_json["Data"]["Table"])
    # Columns are renamed positionally; "_" marks columns that are dropped.
    temp_df.columns = [
        "排序",
        "_",
        "影院名称",
        "当周票房",
        "_",
        "单银幕票房",
        "场均人次",
        "单日单厅票房",
        "单日单厅场次",
    ]
    temp_df = temp_df[
        [
            "排序",
            "影院名称",
            "当周票房",
            "单银幕票房",
            "场均人次",
            "单日单厅票房",
            "单日单厅场次",
        ]
    ]
    return temp_df
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: call every public endpoint wrapper once and print the
    # resulting frame. Each call performs a live HTTP request.
    movie_boxoffice_realtime_df = movie_boxoffice_realtime()
    print(movie_boxoffice_realtime_df)

    movie_boxoffice_daily_df = movie_boxoffice_daily(date="20240219")
    print(movie_boxoffice_daily_df)

    movie_boxoffice_weekly_df = movie_boxoffice_weekly(date="20240218")
    print(movie_boxoffice_weekly_df)

    movie_boxoffice_monthly_df = movie_boxoffice_monthly(date="20240218")
    print(movie_boxoffice_monthly_df)

    movie_boxoffice_yearly_df = movie_boxoffice_yearly(date="20240218")
    print(movie_boxoffice_yearly_df)

    movie_boxoffice_yearly_first_week_df = movie_boxoffice_yearly_first_week(
        date="20201018"
    )
    print(movie_boxoffice_yearly_first_week_df)

    movie_boxoffice_cinema_daily_df = movie_boxoffice_cinema_daily(date="20240219")
    print(movie_boxoffice_cinema_daily_df)

    movie_boxoffice_cinema_weekly_df = movie_boxoffice_cinema_weekly(date="20240219")
    print(movie_boxoffice_cinema_weekly_df)
|