From 75ab5475ab52f732c22c9a0c5d7c3193f25b3c5f Mon Sep 17 00:00:00 2001
From: justinzm <3907721@qq.com>
Date: Wed, 28 Sep 2022 13:53:33 +0800
Subject: [PATCH] 0.3.7
---
.idea/workspace.xml | 109 ++++++++++--------
README.md | 3 +
gopup/index/baidu_decrypt.py | 217 +++++++++++++++++++++++++++++++++++
gopup/index/errors.py | 52 +++++++++
gopup/index/index_baidu.py | 184 ++++++++++++++---------------
setup.py | 5 +-
6 files changed, 427 insertions(+), 143 deletions(-)
create mode 100644 gopup/index/baidu_decrypt.py
create mode 100644 gopup/index/errors.py
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 6f3956f..31dcec5 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -5,11 +5,10 @@
-
-
+
-
+
@@ -72,13 +71,13 @@
"RunOnceActivity.OpenProjectViewOnStart": "true",
"RunOnceActivity.ShowReadmeOnStart": "true",
"WebServerToolWindowFactoryState": "false",
- "last_opened_file_path": "D:/dev/pythonFile/gopup",
+ "last_opened_file_path": "D:/dev/pythonFile/spider-BaiduIndex-master",
"node.js.detected.package.eslint": "true",
"node.js.detected.package.tslint": "true",
"node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
- "settings.editor.selected.configurable": "com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable"
+ "settings.editor.selected.configurable": "preferences.pluginManager"
}
}]]>
@@ -96,7 +95,7 @@
-
+
@@ -104,12 +103,12 @@
-
+
-
+
@@ -209,10 +208,10 @@
+
-
@@ -287,6 +286,8 @@
+
+
@@ -469,11 +470,6 @@
206
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 355
-
-
file://$PROJECT_DIR$/gopup/economic/marco_cn.py
82
@@ -564,36 +560,6 @@
34
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 262
-
-
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 264
-
-
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 265
-
-
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 266
-
-
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 267
-
-
-
- file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 268
-
-
file://$PROJECT_DIR$/gopup/economic/marco_cn.py
89
@@ -746,9 +712,59 @@
file://$PROJECT_DIR$/gopup/index/index_baidu.py
- 175
+ 177
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 229
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 231
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 230
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 241
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 242
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 275
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 272
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 306
+
+
+
+ file://$PROJECT_DIR$/gopup/index/baidu_decrypt.py
+ 148
+
+
+
+ file://$PROJECT_DIR$/gopup/index/index_baidu.py
+ 301
+
+
@@ -761,6 +777,7 @@
-
+
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 9da604b..f397e77 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,9 @@ print(df)
#### 版本更新
+ 0.3.7
+ 指数数据:迭代百度指数
+
0.3.6
生活数据: 老黄历
信息数据: 中国电竞价值排行榜
diff --git a/gopup/index/baidu_decrypt.py b/gopup/index/baidu_decrypt.py
new file mode 100644
index 0000000..ba3a422
--- /dev/null
+++ b/gopup/index/baidu_decrypt.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# _*_ coding: utf-8 _*_
+# @Time : 2022/9/28 11:10
+# @Author : justin.郑 3907721@qq.com
+# @File : baidu_decrypt.py
+# @desc : 百度解密
+
+import datetime
+import json
+from base64 import b64encode
+from typing import List, Dict, Tuple
+from urllib.parse import urlencode, quote
+
+import requests
+from Crypto.Cipher import AES
+
+try:
+    # Package-qualified import: needed when this module is loaded as
+    # ``gopup.index.baidu_decrypt`` (which is how index_baidu.py imports it).
+    from gopup.index.errors import ErrorCode, GopupError
+except ImportError:
+    # Fallback for running this file directly from inside gopup/index/.
+    from errors import ErrorCode, GopupError
+
+
+headers = {
+ 'Host': 'index.baidu.com',
+ 'Connection': 'keep-alive',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36',
+}
+
+ALL_KIND = ['all', 'pc', 'wise']
+
+
+def decrypt_func(key: str, data: str) -> List[str]:
+    """
+    Decode an obfuscated payload with its substitution key.
+
+    The first half of ``key`` lists the cipher characters; the second half
+    lists, position by position, the plain characters they stand for. Every
+    character of ``data`` is translated through that table and the decoded
+    text is split on commas.
+
+    Raises KeyError when ``data`` contains a character that is not in the
+    first half of ``key``.
+    """
+    half = len(key) // 2
+    # zip() stops after ``half`` pairs, so a trailing odd character is ignored.
+    table = dict(zip(key[:half], key[half:]))
+    decoded = ''.join(table[ch] for ch in data)
+    return decoded.split(',')
+
+
+def http_get(url: str, cookies: str, cipher_text: str = "") -> str:
+    """
+    Send a GET request and return the response body as text.
+
+    Every GET in this module goes through this function, so it is the place
+    to add multi-cookie rotation or request retries if they are needed.
+
+    :param url: full request URL
+    :param cookies: raw value for the ``Cookie`` header
+    :param cipher_text: optional value for the ``Cipher-Text`` header;
+        the header is omitted when this is empty
+    :return: response body text
+    :raises GopupError: with ``ErrorCode.NETWORK_ERROR`` when the request
+        fails at the transport level or the status code is not 200
+    """
+    _headers = {**headers, 'Cookie': cookies}
+    if cipher_text:
+        _headers["Cipher-Text"] = cipher_text
+    try:
+        response = requests.get(url, headers=_headers, timeout=30)
+    except requests.RequestException as err:
+        # RequestException covers timeouts as well as connection, SSL and
+        # redirect failures, so callers only ever see GopupError from here.
+        raise GopupError(ErrorCode.NETWORK_ERROR, str(err)) from err
+    if response.status_code != 200:
+        raise GopupError(ErrorCode.NETWORK_ERROR, f"HTTP {response.status_code}")
+    return response.text
+
+
+def get_cipher_text(keyword: str) -> str:
+    """
+    Build the value sent in the ``Cipher-Text`` request header.
+
+    A small JSON document (user agent, quoted trend-page URL for ``keyword``,
+    platform, client timestamp, version) is padded to a multiple of 16 bytes,
+    encrypted with AES-CBC using a fixed key and IV, base64-encoded and
+    returned as ``"<start_time>_<end_time>_<base64>"``.
+    """
+    # This value is hard-coded in acs-2057.js; that script is requested with
+    # a timestamp, so it is unclear whether its content changes dynamically.
+    start_time = 1652338834776
+    end_time = int(datetime.datetime.now().timestamp()*1000)
+
+    trend_url = f"https://index.baidu.com/v2/main/index.html#/trend/{keyword}?words={keyword}"
+    payload = json.dumps({
+        "ua": headers["User-Agent"],
+        "url": quote(trend_url),
+        "platform": "MacIntel",
+        "clientTs": end_time,
+        "version": "2.1.0"
+    }).encode()
+
+    # PKCS#7-style padding: append pad_len bytes, each holding the value
+    # pad_len (1..16), so the length becomes a multiple of the block size.
+    pad_len = 16 - len(payload) % 16
+    payload += bytes([pad_len]) * pad_len
+
+    cipher = AES.new(b"yyqmyasygcwaiyaa", AES.MODE_CBC, b"1234567887654321")
+    token = b64encode(cipher.encrypt(payload)).decode()
+    return f"{start_time}_{end_time}_{token}"
+
+
+def get_encrypt_json(
+    *,
+    start_date: str,
+    end_date: str,
+    keywords: List[List[str]],
+    type: str,
+    area: int,
+    cookies: str
+) -> Dict:
+    """
+    Request the still-encrypted index payload for ``keywords``.
+
+    :param start_date: first day of the range (not sent for ``type='live'``)
+    :param end_date: last day of the range (not sent for ``type='live'``)
+    :param keywords: groups of keywords; each inner list becomes one group
+    :param type: one of ``'search'``, ``'live'``, ``'news'``, ``'feed'``
+    :param area: area code, sent as ``region`` for ``'live'`` and as
+        ``area`` otherwise
+    :param cookies: raw ``Cookie`` header value
+    :return: the parsed JSON response
+    :raises KeyError: when ``type`` is not one of the four known kinds
+    :raises GopupError: ``NO_LOGIN`` for status 10000, ``REQUEST_LIMITED``
+        for status 10001, ``UNKNOWN`` for any other non-zero status
+    """
+    endpoints = {
+        'search': 'http://index.baidu.com/api/SearchApi/index?',
+        'live': 'http://index.baidu.com/api/LiveApi/getLive?',
+        'news': 'http://index.baidu.com/api/NewsApi/getNewsIndex?',
+        'feed': 'http://index.baidu.com/api/FeedSearchApi/getFeedIndex?'
+    }
+    pre_url = endpoints[type]
+
+    word_list = [
+        [{'name': name, 'wordType': 1} for name in group]
+        for group in keywords
+    ]
+    request_args = {'word': json.dumps(word_list)}
+    if type == 'live':
+        request_args['region'] = area
+    else:
+        request_args['startDate'] = start_date
+        request_args['endDate'] = end_date
+        request_args['area'] = area
+    url = pre_url + urlencode(request_args)
+
+    # The Cipher-Text header is derived from the first keyword only.
+    cipher_text = get_cipher_text(keywords[0][0])
+    datas = json.loads(http_get(url, cookies, cipher_text=cipher_text))
+
+    status = datas['status']
+    if status == 10000:
+        raise GopupError(ErrorCode.NO_LOGIN)
+    if status == 10001:
+        raise GopupError(ErrorCode.REQUEST_LIMITED)
+    if status != 0:
+        raise GopupError(ErrorCode.UNKNOWN, str(datas))
+    return datas
+
+
+def get_key(uniqid: str, cookies: str) -> str:
+    """
+    Fetch the substitution key that belongs to ``uniqid``.
+
+    Calls the ``ptbk`` endpoint through ``http_get`` and returns the ``data``
+    field of the parsed JSON response.
+    """
+    ptbk_url = 'http://index.baidu.com/Interface/api/ptbk?uniqid=%s' % uniqid
+    payload = json.loads(http_get(ptbk_url, cookies))
+    return payload['data']
+
+
+def format_data(data: Dict, kind: str):
+    """
+    Expand one keyword's decrypted series into one row per calendar day.
+
+    :param data: a single entry of the response; reads ``data['word']``,
+        ``data['all']['startDate']``, ``data['all']['endDate']`` and
+        ``data[kind]['data']`` (the already-decrypted list of values)
+    :param kind: which series to read from ``data`` (e.g. an ALL_KIND entry)
+    :return: generator of dicts with keys ``keyword``, ``type``, ``date``
+        (``YYYY-MM-DD``) and ``index``; days without a value, or with an
+        empty value, get ``'0'``
+    """
+    # Resolve the keyword once and read it structurally. Round-tripping the
+    # list through str() and a quote swap breaks on any keyword that contains
+    # a quote character (e.g. "McDonald's").
+    word = data['word']
+    if isinstance(word, str):
+        try:
+            word = json.loads(word)
+        except ValueError:
+            # Legacy form: a Python-repr style string using single quotes.
+            word = json.loads(word.replace('\'', '"'))
+    keyword_name = word[0]['name']
+
+    first_day = datetime.datetime.strptime(data['all']['startDate'], '%Y-%m-%d')
+    last_day = datetime.datetime.strptime(data['all']['endDate'], '%Y-%m-%d')
+    index_datas = data[kind]['data']
+
+    # Inclusive day range; empty when the end date precedes the start date.
+    for offset in range((last_day - first_day).days + 1):
+        cur_date = first_day + datetime.timedelta(days=offset)
+        index_data = index_datas[offset] if offset < len(index_datas) else ''
+        yield {
+            'keyword': keyword_name,
+            'type': kind,
+            'date': cur_date.strftime('%Y-%m-%d'),
+            'index': index_data if index_data else '0'
+        }
+
+
+def format_data_feed(data: Dict):
+    """
+    Expand one keyword's decrypted feed-index series into per-day rows.
+
+    :param data: a single entry of the response; reads ``data['key']``,
+        ``data['startDate']``, ``data['endDate']`` and ``data['data']``
+        (the already-decrypted list of values)
+    :return: generator of dicts with keys ``keyword``, ``date``
+        (``YYYY-MM-DD``) and ``index``; days without a value, or with an
+        empty value, get ``'0'``
+    """
+    # Resolve the keyword once and read it structurally. Round-tripping the
+    # list through str() and a quote swap breaks on any keyword that contains
+    # a quote character.
+    key = data['key']
+    if isinstance(key, str):
+        try:
+            key = json.loads(key)
+        except ValueError:
+            # Legacy form: a Python-repr style string using single quotes.
+            key = json.loads(key.replace('\'', '"'))
+    keyword_name = key[0]['name']
+
+    first_day = datetime.datetime.strptime(data['startDate'], '%Y-%m-%d')
+    last_day = datetime.datetime.strptime(data['endDate'], '%Y-%m-%d')
+    index_datas = data['data']
+
+    # Inclusive day range; empty when the end date precedes the start date.
+    for offset in range((last_day - first_day).days + 1):
+        cur_date = first_day + datetime.timedelta(days=offset)
+        index_data = index_datas[offset] if offset < len(index_datas) else ''
+        yield {
+            'keyword': keyword_name,
+            'date': cur_date.strftime('%Y-%m-%d'),
+            'index': index_data if index_data else '0'
+        }
+
+
+def format_data_new(data: Dict):
+    """
+    Expand one keyword's decrypted news-index series into per-day rows.
+
+    :param data: a single entry of the response; reads ``data['key']``,
+        ``data['startDate']``, ``data['endDate']`` and ``data['data']``
+        (the already-decrypted list of values)
+    :return: generator of dicts with keys ``keyword``, ``date``
+        (``YYYY-MM-DD``) and ``index``; days without a value, or with an
+        empty value, get ``'0'``
+    """
+    # Resolve the keyword once and read it structurally. Round-tripping the
+    # list through str() and a quote swap breaks on any keyword that contains
+    # a quote character.
+    key = data['key']
+    if isinstance(key, str):
+        try:
+            key = json.loads(key)
+        except ValueError:
+            # Legacy form: a Python-repr style string using single quotes.
+            key = json.loads(key.replace('\'', '"'))
+    keyword_name = key[0]['name']
+
+    first_day = datetime.datetime.strptime(data['startDate'], '%Y-%m-%d')
+    last_day = datetime.datetime.strptime(data['endDate'], '%Y-%m-%d')
+    index_datas = data['data']
+
+    # Inclusive day range; empty when the end date precedes the start date.
+    for offset in range((last_day - first_day).days + 1):
+        cur_date = first_day + datetime.timedelta(days=offset)
+        index_data = index_datas[offset] if offset < len(index_datas) else ''
+        yield {
+            'keyword': keyword_name,
+            'date': cur_date.strftime('%Y-%m-%d'),
+            'index': index_data if index_data else '0'
+        }
+
diff --git a/gopup/index/errors.py b/gopup/index/errors.py
new file mode 100644
index 0000000..ce6fc54
--- /dev/null
+++ b/gopup/index/errors.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# _*_ coding: utf-8 _*_
+# @Time : 2022/9/28 11:03
+# @Author : justin.郑 3907721@qq.com
+# @File : errors.py
+# @desc :
+
+from enum import Enum
+
+
+class ErrorCode(int, Enum):
+    """Numeric error codes carried by GopupError."""
+    UNKNOWN = 10002
+    NETWORK_ERROR = 10003
+
+    # Baidu Index
+    NO_LOGIN = 20000
+    KEYWORD_LIMITED = 20001
+    REQUEST_LIMITED = 20002
+    CHECK_KEYWORD_LIMITED = 20003
+
+    # Baidu login
+    GET_QR_FAIL = 20010
+    LOGIN_FAIL = 20011
+    INDEX_LOGIN_FAIL = 20012
+
+    # Tianyancha
+    TYC_COMPANY_COUNT_FAIL = 20020
+
+
+# Human-readable message for each error code (messages are user-facing and
+# kept in Chinese).
+CODE_MSG_MAP = {
+    ErrorCode.NO_LOGIN: 'cookies失效,请重新获取cookies',
+    ErrorCode.UNKNOWN: '未知错误',
+    ErrorCode.NETWORK_ERROR: '网络错误',
+    ErrorCode.KEYWORD_LIMITED: ('关键词最多传递5个, '
+                                '可以使用`from qdata.baidu_index.common import split_keywords`,'
+                                '对关键词进行切分'),
+    ErrorCode.REQUEST_LIMITED: "该账号请求过于频繁, 请降低请求频率",
+    ErrorCode.CHECK_KEYWORD_LIMITED: "最多传入15个关键词",
+    ErrorCode.GET_QR_FAIL: "获取二维码失败",
+    ErrorCode.LOGIN_FAIL: "百度登录失败",
+    ErrorCode.INDEX_LOGIN_FAIL: "百度指数登录失败",
+    ErrorCode.TYC_COMPANY_COUNT_FAIL: "获取天眼查公司数量失败"
+}
+
+
+class GopupError(Exception):
+    """
+    Exception carrying an error code and a readable message.
+
+    :param code: the error code; a code without an entry in CODE_MSG_MAP
+        falls back to the UNKNOWN message instead of failing
+    :param info: optional detail appended to the message after ", "
+    """
+
+    def __init__(self, code: ErrorCode, info: str = ""):
+        self.code = code
+        # .get() without a default returns None for an unmapped code, and
+        # None + str raises TypeError while the error is being constructed.
+        base_msg = CODE_MSG_MAP.get(code, CODE_MSG_MAP[ErrorCode.UNKNOWN])
+        self.msg = base_msg + (f", {info}" if info else "")
+
+    def __str__(self):
+        # Format the numeric value explicitly: how an (int, Enum) member
+        # renders inside an f-string differs between Python versions.
+        code_value = getattr(self.code, "value", self.code)
+        return repr(f"ERROR-{code_value}: {self.msg}")
diff --git a/gopup/index/index_baidu.py b/gopup/index/index_baidu.py
index 2337a04..614d7c4 100644
--- a/gopup/index/index_baidu.py
+++ b/gopup/index/index_baidu.py
@@ -5,10 +5,12 @@
# @File : index_baidu.py
# @Desc : 获取百度指数
-import json
-import urllib.parse
+from gopup.index.baidu_decrypt import decrypt_func, get_encrypt_json, format_data, get_key, format_data_feed, format_data_new
import pandas as pd
import requests
+import datetime
+import json
+import math
def decrypt(t: str, e: str) -> str:
@@ -233,8 +235,8 @@ def baidu_atlas_index(word, cookie, date=None):
"word": word['word'],
"pv": word['pv'],
"ratio": word['ratio'],
- "period": data['period']
- # "sim": word['sim']
+ "period": data['period'],
+ "sim": word['sim']
}
res_list.append(tmp)
df = pd.DataFrame(res_list)
@@ -246,35 +248,32 @@ def baidu_atlas_index(word, cookie, date=None):
def baidu_search_index(word, start_date, end_date, cookie, type="all"):
# 百度搜索数据
try:
- headers = {
- "Accept": "application/json, text/plain, */*",
- "Accept-Encoding": "gzip, deflate",
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
- "Connection": "keep-alive",
- "Cookie": cookie,
- "Host": "index.baidu.com",
- "Referer": "http://index.baidu.com/v2/main/index.html",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
- }
- w = '{"name":"%s","wordType":1}' % word
-
- url = 'http://index.baidu.com/api/SearchApi/index?area=0&word=[[%s]]&startDate=%s&endDate=%s' % (w, start_date, end_date)
-
- r = requests.get(url=url, headers=headers)
- data = r.json()["data"]
-
- all_data = data["userIndexes"][0][type]["data"]
- uniqid = data["uniqid"]
- ptbk = get_ptbk(uniqid, cookie)
- result = decrypt(ptbk, all_data).split(",")
- result = [int(item) if item != "" else 0 for item in result]
- temp_df_7 = pd.DataFrame(
- [pd.date_range(start=start_date, end=end_date), result],
- index=["date", word],
- ).T
- temp_df_7.index = pd.to_datetime(temp_df_7["date"])
- del temp_df_7["date"]
- return temp_df_7
+ keywords_list = [[word]]
+ encrypt_json = get_encrypt_json(
+ start_date=start_date,
+ end_date=end_date,
+ keywords=keywords_list,
+ type='search',
+ area=0,
+ cookies=cookie
+ )
+
+ encrypt_datas = encrypt_json['data']['userIndexes']
+ uniqid = encrypt_json['data']['uniqid']
+
+ result = []
+ key = get_key(uniqid, cookie)
+ for encrypt_data in encrypt_datas:
+ encrypt_data[type]['data'] = decrypt_func(key, encrypt_data[type]['data'])
+
+ for formated_data in format_data(encrypt_data, kind=type):
+ result.append(formated_data)
+ # yield formated_data
+
+ data_df = pd.DataFrame(result)
+ data_df.index = pd.to_datetime(data_df["date"])
+ del data_df["date"]
+ return data_df
except Exception as e:
return None
@@ -282,78 +281,73 @@ def baidu_search_index(word, start_date, end_date, cookie, type="all"):
def baidu_info_index(word, start_date, end_date, cookie):
# 百度资讯指数
try:
- headers = {
- "Accept": "application/json, text/plain, */*",
- "Accept-Encoding": "gzip, deflate",
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
- "Connection": "keep-alive",
- "Cookie": cookie,
- "Host": "index.baidu.com",
- "Referer": "http://index.baidu.com/v2/main/index.html",
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36"
- }
- w = '{"name":"%s","wordType":1}' % word
-
- url = 'http://index.baidu.com/api/FeedSearchApi/getFeedIndex?area=0&word=[[%s]]&startDate=%s&endDate=%s' % (
- w, start_date, end_date)
-
- r = requests.get(url=url, headers=headers)
- data = r.json()["data"]
- all_data = data["index"][0]["data"]
- uniqid = data["uniqid"]
- ptbk = get_ptbk(uniqid, cookie)
- result = decrypt(ptbk, all_data).split(",")
- result = [int(item) if item != "" else 0 for item in result]
- temp_df_7 = pd.DataFrame(
- [pd.date_range(start=start_date, end=end_date), result],
- index=["date", word],
- ).T
- temp_df_7.index = pd.to_datetime(temp_df_7["date"])
- del temp_df_7["date"]
- return temp_df_7
- except:
+ keywords_list = [[word]]
+ encrypt_json = get_encrypt_json(
+ start_date=start_date,
+ end_date=end_date,
+ keywords=keywords_list,
+ type='feed',
+ area=0,
+ cookies=cookie
+ )
+
+ encrypt_datas = encrypt_json['data']['index']
+ uniqid = encrypt_json['data']['uniqid']
+
+ result = []
+ key = get_key(uniqid, cookie)
+ for encrypt_data in encrypt_datas:
+ encrypt_data['data'] = decrypt_func(key, encrypt_data['data'])
+
+ for formated_data in format_data_feed(encrypt_data):
+ result.append(formated_data)
+ # yield formated_data
+
+ data_df = pd.DataFrame(result)
+ data_df.index = pd.to_datetime(data_df["date"])
+ del data_df["date"]
+ return data_df
+ except Exception as e:
return None
def baidu_media_index(word, start_date, end_date, cookie):
# 百度媒体指数
try:
- headers = {
- "Accept": "application/json, text/plain, */*",
- "Accept-Encoding": "gzip, deflate",
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
- "Connection": "keep-alive",
- "Cookie": cookie,
- "Host": "index.baidu.com",
- "Referer": "http://index.baidu.com/v2/main/index.html",
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36"
- }
- w = '{"name":"%s","wordType":1}' % word
-
- url = 'http://index.baidu.com/api/NewsApi/getNewsIndex?area=0&word=[[%s]]&startDate=%s&endDate=%s' % (w, start_date, end_date)
-
- r = requests.get(url=url, headers=headers)
-
- data = r.json()["data"]
- all_data = data["index"][0]["data"]
- uniqid = data["uniqid"]
- ptbk = get_ptbk(uniqid, cookie)
- result = decrypt(ptbk, all_data).split(",")
- result = [int(item) if item != "" else 0 for item in result]
- temp_df_7 = pd.DataFrame(
- [pd.date_range(start=start_date, end=end_date), result],
- index=["date", word],
- ).T
- temp_df_7.index = pd.to_datetime(temp_df_7["date"])
- del temp_df_7["date"]
- return temp_df_7
- except:
+ keywords_list = [[word]]
+ encrypt_json = get_encrypt_json(
+ start_date=start_date,
+ end_date=end_date,
+ keywords=keywords_list,
+ type='news',
+ area=0,
+ cookies=cookie
+ )
+
+ encrypt_datas = encrypt_json['data']['index']
+ uniqid = encrypt_json['data']['uniqid']
+
+ result = []
+ key = get_key(uniqid, cookie)
+ for encrypt_data in encrypt_datas:
+ encrypt_data['data'] = decrypt_func(key, encrypt_data['data'])
+
+ for formated_data in format_data_new(encrypt_data):
+ result.append(formated_data)
+ # yield formated_data
+
+ data_df = pd.DataFrame(result)
+ data_df.index = pd.to_datetime(data_df["date"])
+ del data_df["date"]
+ return data_df
+ except Exception as e:
return None
if __name__ == "__main__":
- cookie = '''BIDUPSID=512FE19892358D21D38C8FC50F5F37F7; PSTM=1660901365; BAIDUID=53AD0CE37FCDB36D9D1B39A93FE374F4:SL=0:NR=10:FG=1; Hm_up_d101ea4d2a5c67dab98251f0b5de24dc=%7B%22uid_%22%3A%7B%22value%22%3A%22334753876%22%2C%22scope%22%3A1%7D%7D; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; MCITY=-218%3A; BA_HECTOR=aga0a42lalak218laha5hmc01hi0at619; BAIDUID_BFESS=53AD0CE37FCDB36D9D1B39A93FE374F4:SL=0:NR=10:FG=1; ZFY=aW7:BiB7855FQiPLSOkRbec2PdNcv3rbOnPH5AYTKCqc:C; BDUSS=ZvNlM1RndBRDQ3bko0S0tEOWktVE02Tnd4b0R0dVZ1fmIwLWpPdjNDOWZ4a2RqSVFBQUFBJCQAAAAAAAAAAAEAAABU8PMTst24-dauw~cAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF85IGNfOSBjfk; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%224028045868%22%2C%22first_id%22%3A%2218335e0347710fc-0e1fee1e643a0f-26021c51-1395396-18335e034789b9%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22%24device_id%22%3A%2218335e0347710fc-0e1fee1e643a0f-26021c51-1395396-18335e034789b9%22%7D; delPer=0; H_PS_PSSID=37155_36552_36459_37115_37355_37299_36885_36786_37243_37260_26350_22157; PSINO=7; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1661486036,1661564440,1661647128,1663127080; bdindexid=8nktvuic1kuo5dot5pkgo2q9e0; SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a04131395300fN1KlH2fo%2FP67W6DdexHNNUP3l99gFTlVBT31fwY0AeZ7JgLby0XOquez%2FgS66QIrBdlgN6%2FfxFJhYMdjaOTybrrHdz8W2BqOgOp0hXRAccayXkHgZIlByUaoaQHDKjhnDBipw083eS8hKObXIQXit1ZiFtP6XNWsK5VMlr5qHkt54hAfKRLAlmF9X7hUKZVmrSxcvI%2F2GrPWfIM9YgEajJYRsNeYN7kZiTscF99vZwMkqUipDarRfkpu0eoNbMD0dTj72fsdkmkmj8Ui6eKu8fPOpc5MwWMIPnmJxuqQJ9AHU%2BtsifCssB2AYpE2Ir4gFAg8rWsqmwl9lTmOCWZrw%3D%3D65473314357981690382511504956550; __cas__rn__=413139530; __cas__st__212=fcd26bfe42726771ecac112f095fc17d1f1584e59a13e60a039fc22b264d79132ee080e381ae1c33427295a9; __cas__id__212=42043514; 
CPID_212=42043514; CPTK_212=684298546; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1663127094; RT="z=1&dm=baidu.com&si=3efb8415-45d4-44df-ba4c-6af9f09d175e&ss=l812x7mj&sl=2&tt=1fw&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf"; ab_sr=1.0.1_N2RhZWI0OGUyOWQ1MmUwNzExMTUzYmRiMzllN2UxMDFiZGJlNzExZmVlY2IwNjI0YzllZWQwN2I3Mzc5ZDQ2ODlmOGUzNThiMWFlNzcxMzhlZjc5ODUxNjk3YWI3MDJiN2IzNGI1ZjY2NmQzYTg0MmQyMWYxNWMxMzJhZThjZjdiNDFjYjk2MTIzNTg3YTgxYjg4ZTM3YjY3ZmEyZDE3ZQ==; BDUSS_BFESS=ZvNlM1RndBRDQ3bko0S0tEOWktVE02Tnd4b0R0dVZ1fmIwLWpPdjNDOWZ4a2RqSVFBQUFBJCQAAAAAAAAAAAEAAABU8PMTst24-dauw~cAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF85IGNfOSBjfk'''
- data = baidu_search_index(word="极限挑战", start_date='2022-04-01', end_date='2022-06-19', cookie=cookie)
+    import os
+
+    # Never commit a real Baidu session cookie (it contains the BDUSS login
+    # token). Supply it at run time instead: export BAIDU_COOKIE='...'
+    cookie = os.environ.get("BAIDU_COOKIE", "")
+    data = baidu_media_index(word="极限挑战", start_date='2022-09-01', end_date='2022-09-19', cookie=cookie)
+    # data = baidu_atlas_index(word="极限挑战", cookie=cookie)
print(data)
diff --git a/setup.py b/setup.py
index ba61eba..46ce62a 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
URL = 'https://github.com/justinzm/gopup'
NAME = 'gopup'
-VERSION = '0.3.6'
+VERSION = '0.3.7'
DESCRIPTION = 'GoPUP database'
if os.path.exists('README.md'):
with open('README.md', encoding='utf-8') as f:
@@ -34,7 +34,8 @@
'xlrd',
'pyexecjs',
'pyquery',
- 'tqdm'
+ 'tqdm',
+ 'pycryptodome'
]
# CONSOLE_SCRIPT = 'my-cmd=my_pkg.my_cmd:main'
# # 如果想在 pip install 之后自动生成一个可执行命令,就靠它了: