-
Notifications
You must be signed in to change notification settings - Fork 0
/
schedule_crawler.py
130 lines (110 loc) · 4.78 KB
/
schedule_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import hashlib
import os
import requests
import datetime, time
import sys
from apscheduler.schedulers.blocking import BlockingScheduler
# Python 2 only: the interpreter start-up removes sys.setdefaultencoding, and
# reload(sys) makes it available again so the implicit str<->unicode codec can
# be switched from ASCII to UTF-8 for the whole process.
reload(sys)
sys.setdefaultencoding("utf-8")
# Single blocking scheduler for this script; jobs attach to it through the
# @scheduler.scheduled_job decorator and it is started in the __main__ guard.
scheduler = BlockingScheduler()
# Absolute directory containing this script; daily CSV files are written here.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
def get_time_stamp13(datetime_obj):
    '''
    Convert a datetime to a millisecond Unix timestamp truncated to the minute.

    Seconds and microseconds are discarded, so the last five digits of the
    result are always "00000" relative to the minute boundary's second count
    (the millisecond part is always 000).

    :param datetime_obj: datetime.datetime interpreted as local time
    :return: int, milliseconds since the epoch for the start of that minute
    '''
    # Rebuild the value from its date/hour/minute fields instead of formatting
    # it to text and parsing it back; the result is a naive datetime with
    # second == 0 and microsecond == 0.
    minute_start = datetime.datetime(
        datetime_obj.year,
        datetime_obj.month,
        datetime_obj.day,
        datetime_obj.hour,
        datetime_obj.minute,
    )
    # time.mktime treats the struct_time as local time and returns seconds.
    seconds = int(time.mktime(minute_start.timetuple()))
    return seconds * 1000
def get_sign(**kwargs):
    '''
    Compute the "sign" request parameter.

    The fixed client parameters below are merged with the caller's keyword
    arguments (caller values win), rendered as "key=value" pairs sorted by
    key and joined with "&", and the MD5 hex digest of that string is
    returned.

    :param kwargs: extra or overriding parameters to include in the signature
    :return: 32-character lowercase hexadecimal MD5 digest
    '''
    params = {
        "Accept-APIVersion": "1.0",
        "appVersionNo": "78",
        "mobileBrand": "HUAWEI",
        "mobileStandard": "WIFI",
        "platformType": "android",
        "platformVersion": "6.0",
        "sign": "p@ssw0rd",
    }
    params.update(kwargs)

    # %-formatting lets non-string values (e.g. an integer callTime) take part
    # in the signature instead of raising TypeError on concatenation.
    sign_raw = "&".join("%s=%s" % (key, params[key]) for key in sorted(params))

    # hashlib needs bytes; only encode when the joined value is text so the
    # result does not depend on the interpreter's default encoding.
    if not isinstance(sign_raw, bytes):
        sign_raw = sign_raw.encode("utf-8")
    return hashlib.md5(sign_raw).hexdigest()
def get_crowding_info(call_time, timeout=10):
    '''
    POST to the realDmyjdSearch endpoint and return the raw response.

    :param call_time: timestamp string sent as the "callTime" header and
        included in the request signature
    :param timeout: seconds to wait for the server before giving up; without
        it a stalled connection would block the calling scheduler job forever
    :return: the requests.Response object of the POST
    :raises requests.exceptions.RequestException: on connection errors or
        when the timeout expires
    '''
    url = "http://webapp.cocc.cdmetro.cn:10080/api/realDmyjdSearch"
    headers = {
        "platformVersion": "6.0",
        "platformType": "android",
        "Accept-APIVersion": "1.0",
        "mobileBrand": "HUAWEI",
        "mobileStandard": "WIFI",
        # Signature derived from the request parameters; changes with call_time.
        "sign": get_sign(userId="", tokenId="", callTime=call_time),
        "appVersionNo": "78",
        "callTime": call_time,
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "webapp.cocc.cdmetro.cn:10080",
        "Connection": "Keep-Alive",
        "Accept-Encoding": "gzip",
        "User-Agent": "okhttp/3.4.1",
    }
    return requests.post(url, headers=headers, timeout=timeout)
@scheduler.scheduled_job("cron", day_of_week='*', hour='7,8,9,14, 17,18,19,20,21', minute='*/10', second='10')
def fetch_data():
    '''
    Fetch the current crowding data and append it to today's CSV file.

    The file is BASE_DIR/<YYYYMMDD>_metro.csv; a header row is written when
    the file is first created. Request or JSON-decoding failures are printed
    and the run is skipped.

    A single record looks like the following (it appears that only real-time
    data can be obtained, not historical data; callTime only matters for the
    sign computation):
    {u'direction': 1, u'lineName': u'1号线', u'beginCode': u'0138', u'sectionId': u'01380137',
    u'color': u'#429C38', u'remark': u'2019-05-16 15:41:18',
    u'sectionName': u'广都->四河',
    u'dmyjd': 2.37, u'updateTime': u'2019-05-16 15:39:54', u'timeDate': u'20190516',
    u'endTimeHM': u'15:35', u'dmyjdDescr': u'舒适', u'section_state': 1, u'endTime': u'2019-05-16 15:35:00',
    u'startTime': u'2019-05-16 15:20:00', u'startTimeHM': u'15:20', u'lineId': u'01', u'endCode': u'0137'}
    '''
    now_time = datetime.datetime.now()  # current local time
    call_time_str = str(get_time_stamp13(now_time))  # millisecond timestamp string
    print(now_time)
    try:
        data = get_crowding_info(call_time_str).json()  # metro crowding payload
    except Exception as msg:
        # Best effort: report the failure and wait for the next scheduled run.
        print("{} => {}".format(now_time, msg))
        return

    rows = data.get("returnData") if isinstance(data, dict) else None
    if rows is None:
        print("{} => response has no returnData".format(now_time))
        return

    fieldnames = [
        "crawl_date", "direction", "lineName", "beginCode", "sectionId",
        "color", "remark", "sectionName", "dmyjd", "updateTime", "timeDate",
        "endTimeHM", "dmyjdDescr", "section_state", "endTime", "startTime",
        "startTimeHM", "lineId", "endCode",
    ]
    csv_path = os.path.join(BASE_DIR, "{date:%Y%m%d}_metro.csv".format(date=now_time))
    # Decide before opening: append mode creates the file if it is missing.
    header_flag = not os.path.exists(csv_path)

    # Open the path itself (not the result of os.path.exists). Binary mode is
    # what the Python 2 csv module expects, so no extra "\r" is emitted on
    # platforms that translate line endings.
    with open(csv_path, "ab") as f:
        csv_writer = csv.DictWriter(f, dialect='excel', delimiter=',', fieldnames=fieldnames)
        if header_flag:
            csv_writer.writeheader()
        for row in rows:
            row["crawl_date"] = now_time
            # The Python 2 csv module writes byte strings, so unicode values
            # are encoded explicitly; other values are passed through as-is.
            csv_writer.writerow({
                key: value.encode('gb18030') if isinstance(value, unicode) else value
                for key, value in row.items()
            })
if __name__ == '__main__':
    # BlockingScheduler.start() does not return while the scheduler is
    # running, so the start-up notice is printed before the call rather than
    # after it.
    print("statistic scheduler starting")
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        # Ctrl-C or interpreter exit is the normal way to stop the script, not
        # a start-up failure: shut the scheduler down and report the stop.
        scheduler.shutdown()
        print("statistic scheduler stopped")