-
Notifications
You must be signed in to change notification settings - Fork 18
/
BIPTRecruitment.py
47 lines (41 loc) · 1.46 KB
/
BIPTRecruitment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
from bs4 import BeautifulSoup
from jedis import jedis
def get_one_page_data(page, redis, table_name):
    """Scrape one page of the jobbipt.jysd.com "teachin" listing and store it.

    For every listing row whose first cell contains a link, a record with the
    keys ``company_name`` and ``date`` is passed to ``redis.save_dict``.
    Rows without a link, or with fewer than five cells, are skipped.

    Args:
        page: Page number placed in the ``page`` query parameter of the URL.
        redis: Storage helper; only ``save_dict(table_name, record)`` is
            called on it here.
        table_name: Name forwarded unchanged to ``redis.save_dict``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the page lacks the expected listing container.
    """
    url = 'http://jobbipt.jysd.com/teachin?title=&range=0&city=&time=0&page=' + str(page)
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a listing.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html5lib')
    list_node = soup.find('div', attrs={'class': 'infoBox mt10'})
    if list_node is None:
        # Without this guard the failure surfaces as an opaque AttributeError.
        raise ValueError(
            "listing container 'infoBox mt10' not found on page " + str(page)
        )

    for ul in list_node.find_all('ul', attrs={'class': 'infoList teachinList'}):
        lis = ul.find_all('li')
        # Cells 0 and 4 are read below; skip rows that are too short rather
        # than letting one malformed row abort the rest of the page.
        if len(lis) < 5:
            continue
        anchor = lis[0].find('a')
        if not anchor:
            continue
        redis.save_dict(table_name, dict(
            company_name=anchor.text.strip(),
            # First 10 characters of the fifth cell; presumably a
            # YYYY-MM-DD date -- verify against the live page.
            date=lis[4].text[0:10],
        ))
def get_bipt_recruitment():
    """Crawl the Beijing Institute of Petrochemical Technology listing.

    Clears the ``bipt_company_info`` list, scrapes every listing page with
    ``get_one_page_data``, reports any scraping failure through
    ``redis.handle_error``, and then always calls ``redis.add_to_file`` and
    ``redis.add_university`` for the table.
    """
    # Beijing Institute of Petrochemical Technology
    table_name = 'bipt_company_info'
    print("北京石油化工学院开始================================")
    redis = jedis.jedis()
    redis.clear_list(table_name)
    # The site only has a single page, so range(1, max_page) visits page 1.
    max_page = 2
    try:
        for i in range(1, max_page):
            get_one_page_data(i, redis, table_name)
            print('page ' + str(i) + ' done!')
    except Exception as e:
        # Catch Exception rather than BaseException so that Ctrl-C and
        # SystemExit still terminate the crawler instead of being recorded
        # as a scraping error.
        redis.handle_error(e, table_name)
    redis.add_to_file(table_name)
    redis.add_university(table_name)
    print("北京石油化工学院Finish================================")
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    get_bipt_recruitment()