-
Notifications
You must be signed in to change notification settings - Fork 0
/
bilibili_class.py
83 lines (70 loc) · 3.17 KB
/
bilibili_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests
import re
import json
from contextlib import closing
from pyquery import PyQuery as pq
from requests import RequestException
class bilibili():
    """Minimal downloader for a single Bilibili video page.

    Workflow (see ``run``): fetch the page HTML, extract the video title and
    the media URL embedded in it, then stream the media into
    ``<title>.flv`` in the current working directory.
    """

    # Seconds to wait on connect / between received bytes before giving up.
    REQUEST_TIMEOUT = 30

    # The page embeds its playback metadata as JSON in an inline script tag.
    _PLAYINFO_RE = re.compile(r'<script>window\.__playinfo__=(.*?)</script>')

    # Characters that are not allowed in file names (Windows rules, which are
    # the strictest of the common platforms). Includes the backslash.
    _ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        # Headers used when requesting the HTML page.
        self.getHtmlHeaders = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            # "br" is deliberately not advertised: requests can only decode
            # brotli when an optional brotli package is installed, otherwise
            # response.text would be undecoded bytes and parsing would fail.
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        # Headers used when requesting the media file itself.
        self.downloadVideoHeaders = {
            'Origin': 'https://www.bilibili.com',
            'Referer': 'https://www.bilibili.com/video/av625984808',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        }

    # The source returned here usually differs from what the browser's F12
    # developer tools show, because the tools display the page after the
    # browser has processed it.
    def getHtml(self, url):
        """Fetch *url* and return the response body as text.

        Prints the HTTP status code. Returns ``None`` when the request
        raises a ``RequestException`` (including a timeout) or when the
        status code is not 200.
        """
        try:
            response = requests.get(
                url=url,
                headers=self.getHtmlHeaders,
                timeout=self.REQUEST_TIMEOUT,
            )
        except RequestException as err:
            print('请求Html错误:', err)
            return None
        print(response.status_code)
        if response.status_code == 200:
            return response.text
        return None

    @classmethod
    def _extract_video_url(cls, html):
        """Return the media URL found in the page's ``__playinfo__`` JSON.

        Raises:
            ValueError: if the script tag is missing, its content is not
                valid JSON, or no ``durl`` entry carries a ``url``.
        """
        match = cls._PLAYINFO_RE.search(html)
        if match is None:
            raise ValueError('window.__playinfo__ not found in page')
        # json.JSONDecodeError is a ValueError subclass, so malformed JSON
        # surfaces with the same exception type as the other failures.
        playinfo = json.loads(match.group(1))
        # 'durl' is expected to be a list of dicts.
        segments = playinfo.get('durl') if isinstance(playinfo, dict) else None
        urls = [
            item['url']
            for item in segments or []
            if isinstance(item, dict) and 'url' in item
        ]
        if not urls:
            raise ValueError("no 'url' entry found in __playinfo__['durl']")
        # When several entries carry a URL, the last one is returned.
        return urls[-1]

    @classmethod
    def _safe_filename(cls, title):
        """Return *title* with characters illegal in file names replaced by '-'.

        Falls back to ``'video'`` when nothing usable is left, so the caller
        never ends up with a bare extension as the file name.
        """
        cleaned = cls._ILLEGAL_FILENAME_CHARS.sub('-', title or '').strip()
        return cleaned or 'video'

    def parseHtml(self, html):
        """Extract the video title and media URL from the page source.

        Args:
            html: page source as returned by ``getHtml``.

        Returns:
            A dict with the keys ``'title'`` and ``'url'``.

        Raises:
            ValueError: if *html* is empty/``None`` or the media URL cannot
                be located in it.
        """
        if not html:
            raise ValueError('no HTML to parse (page request failed?)')
        # The title is read with a CSS selector via pyquery.
        video_title = pq(html)('#viewbox_report > h1 > span').text()
        # The media URL lives inside inline JSON, so it is pulled out with a
        # regular expression and the json module rather than a selector.
        return {
            'title': video_title,
            'url': self._extract_video_url(html),
        }

    def download_video(self, video):
        """Stream the media at ``video['url']`` into ``<title>.flv``.

        Args:
            video: dict with ``'title'`` and ``'url'`` keys, as produced by
                ``parseHtml``.

        Raises:
            requests.HTTPError: if the server answers with an error status;
                in that case no output file is created.
        """
        filename = self._safe_filename(video['title']) + '.flv'
        # NOTE(review): verify=False disables TLS certificate checking; it is
        # kept from the original code - confirm whether it is still needed.
        request = requests.get(
            url=video['url'],
            headers=self.downloadVideoHeaders,
            stream=True,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        # closing() guarantees the connection is released even on errors.
        with closing(request) as res:
            res.raise_for_status()
            with open(filename, 'wb') as f:
                # Write chunk by chunk so the whole video is never held in
                # memory at once.
                for chunk in res.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        f.write(chunk)

    def run(self, url):
        """Fetch the page at *url*, parse it and download its video.

        Raises:
            ValueError: if the page could not be fetched or parsed.
        """
        self.download_video(self.parseHtml(self.getHtml(url)))
# Sample Bilibili video page URL, defined at module scope.
url = 'https://www.bilibili.com/video/av625984808'
def getVideo_title(url):
    """Return the title of the Bilibili video page at *url*.

    The page is fetched with ``bilibili().getHtml`` and the title is read
    from the ``#viewbox_report > h1 > span`` element.

    Returns:
        The title text, or an empty string when the page could not be
        fetched.
    """
    html = bilibili().getHtml(url)
    if not html:
        # getHtml returns None on a request error or a non-200 status;
        # handing that to pyquery would raise instead of yielding a title.
        return ''
    return pq(html)('#viewbox_report > h1 > span').text()
if __name__ == '__main__':
    # Script entry point: download the sample video into the current
    # working directory.
    downloader = bilibili()
    url = 'https://www.bilibili.com/video/av625984808'
    downloader.run(url)