This repository has been archived by the owner on May 26, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Scraper.py
84 lines (53 loc) · 1.89 KB
/
Scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Importing libraries.
from bs4 import BeautifulSoup
import requests
import re
import PTN
# Scrape EZTV.
def search_eztv(name, timeout=30):
    """Search EZTV for *name* and return the result rows as dictionaries.

    The page at ``https://eztv.ag/search/<name>`` is downloaded and parsed.
    Non-empty ``td.forum_thread_header`` cells supply the column names, and
    every ``tr.forum_header_border`` row becomes one dictionary.

    Args:
        name: Search term, appended verbatim to the search URL.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without it a stalled server would block the caller forever.

    Returns:
        A list with one dict per result row, in page order. Each dict maps
        a column name to the text of the row's corresponding non-blank
        cell. ``'Magnet'`` and ``'Torrent'`` hold the ``href`` of the
        row's first ``a.magnet`` and ``a.download_1`` link respectively;
        a key is omitted when the row has no such link.
    """
    resp = requests.get("https://eztv.ag/search/" + name, timeout=timeout)
    soup = BeautifulSoup(resp.content, "lxml")

    # Column names, in table order, taken from the non-empty header cells.
    columns = [
        header.text
        for header in soup.find_all('td', {'class': 'forum_thread_header'})
        if header.text != ''
    ]

    torrents = []
    for record in soup.find_all('tr', {'class': 'forum_header_border'}):
        # Only cells that carry visible text take part in the column mapping.
        cell_texts = [
            cell.text
            for cell in record.find_all('td')
            if cell.text != '' and not cell.text.isspace()
        ]
        # zip() stops at the shorter sequence, so a row with more populated
        # cells than there are headers is truncated instead of raising
        # IndexError.
        values = {
            column: text.strip('\n')
            for column, text in zip(columns, cell_texts)
        }

        # Look the links up inside the row itself rather than pairing
        # page-wide link lists with rows by position: a row without a link
        # can then no longer shift every following link onto the wrong
        # torrent, and a repeated magnet link in a row is ignored because
        # only the first match is used.
        magnet = record.find('a', {'class': 'magnet'})
        if magnet is not None and magnet.get('href'):
            values['Magnet'] = magnet['href']

        download = record.find('a', {'class': 'download_1'})
        if download is not None and download.get('href'):
            values['Torrent'] = download['href']

        torrents.append(values)

    return torrents