# dork-scanner.py (forked from xiangxiangtk/dork-scanner)
import requests, re, sys, time
from bs4 import BeautifulSoup
from functools import partial
from multiprocess import Pool, TimeoutError, cpu_count
class SearchEngine:
    """Base class holding the engine name; subclasses implement search_for()."""
    def __init__(self, name):
        self.name = name
class Google(SearchEngine):
    def search_for(self, string, start):
        # Fetch one Google results page and pull the target URLs out of the
        # "/url?q=...&sa=..." redirect links (relies on Google's legacy markup).
        urls = []
        payload = {'q': string, 'start': start}
        headers = {'User-agent': 'Mozilla/11.0'}
        req = requests.get('http://www.google.com/search', params=payload, headers=headers)
        soup = BeautifulSoup(req.text, 'html.parser')
        h3tags = soup.find_all('h3', class_='r')
        for h3 in h3tags:
            try:
                urls.append(re.search(r'url\?q=(.+?)&sa', h3.a['href']).group(1))
            except (AttributeError, KeyError, TypeError):
                continue
        return urls
class Bing(SearchEngine):
    def search_for(self, string, start):
        # Fetch one Bing results page; each organic result sits in an <li class="b_algo">.
        urls = []
        payload = {'q': string, 'first': start}
        headers = {'User-agent': 'Mozilla/11.0'}
        req = requests.get('https://www.bing.com/search', params=payload, headers=headers)
        soup = BeautifulSoup(req.text, 'html.parser')
        results = soup.find_all('li', class_='b_algo')
        for result in results:
            try:
                urls.append(result.find('a').attrs['href'])
            except (AttributeError, KeyError):
                continue
        return urls
class Baidu(SearchEngine):
    def search_for(self, string, start):
        # Baidu wraps results in redirect links, so each one is followed
        # with an extra GET to recover the real destination URL.
        urls = []
        payload = {'wd': string, 'pn': start}
        headers = {'User-agent': 'Mozilla/11.0'}
        req = requests.get('http://www.baidu.com/s', params=payload, headers=headers)
        soup = BeautifulSoup(req.text, 'html.parser')
        h3tags = soup.find_all('h3', class_='t')
        for h3 in h3tags:
            try:
                redirect = h3.find('a').attrs['href']
                link = requests.get(redirect)
                urls.append(link.url)
            except Exception:
                continue
        return urls
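# A minimal interactive sketch of how one of these engine classes can be used
# on its own (the dork string and the output below are only illustrative, not
# taken from the project's documentation):
#
#   >>> engine = Google("google")
#   >>> engine.search_for("inurl:php?id=", 0)   # offset 0 = first results page
#   ['http://example.com/page.php?id=1', ...]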
def printf(lista):
    # Print each URL, decoding the percent-encoded "?" and "=" left in by the engines.
    for i in lista:
        link = str(i)
        ch = link.replace("%3F", "?")
        ch2 = ch.replace("%3D", "=")
        print(" " + ch2)
def export_to_txt(urls):
    # Dump every collected URL to file.txt, one per line.
    with open('file.txt', 'w') as file:
        for item in urls:
            file.write(str(item) + '\n')
def main():
    usage = """ Usage:
    dork-scanner.py <search> <engine> <pages> <processes>
      <search>     String to be searched for
      <engine>     Search engine to be used (google, bing or baidu)
      <pages>      Number of pages to search in
      <processes>  Number of parallel processes"""
    # Check for -h/--help before the positional parsing, otherwise a lone
    # "-h" would fall into the missing-arguments branch below.
    if len(sys.argv) > 1 and sys.argv[1].lower() in ("-h", "--help"):
        print(usage)
        sys.exit(0)
    try:
        string = sys.argv[1]
        engine = sys.argv[2].lower()
        page = int(sys.argv[3])
        proc = int(sys.argv[4])
    except (IndexError, ValueError):
        print(" * * Error * * Arguments missing or invalid")
        print("\n" + usage)
        sys.exit(1)
    start_time = time.time()
    result = []
    # Each engine expects a result offset rather than a page number,
    # so page N maps to offset N * 10.
    pages = []
    for p in range(page):
        pages.append(p * 10)
    pool = Pool(proc)
    print("#################################################")
    print("Searching for: " + string + " in " + str(page) + " page(s) of " + engine + " with " + str(proc) + " process(es)")
    print("#################################################")
    print("\n")
    # Bind the query string, then fan the page offsets out over the worker pool.
    if engine == "google":
        search = Google(engine)
        request = partial(search.search_for, string)
        per_page = pool.map(request, pages)
    elif engine == "bing":
        search = Bing(engine)
        request = partial(search.search_for, string)
        per_page = pool.map(request, pages)
    elif engine == "baidu":
        search = Baidu(engine)
        request = partial(search.search_for, string)
        per_page = pool.map(request, pages)
    else:
        print(" * * Error * * Unknown engine: " + engine)
        sys.exit(1)
    # Flatten the per-page lists, print the de-duplicated set and export everything.
    for urls in per_page:
        result.extend(urls)
    printf(set(result))
    export_to_txt(result)
    print("\n")
    print("#################################################")
    print(" Number of urls : " + str(len(result)))
    print(" Finished in    : " + str(int(time.time() - start_time)) + "s")
    print("#################################################")
if __name__ == '__main__':
    main()
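# Example invocation (a sketch; the dork and the counts are illustrative):
#
#   python dork-scanner.py "inurl:index.php?id=" google 5 4
#
# This searches 5 Google result pages for the dork using 4 worker processes,
# prints the decoded URLs and also writes them to file.txt.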