googlesub.py
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Coded by Sam (info@sam3.se)
# http://0xdeadcode.se
import urllib, signal, os, inspect, optparse
from random import randint
from time import sleep
try:
    import requests
except ImportError:
    exit('Could not find the library requests (Python 2.x)\nPlease install it and try again.')
try:
    from bs4 import BeautifulSoup
except ImportError:
    exit('Could not find the library BeautifulSoup4 (Python 2.x)\nPlease install it and try again.')
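# User agents are read from useragent_list.txt (one per line); a random one
# is sent with each round of requests so the queries look less uniform to Google.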
def getuseragent():
    # Read one user agent per line from useragent_list.txt.
    with open('useragent_list.txt', 'r') as f:
        return [line.rstrip('\n') for line in f]
try:
    useragentlist = getuseragent()
    print '[+] Successfully loaded %i user agent(s)' % len(useragentlist)
except IOError:
    exit('[!] Something went terribly wrong when loading the user agent list. Does the file exist?')
# Three independent lists; [[]] * 3 would alias the same list object three times.
urls, links, subdomains = [], [], []
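# start_query() fetches several Google result pages for the given dork and
# returns every outbound link found in the HTML.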
def start_query(query, useragentlist, page):
    global delay
    # Pick a random user agent for this round of requests.
    headers = {'User-Agent': useragentlist[randint(1, len(useragentlist)) - 1]}
    links = []
    for page in xrange(1, 10):
        # Google pages its results in steps of 10 via the 'start' parameter.
        r = requests.get('http://www.google.com/search?q=%s&safe=on&start=%i' % (query, (page - 1) * 10),
                         headers=headers, timeout=5)
        html_container = BeautifulSoup(r.text, 'lxml')
        links += fix_links(html_container.find_all('a'))
        if delay:
            sleep(1)
    if links:
        return links
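# fix_links() pulls the real destination out of Google's redirect anchors
# (href="/url?q=<target>&sa=...") and drops Google cache links.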
def fix_links(linkdata):
    links = []
    for link in linkdata:
        try:
            # Keep only the target part of the /url?q=... redirect and strip the &sa tracking suffix.
            l = str(link).strip('\n').split('href=')[1].split('/url?q=')[1].split('"')[0]
            l = l.split('&sa')[0]
            if l.find('webcache.googleusercontent.com') == -1:
                l = urllib.unquote(l).decode('utf8')
                links.append(str(l))
        except (IndexError, UnicodeError):
            pass
    return links
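# strip() reduces each full URL to just its hostname ending in the target
# domain (e.g. http://mail.example.com/foo -> mail.example.com) and dedupes.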
def strip(urls, queryurl):
    if len(urls):
        return list(set([url.split(queryurl)[0].split('//')[1] + queryurl for url in urls]))
##################
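# Ctrl+C prints whatever subdomains have been collected so far before exiting.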
def handler(signum, frame):  # http://stackoverflow.com/questions/1112343/how-do-i-capture-sigint-in-python
    global subdomains
    if subdomains:
        subdomains = sorted(list(set(subdomains)))
        print '\n\nFound %d subdomains:\n' % len(subdomains)
        print '\n'.join(subdomains)
    exit('CTRL+C pressed. Terminating.')
signal.signal(signal.SIGINT, handler)
###################
print 'Google subdomain scraper by Sam\n\nGooglesub will use Google dorks to find subdomains without accessing the target domain.'
filename = os.path.split(inspect.getfile(inspect.currentframe()))
parser = optparse.OptionParser('%s <args>'
                               '\n\nExample: python %s -u google.com -d -q 5' % (filename[1], filename[1]))
parser.add_option('-u', dest='queryurl', type='string', help='Research target')
parser.add_option('-d', dest='delay', action='store_true', help='Adds a delay between requests to avoid getting a captcha (optional)')
parser.add_option('-q', dest='queries', type='int', help='How many queries the script should do. Recommended: 6')
(options, args) = parser.parse_args()
queryurl = options.queryurl
delay = options.delay
queries = options.queries
if queryurl is None or queries is None:
    exit(parser.print_help())
query = 'site:' + queryurl
unique = []
if delay:
    print 'Estimated completion time: %d seconds' % (2 * queries * 10)
print 'Kill it with ctrl+c or let it finish.\nQuerying Google for \'%s\'.\nNow please wait while I invade Google...' % query
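# Each pass appends '-site:<found subdomain>' to the dork so the next query
# pushes Google to return subdomains that have not been seen yet.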
for num in xrange(queries):
    print 'Executing query %s of %s' % (str(num + 1), str(queries))
    links = start_query(query, useragentlist, num)
    if links:
        subdomains += strip(links, queryurl)
    if subdomains:
        for s in subdomains:
            if s not in unique:
                # Exclude subdomains we already know about from the next query.
                query += '+-site:%s' % s
                unique.append(s)
    if delay:
        sleep(2)
subdomains = sorted(list(set(subdomains)))
print '\n##########################################\nFound %d subdomains on %s\n' % (len(subdomains), queryurl)
print '\n'.join(subdomains)
print '\n##########################################\nDone. Quitting...\n'