-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
87 lines (70 loc) · 1.52 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import json
import sys
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Crawl bookkeeping that func() reads and rebinds through ``global``.
ur1, ur2 = [], []  # ur1: URLs still to be crawled; ur2: URLs already crawled
c = 0  # running index used to name the Page<c>.json output files

# Empty log.txt at the start of the run, then keep an append-mode handle
# open for the browser-console entries func() writes.
with open("log.txt", "w"):
    pass
fi = open("log.txt", "a")
def _links_with_capital(driver):
    """Return the distinct texts of all ``<a>`` elements containing an ASCII capital."""
    texts = set()
    for anchor in driver.find_elements_by_css_selector("a"):
        text = anchor.text
        if any('A' <= ch <= 'Z' for ch in text):
            texts.add(text)
    return texts


def _scrape_linked_pages(driver, siteurl):
    """Open ``siteurl``, log its console output and scrape every linked page.

    Returns a ``(urls, records)`` pair: ``urls`` lists the URL reached by each
    link that could be clicked, ``records`` holds one
    ``{"page": url, "Content": text}`` dict per page whose paragraphs were
    read successfully.
    """
    driver.get(siteurl)
    for entry in driver.get_log('browser'):
        fi.write(str(entry))
        fi.write('\n')

    urls = []
    records = []
    for text in _links_with_capital(driver):
        try:
            driver.find_element_by_link_text(text).click()
            time.sleep(5)  # give the target page time to load
            page_url = driver.current_url
            # Queue the URL as soon as the click worked, even if reading the
            # page content fails afterwards.
            urls.append(page_url)
            content = "".join(
                p.text for p in driver.find_elements_by_css_selector('p')
            )
            driver.back()
            time.sleep(5)  # let the start page finish reloading
        except WebDriverException as err:
            # Best effort: a link that cannot be followed is reported and
            # skipped instead of aborting the whole crawl.
            print("skipping link " + repr(text) + ": " + str(err),
                  file=sys.stderr)
            continue
        records.append({"page": page_url, "Content": content})
    return urls, records


def func(siteurl):
    """Crawl ``siteurl`` in Chrome and recurse into the pages its links reach.

    The page is opened in a fresh Chrome session and its browser-console log
    entries are appended to the module-level ``fi`` log file.  Every link
    whose text contains an ASCII capital letter is clicked in turn; the URL
    reached and the concatenated text of that page's ``<p>`` elements are
    written as one JSON document to ``Page<c>.json``.  URLs reached that have
    not been crawled yet are then crawled recursively.

    Args:
        siteurl: URL of the page to crawl.

    Module state touched: ``c`` is incremented, ``siteurl`` is appended to
    ``ur2`` (crawled URLs) and ``ur1`` is rebound to the URLs still pending.
    """
    global ur1
    global ur2
    global c

    out_name = "Page" + str(c) + ".json"
    c = c + 1

    # Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict
    # and must not be modified in place.
    caps = DesiredCapabilities.CHROME.copy()
    caps['loggingPrefs'] = {'browser': 'ALL'}
    driver = webdriver.Chrome(desired_capabilities=caps)
    try:
        found_urls, records = _scrape_linked_pages(driver, siteurl)
    finally:
        # Always shut the browser down; otherwise every crawled page leaves a
        # Chrome process behind while the recursion below continues.
        driver.quit()

    # Serialise through json so quotes, backslashes and newlines in page text
    # are escaped and no trailing comma is emitted.
    # NOTE(review): append mode is kept from the original; a Page<c>.json left
    # over from an earlier run gets a second document appended to it.
    with open(out_name, "a", encoding="utf-8") as out:
        json.dump({"Pages": records}, out,
                  ensure_ascii=False, separators=(",", ":"))

    ur1 = ur1 + found_urls
    ur2.append(siteurl)
    ur1 = list(set(ur1) - set(ur2))
    print(ur1)

    # Iterate over a snapshot: recursive calls rebind the global ``ur1``.
    pending = ur1
    for url in pending:
        # A deeper call may already have crawled this URL.
        if url in ur2:
            continue
        func(url)
# Script entry: crawl starting from the URL given as the first command-line
# argument, then echo that URL.
try:
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: " + sys.argv[0] + " <start-url>")
    func(sys.argv[1])
    print(sys.argv[1])
finally:
    # Close the shared log handle even when the crawl aborts, so buffered
    # entries are flushed to log.txt.
    fi.close()