-
Notifications
You must be signed in to change notification settings - Fork 0
/
ProxyQueue.py
122 lines (113 loc) · 3.03 KB
/
ProxyQueue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import urllib, urllib2, json, base64, time, binascii
import cookielib
import re
import hashlib
from Loginer import Session
import threading,traceback
import Queue
def parseProxies(con):
    """Split a proxy API response body into a list of proxy entries.

    The response is treated as plain text with one entry per line.
    Line endings may be CRLF or LF, a missing terminator on the last
    line does not lose that entry, and blank lines are skipped.

    Args:
        con: raw response body returned by the proxy API.

    Returns:
        A list of non-empty, whitespace-stripped lines (possibly empty).
    """
    entries = []
    for line in con.splitlines():
        entry = line.strip()
        if entry:
            entries.append(entry)
    return entries
# Module-level log file, opened in append mode when this module is imported.
# ProxyQueue.getProxies writes each raw proxy API response body to it.
# NOTE(review): nothing in the code shown here ever closes this handle.
rawproxyf = open('rawproxy.txt', 'a')
class ProxyQueue(object):
    """Bounded queue of proxy entries, refilled from a web API or a file.

    Proxies are fetched from ``self.proxyAPIUrl``. When the API cannot be
    used (request error, non-200 status, or a body of 500 characters or
    fewer), entries are read from the fallback file ``self.filename``
    instead, cycling back to the top of the file at EOF.

    With ``is_thread=True`` a background ``proxythread`` refills the queue;
    otherwise ``getProxy`` refills it synchronously when it runs empty.
    """

    class proxythread(threading.Thread):
        """Worker thread that repeatedly pushes fetched proxies into the owner's queue."""

        def __init__(self, outer):
            """Bind the worker to *outer*, the ProxyQueue it refills."""
            threading.Thread.__init__(self)
            self.outer = outer
            # Set by stop(); checked once per refill cycle.
            self._stop_event = threading.Event()

        def run(self):
            """Fetch a batch, enqueue it, pause 6 seconds; repeat until stopped."""
            while not self._stop_event.is_set():
                rst = self.outer.getProxies()
                if rst != -1:
                    for proxy in rst:
                        # Blocks while the bounded queue is full.
                        self.outer.proxies.put(proxy)
                # Event.wait instead of time.sleep so stop() ends the pause early.
                self._stop_event.wait(6)

        def stop(self):
            """Ask the worker to exit.

            Takes effect once the current fetch/enqueue step or pause ends;
            this call does not wait for the thread to finish.
            """
            self._stop_event.set()

    def __init__(self, is_thread=True, filename='goodproxy.txt'):
        """Set up the queue and, optionally, the background refill worker.

        Args:
            is_thread: when true, create (but do not start) a ``proxythread``.
            filename: path of the fallback file read when the API is unusable.
        """
        self.filename = filename
        self.lock = threading.Lock()
        self.file = None  # fallback file handle, opened lazily on first use
        self.batchnum = 1000
        # NOTE(review): the API key is hard-coded in this URL.
        self.proxyAPIUrl = 'http://www.httpsdaili.com/api.asp?key=gcxfhbvdf&getnum=' + str(self.batchnum)
        self.proxies = Queue.Queue(maxsize=self.batchnum)
        if is_thread:
            self.proxyt = self.proxythread(self)
        else:
            self.proxyt = None

    def _readProxiesFromFile(self):
        """Read up to ``self.batchnum`` entries from the fallback file.

        The handle in ``self.file`` is kept between calls so successive
        batches continue where the previous one stopped. At EOF the file is
        reopened and reading continues from the top. Lines are stripped of
        surrounding whitespace and blank lines are skipped.

        Returns:
            A list of entries; shorter than ``batchnum`` (possibly empty)
            only when the file contains no usable line at all.
        """
        if not self.file:
            self.file = open(self.filename, 'r')
        proxies = []
        wrapped = False  # restarted from the top during this call?
        found = False    # any usable line since the last restart?
        while len(proxies) < self.batchnum:
            line = self.file.readline()
            if not line:
                if wrapped and not found:
                    # A complete pass produced nothing: stop instead of spinning.
                    break
                self.file.close()
                self.file = open(self.filename, 'r')
                wrapped, found = True, False
                continue
            proxy = line.strip()
            if proxy:
                proxies.append(proxy)
                found = True
        return proxies

    def getProxies(self):
        """Fetch one batch of proxies, preferring the API over the file.

        Returns:
            A non-empty list of proxy entries, or ``-1`` when no entry could
            be obtained.

        Raises:
            IOError/OSError: if the fallback file is needed but cannot be
            opened.
        """
        print('getting proxies from :' + self.proxyAPIUrl)
        try:
            res = Session().open(self.proxyAPIUrl)
            con = res.read()
            if res.getcode() == 200 and len(con) > 500:
                tmp = parseProxies(con)
                print(tmp)
                if not tmp:
                    return -1
                # Keep a copy of every raw API response that was used.
                rawproxyf.write(con + '\n')
                rawproxyf.flush()
                print('successfully get (' + str(len(tmp)) + ') proxies! ')
                return tmp
        except Exception:
            # Best effort: any API-side failure falls back to the file.
            print(traceback.format_exc())
            print('get proxy failed! got from file')
            tmp = self._readProxiesFromFile()
            if not tmp:
                return -1
            return tmp
        # The API answered, but with a non-200 status or too short a body.
        print('get proxy from file')
        tmp = self._readProxiesFromFile()
        print('successfully get (' + str(len(tmp)) + ') proxies! ')
        print(tmp)
        if not tmp:
            return -1
        return tmp

    def start(self):
        """Start the background worker (requires ``is_thread=True``)."""
        self.proxyt.start()

    def stop(self):
        """Ask the background worker, if any, to exit."""
        if self.proxyt is not None:
            self.proxyt.stop()

    def getProxy(self):
        """Return the next proxy, blocking until one is available.

        When the queue is empty, the refill is triggered under ``self.lock``:
        the worker thread is started if one was configured, otherwise a batch
        is fetched synchronously in the calling thread.
        """
        if self.proxies.empty():
            with self.lock:
                # Re-check: another caller may have refilled the queue while
                # this one was waiting for the lock.
                if self.proxies.empty():
                    if self.proxyt:
                        try:
                            self.start()
                        except RuntimeError:
                            # Thread.start() raises RuntimeError when the
                            # worker has already been started.
                            pass
                    else:
                        rst = self.getProxies()
                        if rst != -1:
                            for proxy in rst:
                                if self.proxies.full():
                                    # A blocking put here would deadlock
                                    # while the lock is held.
                                    break
                                self.proxies.put(proxy)
        return self.proxies.get()
if __name__ == '__main__':
    # Manual smoke test: fetch one batch synchronously (no background worker)
    # and show what came back. ProxyQueue itself has no run() method, so the
    # previous ProxyQueue.run() call raised AttributeError.
    print(ProxyQueue(is_thread=False).getProxies())