forked from Bitwise-01/Proxies
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper.py
114 lines (98 loc) · 3.46 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Date: 05/03/2018
# Author: Pure-L0G1C
# Description: Proxy scraper
from requests import get
from bs4 import BeautifulSoup as bs
class Queue(object):
    """First-in, first-out container that ignores items it already holds."""

    def __init__(self):
        # Backing list; the oldest item sits at index 0.
        self.queue = []

    def put(self, item):
        """Append ``item`` unless an equal item is already queued."""
        if item in self.queue:
            return
        self.queue.append(item)

    def get(self):
        """Remove and return the oldest item, or ``None`` when empty."""
        return self.queue.pop(0) if self.qsize else None

    def inQueue(self, item):
        """Return True when an item equal to ``item`` is currently queued."""
        return item in self.queue

    @property
    def qsize(self):
        """Number of items currently queued."""
        return len(self.queue)
class Scraper(object):
    """Collect proxy entries from several public listing pages.

    ``scrape`` downloads each listing in turn, turns every table row into
    a detail dict via ``parse`` and stores the rows that satisfy the active
    filters (protocol, country, port) in a ``Queue``.

    Setting ``isAlive`` to False (done internally on KeyboardInterrupt)
    stops the scrape at the next row or listing boundary.
    """

    # Seconds to wait for a listing page before giving up on that listing.
    REQUEST_TIMEOUT = 10

    # Protocol filter values accepted by scrape(), compared case-insensitively.
    SUPPORTED_PROTOCOLS = ('ssl', 'socks4', 'socks5')

    def __init__(self):
        # Listing pages, fetched by scrape() in this order:
        # new, anonymous, socks, ssl.
        self.anony_proxis = 'https://free-proxy-list.net/anonymous-proxy.html'
        self.new_proxies = 'https://free-proxy-list.net'
        self.socks_proxies = 'https://socks-proxy.net'
        self.ssl_proxies = 'https://sslproxies.org'
        # Not used anywhere inside this class.
        self.ip_checker = 'https://ip-api.io/json/'

        self.isAlive = False   # run flag checked between rows and listings
        self.protocol = None   # active protocol filter, or None
        self.country = None    # active country filter, or None
        self.proxies = None    # Queue of collected detail dicts
        self.maxSize = None    # cap on collected entries; falsy means no cap
        self.port = None       # active port filter as a string, or None

    @staticmethod
    def _matches(value, wanted):
        """Case-insensitive equality that treats a missing cell as no match."""
        return value is not None and value.lower() == wanted.lower()

    def _is_full(self):
        """Return True when a size cap is set and has been reached."""
        return bool(self.maxSize) and self.proxies.qsize >= self.maxSize

    def parse(self, proxy, ssl=False):
        """Convert one table row into a detail dict and apply the filters.

        Args:
            proxy: Sequence of cell objects exposing a ``.string`` attribute
                (``fetch`` passes the row's ``<td>`` tags).
            ssl: True when the row comes from the SSL listing; the protocol
                is then fixed to ``'SSL'`` and anonymity is read from cell 4.

        Returns:
            The detail dict when every active filter matches, else ``None``.
            Rows with fewer than eight cells are rejected, and a filtered
            cell whose ``.string`` is ``None`` counts as a mismatch.
        """
        # Cells up to index 7 are read below; skip rows that are too short.
        if len(proxy) < 8:
            return None

        # NOTE(review): for non-SSL rows the protocol is taken from cell 4
        # and anonymity from cell 5 -- confirm this matches the column layout
        # of every non-SSL listing that is fetched.
        detail = {
            'ip': proxy[0].string,
            'port': proxy[1].string,
            'protocol': 'SSL' if ssl else proxy[4].string,
            'anonymity': proxy[4 if ssl else 5].string,
            'country': proxy[3].string,
            'updated': proxy[7].string,
            'https': proxy[6].string,
        }

        # Every filter that is set must match; unset filters are ignored.
        if self.protocol and not self._matches(detail['protocol'], self.protocol):
            return None
        if self.country and not self._matches(detail['country'], self.country):
            return None
        if self.port and detail['port'] != str(self.port):
            return None
        return detail

    def fetch(self, url, ssl=False):
        """Download one listing page and queue its matching rows.

        Best-effort: any failure while downloading or locating the table
        silently skips this listing.  A KeyboardInterrupt additionally
        clears ``isAlive`` so the caller stops scraping.

        Args:
            url: Address of the listing page.
            ssl: Forwarded to ``parse`` for every row of this page.
        """
        # Nothing more can be stored, so avoid a pointless download.
        if self._is_full():
            return

        try:
            page = get(url, timeout=self.REQUEST_TIMEOUT).text
            rows = bs(page, 'html.parser').find('tbody').findAll('tr')
        except KeyboardInterrupt:
            self.isAlive = False
            return
        except Exception:
            # Unreachable site, timeout, or a page without a <tbody>.
            return

        for row in rows:
            if not self.isAlive or self._is_full():
                break
            data = self.parse(row.findAll('td'), ssl)
            if data:
                self.proxies.put(data)

    def scrape(self, size=None, port=None, protocol=None, country=None):
        """Scrape all listings and return the collected proxies.

        Args:
            size: Maximum number of entries to collect; falsy means no limit.
            port: Only keep entries with this port (compared as a string).
            protocol: Only keep entries with this protocol; must be one of
                ``ssl``, ``socks4`` or ``socks5`` (case-insensitive).
            country: Only keep entries whose country equals this value,
                case-insensitively.

        Returns:
            A ``Queue`` of detail dicts, or ``None`` (after printing a
            message) when ``protocol`` is not supported.
        """
        self.port = str(port) if port else None
        self.protocol = protocol
        self.country = country
        self.proxies = Queue()
        self.maxSize = size
        self.isAlive = True

        if protocol and protocol.lower() not in self.SUPPORTED_PROTOCOLS:
            print('Only Supporting SSL & Socks protocol')
            return None

        sources = (
            (self.new_proxies, False),
            (self.anony_proxis, False),
            (self.socks_proxies, False),
            (self.ssl_proxies, True),
        )
        for url, is_ssl in sources:
            if not self.isAlive:
                break
            self.fetch(url, is_ssl)

        # Hand the results to the caller and leave a fresh queue behind.
        proxies = self.proxies
        self.proxies = Queue()
        return proxies