-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcaptchaDownloader.py
42 lines (32 loc) · 1 KB
/
captchaDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from bs4 import BeautifulSoup
import urllib.request
import numpy
import cv2
import os
# Base URL template; "{}" is replaced with a page name or an image path on the site.
URL = "https://sis.manipal.edu/{}"
# Directory that downloaded captcha images are written to.
OUTPUT_FOLDER = 'downloadedCaptchas'
# 1-based number of the next image; used in progress output and in file names.
count = 1
# Number of captcha images fetched in one run.
total = 1000
def downloadCaptchaImage(url):
    """Download one image from `url` and save it as a PNG in OUTPUT_FOLDER.

    The file name is the current value of the module-level `count`,
    zero-padded to six digits (e.g. ``000001.png``). `count` is advanced
    by one once the image has been handed to OpenCV for writing.

    Args:
        url: Absolute URL of the image to download.

    Raises:
        ValueError: If the server returned no data, or the data could not
            be decoded as an image.
    """
    global count
    print("[INFO] downloading image {}/{}".format(count, total))

    # Close the HTTP response deterministically; this function is called in
    # a long loop and would otherwise leak one connection per image.
    with urllib.request.urlopen(url) as url_response:
        payload = url_response.read()
    if not payload:
        raise ValueError("empty response while downloading {}".format(url))

    # frombuffer wraps the bytes without the extra bytearray copy.
    img_array = numpy.frombuffer(payload, dtype=numpy.uint8)
    image = cv2.imdecode(img_array, cv2.IMREAD_UNCHANGED)
    if image is None:
        # imdecode signals undecodable data by returning None rather than
        # raising; fail here with a clear message instead of inside imwrite.
        raise ValueError("could not decode image data from {}".format(url))

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    path = os.path.join(OUTPUT_FOLDER, "{}.png".format(str(count).zfill(6)))
    cv2.imwrite(path, image)
    count += 1
def getCaptchaURL(source):
    """Return the absolute URL of the captcha image found in `source`.

    Args:
        source: HTML of the login page, as accepted by BeautifulSoup.

    Returns:
        The `src` attribute of the third ``<img>`` tag in the document,
        substituted into the module-level URL template.
    """
    document = BeautifulSoup(source, 'html.parser')
    # NOTE(review): the captcha is presumably always the third <img> on the
    # page; this index breaks if the page layout changes -- confirm.
    captcha_tag = document.find_all('img')[2]
    return URL.format(captcha_tag['src'])
def downloadCaptchas():
    """Download `total` captcha images, one login-page request per image.

    Each iteration fetches the login page, extracts the captcha image URL
    from its HTML and saves that image via downloadCaptchaImage().
    """
    # The login page URL never changes, so build it once outside the loop.
    login_url = URL.format('studlogin.aspx')
    for _ in range(total):
        # The page is re-fetched every time, presumably because each request
        # yields a new captcha -- confirm. The response is closed right after
        # reading so connections do not pile up over the whole run.
        with urllib.request.urlopen(login_url) as resp:
            page_source = resp.read()
        captchaURL = getCaptchaURL(page_source)
        downloadCaptchaImage(captchaURL)
# Script entry point: this call is at module level, so the whole download
# starts as soon as the file is run (or imported).
downloadCaptchas()