-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathImageDownloader.py
78 lines (59 loc) · 2.69 KB
/
ImageDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import datetime
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
def sanitize_filename(filename, default="unnamed"):
    """Return ``filename`` with every unsafe character replaced by ``_``.

    Characters accepted by ``str.isalnum`` plus ``.`` and ``_`` are kept;
    every other character (spaces, slashes, ``?``, ``-``, ...) becomes an
    underscore.

    Args:
        filename: Raw name, e.g. the last path segment of an image URL.
        default: Name returned when the cleaned result is empty or made up
            only of dots.

    Returns:
        A string that can be used as a single path component.
    """
    cleaned = ''.join(
        c if c.isalnum() or c in ('.', '_') else '_' for c in filename
    )
    # "", "." and ".." are not usable file names: joined onto a folder they
    # resolve to a directory, so opening them for writing would fail.
    if not cleaned.strip('.'):
        return default
    return cleaned
def download_images_from_website(url, base_folder, timeout=30):
    """Download every ``<img>`` referenced by the page at ``url``.

    The images are written to a new folder named
    ``images_<domain> <YYYYmmddHHMMSS>`` inside ``base_folder``. Progress and
    failures are reported with ``print``; nothing is returned.

    Args:
        url: Address of the web page to scan, including the scheme.
        base_folder: Directory in which the timestamped folder is created.
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    # Create a new folder name from the page's domain and a timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    domain = urlparse(url).netloc
    folder_path = os.path.join(base_folder, f"images_{domain} {timestamp}")

    # Fetch the page; a network error is reported the same way as a bad status
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        print(f"Failed to fetch webpage: {exc}")
        return

    # Check if the request was successful (status code 200)
    if response.status_code != 200:
        print(f"Failed to fetch webpage. Status code: {response.status_code}")
        return

    print("Successfully connected to the website")

    # Parse the HTML content of the webpage and collect all image tags
    soup = BeautifulSoup(response.text, 'html.parser')
    img_tags = soup.find_all('img')

    # Create the folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)

    for img_tag in img_tags:
        src = img_tag.get('src')
        if not src:
            continue

        # Resolve relative image paths against the page URL
        img_url = urljoin(url, src)
        parsed = urlparse(img_url)
        # Only http(s) can be fetched; this skips inline "data:" images and
        # other schemes that requests cannot handle.
        if parsed.scheme not in ("http", "https"):
            continue

        # Name the file after the URL *path* so query strings and fragments
        # do not leak into the file name.
        file_name = sanitize_filename(os.path.basename(parsed.path))
        if not file_name.strip('.'):
            # URL ended in "/" (or similar): there is no usable name
            file_name = "image"

        # Never overwrite an image saved earlier in this run
        stem, ext = os.path.splitext(file_name)
        full_path = os.path.join(folder_path, file_name)
        suffix = 1
        while os.path.exists(full_path):
            file_name = f"{stem}_{suffix}{ext}"
            full_path = os.path.join(folder_path, file_name)
            suffix += 1

        # Fetch the image; skip it on any network or HTTP error so that an
        # error page is never saved as an image and one bad link does not
        # abort the remaining downloads.
        try:
            img_response = requests.get(img_url, timeout=timeout)
            img_response.raise_for_status()
        except requests.exceptions.RequestException as exc:
            print(f"Skipped image {img_url}: {exc}")
            continue

        # Save the image to the specified folder
        with open(full_path, 'wb') as img_file:
            img_file.write(img_response.content)
        print(f"Downloaded image: {file_name}")

    # Check if the folder is empty
    if not os.listdir(folder_path):
        print("No images downloaded.")
    else:
        print(f"\n--DOWNLOAD COMPLETED--\nImages saved to: {os.path.abspath(folder_path)}")
# --INSERT WEBSITE HERE--
# Full address of the page to scan, including the scheme (http:// or https://).
website_url = ""
# --Download Location--
# Folder in which the timestamped image folder is created; an empty string
# gives a relative path, i.e. the current working directory.
base_folder = ""

# The placeholder above is empty by default; calling the downloader with an
# empty URL only produces a traceback from requests, so print a hint instead.
if website_url:
    download_images_from_website(website_url, base_folder)
else:
    print("No website URL set. Edit website_url before running this script.")