# sitemap.py
"""
Generated by ChatGPT using the following prompt:
Create a Python script that will generate a sitemap.xml file by finding all
*.htm files in subfolders under the current root folder. The site is a static
site so all files will map directly to the published URL.
Follow up prompt:
How do I indent the XML output file?
Additional updates were made to the final ChatGPT version.
"""
import os
from xml.etree.ElementTree import Element, SubElement, ElementTree, tostring
from xml.dom import minidom
from urllib.parse import quote
# Root URL of the published site, including the trailing slash; page paths
# found on disk are appended to it to form each sitemap entry.
BASE_URL = "https://awesome-web-react.js.org/"
# Directory that is scanned for .htm files: the working directory at the
# moment this module is loaded.
ROOT_DIR = os.getcwd()
def generate_sitemap(root_dir, base_url=None):
    """Build a sitemap ``<urlset>`` element for the .htm pages under *root_dir*.

    The directory tree is walked recursively. Every file whose name ends in
    ``.htm`` and does not contain ``template`` becomes one ``<url><loc>``
    entry; its URL is the base URL followed by the file's path relative to
    *root_dir*, with forward slashes and percent-encoding applied. The base
    URL itself (the main page) is always listed. Entries are emitted in
    sorted order so the output does not depend on directory traversal order.

    Args:
        root_dir: Directory to scan for pages.
        base_url: Root URL of the published site. Defaults to the
            module-level ``BASE_URL``. A trailing slash is added if missing.

    Returns:
        The ``urlset`` root ``Element`` containing one ``url`` child per page.
    """
    if base_url is None:
        base_url = BASE_URL
    # Relative paths are appended directly, so the base must end with '/'.
    if not base_url.endswith('/'):
        base_url += '/'

    urlset = Element('urlset', xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    # The main page is listed even though no file on disk maps to it.
    urls = [base_url]

    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if not filename.endswith('.htm') or 'template' in filename:
                continue
            # The site is static, so the path relative to the root is also
            # the path part of the published URL.
            file_path = os.path.relpath(os.path.join(dirpath, filename), root_dir)
            urls.append(base_url + quote(file_path.replace(os.path.sep, '/')))

    urls.sort()
    for url in urls:
        url_elem = SubElement(urlset, 'url')
        SubElement(url_elem, 'loc').text = url

    print(f'Added {len(urls)} pages to the Sitemap')
    return urlset
def save_sitemap(sitemap, output_file='sitemap.xml'):
    """Write *sitemap* to *output_file* as indented, UTF-8 encoded XML.

    Args:
        sitemap: Root ``Element`` of the sitemap (as returned by
            ``generate_sitemap``).
        output_file: Path of the file to create or overwrite.
    """
    # ElementTree has no pretty-printer on older Python versions, so the
    # element is serialised to bytes and re-parsed with minidom.
    rough_string = tostring(sitemap, 'utf-8')
    reparsed = minidom.parseString(rough_string)

    # Passing an encoding makes minidom return bytes and put the encoding in
    # the XML declaration.
    pretty_xml = reparsed.toprettyxml(indent=" ", encoding="utf-8")

    # Binary mode keeps the bytes exactly as produced, so line endings are
    # not translated by the platform.
    with open(output_file, 'wb') as f:
        f.write(pretty_xml)
if __name__ == "__main__":
    # Scan ROOT_DIR for pages and build the sitemap element tree.
    sitemap = generate_sitemap(ROOT_DIR)
    # Write sitemap.xml into ROOT_DIR explicitly, so the location reported
    # below is where the file actually lands.
    save_sitemap(sitemap, os.path.join(ROOT_DIR, 'sitemap.xml'))
    print(f"Sitemap has been generated and saved as 'sitemap.xml' in {ROOT_DIR}.")