-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcheck-links.py
151 lines (121 loc) · 5.05 KB
/
check-links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import markdown
import re
import requests
import os
import os.path
import sys
import aiohttp
import asyncio
import json
import datetime
from colorama import Fore, Back, Style
# Diagnostic banner emitted on every run; the text refers to the
# verify_ssl=False option passed when links are fetched.
print("DEBUGGING OUTPUT: Verify_ssl False")

# Optional comma-separated file filters supplied through the environment.
# Either value is None when the corresponding variable is not set.
EXCLUDE_FILES = os.getenv("exclude_files")
INCLUDE_FILES = os.getenv("include_files")

# Overall result flag for the run; 0 until new broken links are recorded.
exit_code = 0
def get_include_exclude_files():
    """Parse file inclusions and exclusions from environmental variables.

    Reads the module-level ``EXCLUDE_FILES`` and ``INCLUDE_FILES`` strings,
    each a comma-separated list of file names, and prints the selection
    that will be applied.

    Returns:
        A ``(excluded, included)`` tuple of lists. At most one of the two
        lists is non-empty; both are empty when neither variable is set.

    Raises:
        SystemExit: with status 2 when both variables are set, because the
            two filters are mutually exclusive.
    """
    if EXCLUDE_FILES and INCLUDE_FILES:
        print(f"{Back.RED}{Fore.WHITE}Please only include an inclusion or an exclusion variable in the workflow{Style.RESET_ALL}")
        # sys.exit instead of the bare exit() helper: exit() is injected by
        # the site module for interactive use and is not guaranteed to exist.
        sys.exit(2)

    if EXCLUDE_FILES:
        excluded = EXCLUDE_FILES.split(",")
        print(f"{Back.BLACK}{Fore.RED}Excluding the following files:{Style.RESET_ALL}")
        for filename in excluded:
            print(filename)
        return (excluded, [])

    if INCLUDE_FILES:
        included = INCLUDE_FILES.split(",")
        print(f"{Back.BLACK}{Fore.RED}Including only the following files:{Style.RESET_ALL}")
        for filename in included:
            print(filename)
        return ([], included)

    return ([], [])
# Resolve the file filters once at import time: e_files holds the names to
# skip and i_files the names to check exclusively (read by get_markdown_files).
e_files, i_files = get_include_exclude_files()
def get_exclusion_list():
    """Load the link exclusion list from ``.github/exclude_links.json``.

    Returns:
        The decoded JSON content when the file exists and parses, otherwise
        an empty dict (a missing file and malformed JSON are treated alike).
    """
    try:
        with open(".github/exclude_links.json") as handle:
            exclusions = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        print(f"{Back.CYAN}{Fore.RED}Link Exclusion List not set{Style.RESET_ALL}")
        return {}

    # Echo what was loaded so the run log shows which links are being skipped.
    print(f"{Back.CYAN}Recived the following Link Exclusion List:{Style.RESET_ALL}")
    print(exclusions)
    return exclusions
# Loaded once at import time; consulted by get_link_statuses() to skip known
# links and extended in place by build_exclusion_list().
exclusion_list = get_exclusion_list()
def get_markdown_links_from_path(path):
    """Return the distinct link targets found in the markdown file at *path*.

    The file is rendered to HTML and every ``href`` value is collected.
    Duplicates are removed through a set, so the order of the returned list
    is arbitrary. Targets whose first character is ``{`` are dropped
    (presumably template placeholders rather than real URLs).
    """
    with open(path) as source:
        rendered = markdown.markdown(source.read(), output_format="html")

    unique_hrefs = set(re.findall(r'href=[\'"]?([^\'" >]+)', rendered))
    return [href for href in unique_hrefs if href[0] != "{"]
def get_markdown_files():
    """Map each selected file under the current directory to its links.

    Walks ``.`` recursively. When an include list is configured, only files
    whose name appears in it are considered; otherwise every ``*.md`` file
    is. Names present in the exclude list are always skipped.

    Returns:
        A dict keyed by file path whose values are the lists of links
        extracted from that file.
    """
    links_by_path = {}
    for folder, _subdirs, names in os.walk("."):
        if i_files:
            candidates = [name for name in names if name in i_files]
        else:
            candidates = [name for name in names if name.endswith(".md")]

        for name in candidates:
            if name in e_files:
                continue
            full_path = os.path.join(folder, name)
            links_by_path[full_path] = get_markdown_links_from_path(full_path)
    return links_by_path
# Accumulates one (filename, link, code) tuple per link that failed its check.
bad_links = []
async def fetch_url(session, url, timeout=10):
    """Issue a GET request for *url* and report the outcome without raising.

    Args:
        session: the open ``aiohttp.ClientSession`` used to send the request.
        url: the address to fetch.
        timeout: total time budget for the request, in seconds.

    Returns:
        The integer HTTP status code when a response is received, otherwise
        ``str()`` of the class of the exception that was raised.
    """
    try:
        # Certificate verification is switched off for these requests.
        # NOTE(review): aiohttp documents verify_ssl as deprecated in favour
        # of ssl=False; kept as-is so the required aiohttp version does not
        # change -- confirm the installed version before switching.
        async with session.get(
            url,
            timeout=aiohttp.ClientTimeout(total=timeout),
            verify_ssl=False,
        ) as response:
            return response.status
    except Exception as error:
        # Exception (not a bare except) so task cancellation and Ctrl-C
        # propagate instead of being reported as a link status.
        return str(type(error))
async def get_link_statuses():
    """Fetch every non-anchor link of every selected file and record failures.

    Links starting with ``#`` are skipped, as are links already listed for
    their file in ``exclusion_list``. Each remaining link is requested
    sequentially through ``fetch_url``; failures are printed and appended to
    the module-level ``bad_links`` list as ``(filename, link, code)``.

    Reporting rules (unchanged from the previous implementation):
      * file has no entry in ``exclusion_list``: any result other than 200
        is recorded, including request errors (non-integer results);
      * file has an entry: only integer status codes other than 200 are
        recorded; request errors are printed but not recorded.
    """
    markdowns = get_markdown_files()
    async with aiohttp.ClientSession() as session:
        for filename, links_list in markdowns.items():
            # None means the file has no entry in the exclusion list at all.
            excluded = exclusion_list.get(filename)
            for link in links_list:
                if link[0] == "#":
                    continue
                if excluded is not None and link in excluded:
                    continue

                # Previously this path called an undefined name `shield`,
                # raising NameError; a plain await matches the other path.
                code = await fetch_url(session, link)
                if code == 200:
                    continue

                if excluded is None or isinstance(code, int):
                    print(f"{Back.RED}{Fore.WHITE}{link} returned status code: {code}{Style.RESET_ALL}")
                    bad_links.append((filename, link, code))
                else:
                    print(code, dir(code))
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated
# when no loop is current and raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    # Runs all link checks to completion before the results are processed.
    loop.run_until_complete(get_link_statuses())
finally:
    loop.close()
def build_exclusion_list():
    """Merge newly found bad links into the exclusion list and save it.

    For every ``(filename, url, code)`` entry in ``bad_links`` that has no
    (truthy) record yet under ``exclusion_list[filename][url]``, a record
    with the status code, the current local timestamp and an empty reason is
    added. The updated ``exclusion_list`` is then written to
    ``exclude_links.json`` in the working directory.

    Returns:
        A list of single-entry dicts ``{url: record}``, one per record that
        was added during this call.
    """
    # The previous version also called get_markdown_files() here and threw
    # the result away, re-reading every markdown file for nothing.
    new_items = []
    for filename, url, code in bad_links:
        if not exclusion_list.get(filename):
            exclusion_list[filename] = {}
        file_entries = exclusion_list[filename]

        if not file_entries.get(url):
            item = {
                "code": code,
                "time": datetime.datetime.now().isoformat(),
                "reason": "",
            }
            file_entries[url] = item
            new_items.append({url: item})

    with open("exclude_links.json", "w") as outfile:
        json.dump(exclusion_list, outfile, indent=2)
    return new_items
new_items = build_exclusion_list()

# Any newly recorded broken link marks the run as failed.
if new_items:
    exit_code = 1
    # for item in new_items:
    #     print(item)

# sys.exit(exit_code)
# The result is published through the EXIT_CODE environment variable rather
# than the process exit status (the sys.exit call above is commented out).
# NOTE(review): os.environ changes are visible only to this process and its
# children -- confirm the workflow can actually read this value.
os.environ["EXIT_CODE"] = str(exit_code)