-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdltool.py
300 lines (273 loc) · 11.9 KB
/
dltool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
import os
import re
import math
import signal
import argparse
import datetime
import platform
import requests
import textwrap
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
from progressbar import ProgressBar, Bar, ETA, FileTransferSpeed, Percentage, DataSize
#Define constants
#Myrient HTTP-server addresses
MYRIENTHTTPADDR = 'https://myrient.erista.me/files/'
#Catalog URLs, to parse out the catalog in use from DAT
CATALOGURLS = {
'https://www.no-intro.org': 'No-Intro',
'http://redump.org/': 'Redump'
}
#Postfixes in DATs to strip away
DATPOSTFIXES = [
' (Retool)'
]
#Chunk sizes to download
CHUNKSIZE = 8192
#Headers to use in HTTP-requests
REQHEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
}
#Print output function
def logger(str, color=None, rewrite=False):
colors = {'red': '\033[91m', 'green': '\033[92m', 'yellow': '\033[93m', 'cyan': '\033[96m'}
if rewrite:
print('\033[1A', end='\x1b[2K')
if color:
print(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | {colors[color]}{str}\033[00m')
else:
print(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | {str}')
#Input request function
def inputter(str, color=None):
colors = {'red': '\033[91m', 'green': '\033[92m', 'yellow': '\033[93m', 'cyan': '\033[96m'}
if color:
val = input(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | {colors[color]}{str}\033[00m')
else:
val = input(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} | {str}')
return val
#Scale file size
def scale1024(val):
prefixes=['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
if val <= 0:
power = 0
else:
power = min(int(math.log(val, 2) / 10), len(prefixes) - 1)
scaled = float(val) / (2 ** (10 * power))
unit = prefixes[power]
return scaled, unit
#Exit handler function
def exithandler(signum, frame):
logger('Exiting script!', 'red')
exit()
signal.signal(signal.SIGINT, exithandler)
#Generate argument parser
parser = argparse.ArgumentParser(
add_help=False,
formatter_class=argparse.RawTextHelpFormatter,
description=textwrap.dedent('''\
\033[92mTool to automatically download ROMs of a DAT-file from Myrient.
Generate a DAT-file with the tool of your choice to include ROMs that you
want from a No-Intro/Redump/etc catalog, then use this tool to download
the matching files from Myrient.\033[00m
'''))
#Add required arguments
requiredargs = parser.add_argument_group('\033[91mRequired arguments\033[00m')
requiredargs.add_argument('-i', dest='inp', metavar='nointro.dat', help='Input DAT-file containing wanted ROMs', required=True)
requiredargs.add_argument('-o', dest='out', metavar='/data/roms', help='Output path for ROM files to be downloaded', required=True)
#Add optional arguments
optionalargs = parser.add_argument_group('\033[96mOptional arguments\033[00m')
optionalargs.add_argument('-c', dest='catalog', action='store_true', help='Choose catalog manually, even if automatically found')
optionalargs.add_argument('-s', dest='system', action='store_true', help='Choose system collection manually, even if automatically found')
optionalargs.add_argument('-l', dest='list', action='store_true', help='List only ROMs that are not found in server (if any)')
optionalargs.add_argument('-h', '--help', dest='help', action='help', help='Show this help message')
args = parser.parse_args()
#Init variables
catalog = None
collection = None
wantedroms = []
wantedfiles = []
missingroms = []
collectiondir = []
availableroms = {}
foundcollections = []
#Validate arguments
if not os.path.isfile(args.inp):
logger('Invalid input DAT-file!', 'red')
exit()
if not os.path.isdir(args.out):
logger('Invalid output ROM path!', 'red')
exit()
if platform.system() == 'Linux' and args.out[-1] == '/':
args.out = args.out[:-1]
elif platform.system() == 'Windows' and args.out[-1] == '\\':
args.out = args.out[:-1]
#Open input DAT-file
logger('Opening input DAT-file...', 'green')
datxml = ET.parse(args.inp)
datroot = datxml.getroot()
#Loop through ROMs in input DAT-file
for datchild in datroot:
#Print out system information
if datchild.tag == 'header':
system = datchild.find('name').text
for fix in DATPOSTFIXES:
system = system.replace(fix, '')
catalogurl = datchild.find('url').text
if catalogurl in CATALOGURLS:
catalog = CATALOGURLS[catalogurl]
logger(f'Processing {catalog}: {system}...', 'green')
else:
logger(f'Processing {system}...', 'green')
#Add found ROMs to wanted list
elif datchild.tag == 'game':
rom = datchild.find('rom')
filename = rom.attrib['name']
filename = re.sub(r'\.[(a-zA-Z0-9)]{1,3}\Z', '', filename)
if filename not in wantedroms:
wantedroms.append(filename)
#Get HTTP base and select wanted catalog
catalogurl = None
resp = requests.get(MYRIENTHTTPADDR, headers=REQHEADERS).text
resp = BeautifulSoup(resp, 'html.parser')
maindir = resp.find('table', id='list').tbody.find_all('tr')
for dir in maindir[1:]:
cell = dir.find('td')
if catalog in cell.a['title']:
catalogurl = cell.a['href']
if not catalogurl or args.catalog:
logger('Catalog for DAT not automatically found, please select from the following:', 'yellow')
dirnbr = 1
catalogtemp = {}
for dir in maindir[1:]:
cell = dir.find('td')
logger(f'{str(dirnbr).ljust(2)}: {cell.a["title"]}', 'yellow')
catalogtemp[dirnbr] = {'name': cell.a['title'], 'url': cell.a['href']}
dirnbr += 1
while True:
sel = inputter('Input selected catalog number: ', 'cyan')
try:
sel = int(sel)
if sel > 0 and sel < dirnbr:
catalog = catalogtemp[sel]['name']
catalogurl = catalogtemp[sel]['url']
break
else:
logger('Input number out of range!', 'red')
except:
logger('Invalid number!', 'red')
#Get catalog directory and select wanted collection
collectionurl = None
resp = requests.get(f'{MYRIENTHTTPADDR}{catalogurl}', headers=REQHEADERS).text
resp = BeautifulSoup(resp, 'html.parser')
contentdir = resp.find('table', id='list').tbody.find_all('tr')
for dir in contentdir[1:]:
cell = dir.find('td')
if cell.a['title'].startswith(system):
foundcollections.append({'name': cell.a['title'], 'url': cell.a['href']})
if len(foundcollections) == 1:
collection = foundcollections[0]['name']
collectionurl = foundcollections[0]['url']
if not collection or args.system:
logger('Collection for DAT not automatically found, please select from the following:', 'yellow')
dirnbr = 1
if len(foundcollections) > 1 and not args.system:
for foundcollection in foundcollections:
logger(f'{str(dirnbr).ljust(2)}: {foundcollection["name"]}', 'yellow')
dirnbr += 1
else:
collectiontemp = {}
for dir in contentdir[1:]:
cell = dir.find('td')
logger(f'{str(dirnbr).ljust(2)}: {cell.a["title"]}', 'yellow')
collectiontemp[dirnbr] = {'name': cell.a['title'], 'url': cell.a['href']}
dirnbr += 1
while True:
sel = inputter('Input selected collection number: ', 'cyan')
try:
sel = int(sel)
if sel > 0 and sel < dirnbr:
if len(foundcollections) > 1 and not args.system:
collection = foundcollections[sel-1]['name']
collectionurl = foundcollections[sel-1]['url']
else:
collection = collectiontemp[sel]['name']
collectionurl = collectiontemp[sel]['url']
break
else:
logger('Input number out of range!', 'red')
except:
logger('Invalid number!', 'red')
#Get collection directory contents and list contents to available ROMs
resp = requests.get(f'{MYRIENTHTTPADDR}{catalogurl}{collectionurl}', headers=REQHEADERS).text
resp = BeautifulSoup(resp, 'html.parser')
collectiondir = resp.find('table', id='list').tbody.find_all('tr')
for rom in collectiondir[1:]:
cell = rom.find('a')
filename = cell['title']
romname = re.sub(r'\.[(a-zA-Z0-9)]{1,3}\Z', '', filename)
url = f'{MYRIENTHTTPADDR}{catalogurl}{collectionurl}{cell["href"]}'
availableroms[romname] = {'name': romname, 'file': filename, 'url': url}
#Compare wanted ROMs and contents of the collection, parsing out only wanted files
for wantedrom in wantedroms:
if wantedrom in availableroms:
wantedfiles.append(availableroms[wantedrom])
else:
missingroms.append(wantedrom)
#Print out information about wanted/found/missing ROMs
logger(f'Amount of wanted ROMs in DAT-file : {len(wantedroms)}', 'green')
logger(f'Amount of found ROMs at server : {len(wantedfiles)}', 'green')
if missingroms:
logger(f'Amount of missing ROMs at server : {len(missingroms)}', 'yellow')
#Download wanted files
if not args.list:
dlcounter = 0
for wantedfile in wantedfiles:
dlcounter += 1
resumedl = False
proceeddl = True
if platform.system() == 'Linux':
localpath = f'{args.out}/{wantedfile["file"]}'
elif platform.system() == 'Windows':
localpath = f'{args.out}\{wantedfile["file"]}'
resp = requests.get(wantedfile['url'], headers=REQHEADERS, stream=True)
remotefilesize = int(resp.headers.get('content-length'))
if os.path.isfile(localpath):
localfilesize = int(os.path.getsize(localpath))
if localfilesize != remotefilesize:
resumedl = True
else:
proceeddl = False
if proceeddl:
file = open(localpath, 'ab')
size, unit = scale1024(remotefilesize)
pbar = ProgressBar(widgets=['\033[96m', Percentage(), ' | ', DataSize(), f' / {round(size, 1)} {unit}', ' ', Bar(marker='#'), ' ', ETA(), ' | ', FileTransferSpeed(), '\033[00m'], max_value=remotefilesize, redirect_stdout=True)
pbar.start()
if resumedl:
logger(f'Resuming {str(dlcounter).zfill(len(str(len(wantedfiles))))}/{len(wantedfiles)}: {wantedfile["name"]}', 'cyan')
pbar += localfilesize
headers = REQHEADERS
headers.update({'Range': f'bytes={localfilesize}-'})
resp = requests.get(wantedfile['url'], headers=headers, stream=True)
for data in resp.iter_content(chunk_size=CHUNKSIZE):
file.write(data)
pbar += len(data)
else:
logger(f'Downloading {str(dlcounter).zfill(len(str(len(wantedfiles))))}/{len(wantedfiles)}: {wantedfile["name"]}', 'cyan')
for data in resp.iter_content(chunk_size=CHUNKSIZE):
file.write(data)
pbar += len(data)
file.close()
pbar.finish()
print('\033[1A', end='\x1b[2K')
logger(f'Downloaded {str(dlcounter).zfill(len(str(len(wantedfiles))))}/{len(wantedfiles)}: {wantedfile["name"]}', 'green', True)
else:
logger(f'Already DLd {str(dlcounter).zfill(len(str(len(wantedfiles))))}/{len(wantedfiles)}: {wantedfile["name"]}', 'green')
logger('Downloading complete!', 'green', False)
#Output missing ROMs, if any
if missingroms:
logger(f'Following {len(missingroms)} ROMs in DAT not automatically found from server, grab these manually:', 'red')
for missingrom in missingroms:
logger(missingrom, 'yellow')
else:
logger('All ROMs in DAT found from server!', 'green')