-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvalidate.py
executable file
·99 lines (87 loc) · 3.67 KB
/
validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
'''
Script for performing cursory validation of files.
'''
import argparse, json, magic, os, string, subprocess, sys
# Result codes stored per scanned path in the results mapping.
# Zero means success; every negative value is a distinct kind of failure.
OK = 0            # the checker ran and reported no problems
FAILED = -1       # the checker ran and reported a problem
NO_CHECKER = -2   # no checker is known for this kind of file
NOT_FOUND = -3    # the path does not exist
def run(cmd, **kwargs):
    '''
    Run an external checker command and print a one-line coloured verdict.

    The command's own stdout and stderr are discarded.

    cmd: the command and its arguments, as a list of strings.
    kwargs: extra keyword arguments forwarded to subprocess.check_call
        (for example cwd=...).

    Returns OK when the command exits with status 0, FAILED when it exits
    with a non-zero status, and NO_CHECKER when the command could not be
    started at all (e.g. the tool is not installed).
    '''
    shown = ' '.join(cmd)
    try:
        subprocess.check_call(cmd, stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL, **kwargs)
    except subprocess.CalledProcessError:
        sys.stdout.write(f'\033[31mFAILED ({shown})\033[0m\n')
        return FAILED
    except OSError as err:
        # The executable is missing or not runnable. Nothing was actually
        # checked, so report that instead of aborting the whole scan.
        sys.stdout.write(f'\033[31mcould not run ({shown}): {err}\033[0m\n')
        return NO_CHECKER
    sys.stdout.write(f'\033[32mok ({shown})\033[0m\n')
    return OK
def _load_results(db):
    '''Return the results mapping stored in open file db ({} if it is empty).'''
    text = db.read()
    if not text.strip():
        return {}
    return json.loads(text)


def _save_results(results, db):
    '''Replace the whole content of open file db with results as JSON.'''
    # Serialise first so a serialisation error cannot leave the file emptied.
    text = json.dumps(results, indent=2)
    # Reading left the position at end-of-file; rewind so the new document
    # overwrites the old one instead of being appended after it.
    db.seek(0)
    db.write(text)
    db.truncate()


def _is_printable_ascii(path, chunk_size=65536):
    '''Return True if every byte of the file at path is in string.printable.'''
    allowed = frozenset(string.printable.encode('ascii'))
    # Binary mode: bytes outside ASCII must fail the check, not raise a
    # decoding error.
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            if not allowed.issuperset(chunk):
                return False
    return True


def _check_file(ms, item):
    '''Validate the regular file item and return its result code.'''
    # NOTE(review): ms.file() appears to return None when libmagic cannot
    # examine the file; fall back to '' so the string tests below still work.
    filetype = ms.file(item) or ''
    lowered = item.lower()
    if filetype.startswith('Python script,'):
        return run(['pylint', '--errors-only', item])
    if filetype == 'OpenDocument Spreadsheet':
        return run(['zip', '--test', item])
    if filetype in ('XML document text', 'SVG Scalable Vector Graphics image'):
        return run(['xmllint', '--noout', item])
    if lowered.endswith('.md'):
        # For some reason markdown is incorrectly identified as a Fortran program.
        return run(['markdown', item])
    if 'ASCII text' in filetype:
        try:
            printable = _is_printable_ascii(item)
        except OSError as err:
            sys.stdout.write(f'\033[31mFAILED (cannot read: {err})\033[0m\n')
            return FAILED
        if printable:
            sys.stdout.write('\033[32mok (printable ASCII)\033[0m\n')
            return OK
        sys.stdout.write('\033[31mFAILED (printable ASCII)\033[0m\n')
        return FAILED
    if os.path.splitext(lowered)[1] in ('.avi',):
        # Many media container formats are misidentified by magic.
        return run(['ffmpeg', '-i', item, '-f', 'null', os.devnull])
    sys.stdout.write(f'\033[31mno checker found for \'{filetype}\'\033[0m\n')
    return NO_CHECKER


def main():
    '''
    Scan the given paths (default: the current directory) and validate them.

    Directories containing .git or .hg are verified with the respective tool;
    other directories are recursed into; regular files are dispatched to a
    checker chosen from their libmagic description or file extension.

    Results are kept as a mapping from absolute path to result code. With
    --db the mapping is loaded from that JSON file, updated, and written
    back; otherwise it is printed to stdout as JSON.

    Returns 0.
    '''
    parser = argparse.ArgumentParser(description='file validator')
    parser.add_argument('--db', type=argparse.FileType('r+'),
                        help='database to update')
    parser.add_argument('input', nargs='*', default=[],
                        help='path or directory to scan')
    opts = parser.parse_args()

    queue = list(opts.input) or ['.']

    ms = magic.open(magic.MAGIC_NONE)
    ms.load()

    results = {} if opts.db is None else _load_results(opts.db)

    while queue:
        item = os.path.abspath(queue.pop())
        sys.stdout.write(f' {item} ...')
        if not os.path.exists(item):
            results[item] = NOT_FOUND
            sys.stdout.write('\033[31mFAILED (does not exist)\033[0m\n')
        elif os.path.isdir(item):
            if os.path.isdir(os.path.join(item, '.git')):
                results[item] = run(['git', 'fsck'], cwd=item)
            elif os.path.isdir(os.path.join(item, '.hg')):
                results[item] = run(['hg', 'verify'], cwd=item)
            else:
                try:
                    children = os.listdir(item)
                except OSError as err:
                    # An unreadable directory must not abort the whole scan.
                    results[item] = FAILED
                    sys.stdout.write(
                        f'\033[31mFAILED (cannot list directory: {err})\033[0m\n')
                else:
                    queue.extend(os.path.join(item, x) for x in children)
                    sys.stdout.write('recursing\n')
        else:
            results[item] = _check_file(ms, item)

    if opts.db is None:
        sys.stdout.write(json.dumps(results, indent=2))
    else:
        _save_results(results, opts.db)
        opts.db.close()
    return 0
# Run the validator only when executed as a script, and hand main()'s return
# value to the shell as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)