-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread2devhelp.py
136 lines (105 loc) · 4.12 KB
/
read2devhelp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
import argparse
import os
import re
import urllib.request
import xml.etree.ElementTree as ET
from pathlib import Path
from xml.dom import minidom
# Root directory that holds one sub-folder per DevHelp book.
#
# The README2DEVHELP_DIR environment variable overrides the default location
# under the user's home directory; an unset or empty variable falls back to
# the default.  The value is always wrapped in ``Path`` because build_bundle()
# calls ``DEVHELP_DIR.joinpath(...)``, which a plain ``str`` taken straight
# from the environment does not provide.
DEVHELP_DIR = Path(
    os.environ.get('README2DEVHELP_DIR')
    or Path.home() / '.local/share/devhelp/books'
)
def build_bundle(name):
    """Ensure the on-disk layout for the book called *name* exists.

    Creates the ``<name>`` folder under ``DEVHELP_DIR`` (including missing
    parents) and, when they are absent, empty ``index.html`` and
    ``<name>.devhelp2`` files inside it.  Existing files are left untouched.
    A progress message is printed for the folder and for each created file.

    Args:
        name: Book name; used as the folder name and the ``.devhelp2`` stem.

    Returns:
        A ``(book_dir, index_file, devhelp2_file)`` tuple of paths.
    """
    book_dir = DEVHELP_DIR.joinpath(name)
    book_dir.mkdir(parents=True, exist_ok=True)
    print(f'Book folder -> {book_dir}')

    # (label used in the progress message, file name inside the book folder)
    wanted = (
        ('index.html', 'index.html'),
        ('devhelp2', f'{name}.devhelp2'),
    )
    files = []
    for label, filename in wanted:
        target = book_dir.joinpath(filename)
        if not target.exists():
            print(f'Create file {label} -> {target}')
            target.touch()
        files.append(target)

    index_file, devhelp2_file = files
    return (book_dir, index_file, devhelp2_file)
def pull_readme(repository_url):
    """Download a repository page and look for the README marker in it.

    Args:
        repository_url: URL of the repository home page.

    Returns:
        A ``(data, valid)`` tuple.  ``data`` is the response body decoded as
        UTF-8.  ``valid`` is the matched ``id="readme"`` marker text, or
        ``None`` when the page does not contain the marker (instead of
        failing with ``AttributeError`` on a missing match).

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Any exception raised by ``urllib.request.urlopen`` is propagated.
    """
    # Use the response as a context manager so the connection is closed
    # deterministically, and decode the body only once.
    with urllib.request.urlopen(repository_url) as resp:
        data = resp.read().decode('utf-8')

    marker = re.search(r'id="readme"', data)
    valid = marker.group() if marker else None
    return data, valid
def extract_readme(raw_body):
    """Cut the README ``<article>`` element out of a repository page.

    Args:
        raw_body: HTML text of the whole page.

    Returns:
        A ``(heading, article)`` tuple.  ``article`` is the text from the
        first ``<article`` to the last ``</article>`` with all newlines
        removed.  ``heading`` is the ``re.Match`` spanning the first ``<h1``
        to the last ``</h1>`` inside that article, or ``None`` when the
        article has no ``<h1>``.

    Raises:
        ValueError: If the page contains no ``<article>...</article>`` span.
    """
    # Newlines are dropped so that ``.`` can match across what were separate
    # lines.  NOTE(review): this also flattens multi-line <pre> blocks in the
    # README; kept as-is because the returned text is re-scanned for headings
    # with patterns that do not cross newlines.
    flattened = raw_body.replace('\n', '')

    article = re.search(r'<article.+</article>', flattened)
    if article is None:
        raise ValueError('no <article> element found in the page body')

    article_html = article.group()
    valid_readme = re.search(r'<h1.+</h1>', article_html)
    return valid_readme, article_html
def generate_index(raw_body, url_repository):
    """Build the book index from the ``<h1>``/``<h2>`` headings of a README.

    Args:
        raw_body: README HTML to scan.  Only attribute-less ``<h1>`` and
            ``<h2>`` tags whose content does not span a newline are matched.
        url_repository: Repository URL; its last path segment is the
            fallback chapter title.

    Returns:
        A dict with two keys:
        ``'chapter'`` - text of the last ``<h1>`` (markup stripped), or the
        last segment of ``url_repository`` when there is no ``<h1>``;
        ``'subs'`` - one ``{'name': ..., 'link': ...}`` dict per ``<h2>``,
        in document order.  ``link`` is ``index.html#<id>`` using the ``id``
        of the ``<a>`` inside the heading, or plain ``index.html`` when the
        heading carries no such anchor.
    """
    index = {'subs': []}

    for tag, content in re.findall(r'<(h[1-2])>(.+?)</\1>', raw_body):
        # Strip nested markup (anchors, icons, ...) to get the visible text.
        text = re.sub(r'<[^>]+>', '', content)
        if tag == 'h1':
            index['chapter'] = text
            continue

        # A heading without an <a id="..."> used to crash on None.group();
        # link to the top of the page instead.
        anchor = re.search(r'<a id="(.+?)"', content)
        link = 'index.html#{}'.format(anchor.group(1)) if anchor else 'index.html'
        index['subs'].append({'name': text, 'link': link})

    if 'chapter' not in index:
        # rstrip('/') keeps a trailing slash in the URL from producing an
        # empty title.
        index['chapter'] = url_repository.rstrip('/').split('/')[-1]
    return index
def generate_xml(index, url_repository):
    """Render the ``.devhelp2`` XML document for a book.

    Args:
        index: Dict with a ``'chapter'`` title and a ``'subs'`` list of
            attribute dicts (one per ``<sub>`` element), in the shape
            produced by ``generate_index``.
        url_repository: Repository URL; stored verbatim in the ``online``
            attribute, and its last path segment becomes the book ``name``.

    Returns:
        The pretty-printed XML document as a string.
    """
    attrs = {
        'xmlns': 'http://www.devhelp.net/book',
        'title': index['chapter'],
        'link': 'index.html',
        'author': 'Unknown',
        # rstrip('/') so a URL ending in "/" does not yield an empty name.
        'name': url_repository.rstrip('/').split('/')[-1],
        'version': 'Unknown',
        'language': 'Unknown',
        'online': url_repository,
    }
    root = ET.Element('book', attrib=attrs)
    chapters = ET.SubElement(root, 'chapters')
    for sub_attrs in index['subs']:
        ET.SubElement(chapters, 'sub', sub_attrs)

    # ElementTree serialises on one line; round-trip through minidom to get
    # indented output.
    body_xml = ET.tostring(root).decode('utf-8')
    return minidom.parseString(body_xml).toprettyxml(indent=" ")
def builder(repo):
    """Download a repository README and write it out as a DevHelp book.

    Fetches the page at *repo*, extracts the README article, creates the
    book folder and then fills in ``index.html`` (the README wrapped in a
    minimal HTML page) and the ``.devhelp2`` index.

    Args:
        repo: Repository home URL.  A trailing ``/`` is ignored when deriving
            the book name.
    """
    # Derive names from the URL without a trailing slash; otherwise the last
    # path segment - used as folder and book name - would be empty.
    repo_url = repo.rstrip('/')

    page, _ = pull_readme(repo)
    _, raw_data = extract_readme(page)
    _, path_index, path_devhelp2 = build_bundle(repo_url.split('/')[-1])
    chapters = generate_index(raw_data, repo_url)
    xml_generated = generate_xml(chapters, repo_url)

    html_body = '''
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport"
content="width=device-width,
initial-scale=1, shrink-to-fit=no">
</head>
<body>{}</body>
</html>
'''
    # The page declares charset utf-8, so write it as UTF-8 regardless of
    # the locale's default encoding.
    with path_index.open('w', encoding='utf-8') as readme_index_html:
        readme_index_html.write(html_body.format(raw_data))

    # generate_xml() already returns pretty-printed XML; re-parsing and
    # pretty-printing it a second time turns the indentation into extra
    # whitespace-only lines, so the text is written as-is.
    with path_devhelp2.open('w', encoding='utf-8') as devhelp_xml:
        devhelp_xml.write(xml_generated)
def command_line_parse():
    """Create the command-line parser for this script.

    The parser accepts a single positional string argument,
    ``repository_url``.  Nothing is parsed here; the caller invokes
    ``parse_args`` on the returned object.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        description='Download and Convert README for DevHelp',
    )
    cli.add_argument(
        'repository_url',
        type=str,
        help='Repository home URL',
    )
    return cli
def main():
    """Script entry point: parse the command line and build the book."""
    parser = command_line_parse()
    options = parser.parse_args()
    builder(options.repository_url)


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()