This repository has been archived by the owner on Mar 14, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhelpers.py
261 lines (215 loc) · 7.98 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
import enum
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import TypedDict
import pandas as pd
import rich
from dotenv import load_dotenv
from pandas.core.frame import DataFrame
# Encoding name constant; it is not referenced anywhere else in this module.
ENCODING = 'UTF-8'

# NOTE(review): the three sets below look like grammatical category tags
# grouped by how a word inflects -- confirm against the code that consumes them.
INDECLINABLES = {
    'abbrev',
    'abs',
    'ger',
    'idiom',
    'ind',
    'inf',
    'prefix',
    'sandhi',
}

CONJUGATIONS = {
    'aor',
    'cond',
    'fut',
    'imp',
    'imperf',
    'opt',
    'perf',
    'pr',
}

DECLENSIONS = {
    'adj',
    'card',
    'cs',
    'fem',
    'letter',
    'masc',
    'nt',
    'ordin',
    'pp',
    'pron',
    'prp',
    'ptp',
    'root',
    'suffix',
    've',
}
# Runs at import time. python-dotenv's load_dotenv() reads a .env file (if one
# is found) into the process environment, so DPS_DIR -- which the
# get_resource_paths_* functions read through os.getenv -- can be supplied
# from that file.
load_dotenv()
def timeis() -> str:
    """ Return the current local date and time wrapped in rich ``[blue]`` markup

    :return: text of the form ``[blue]YYYY-MM-DD HH:MM:SS[/blue]``
    """
    stamp = f'{datetime.now():%Y-%m-%d %H:%M:%S}'
    return '[blue]' + stamp + '[/blue]'
def line(length=40) -> str:
    """ Return a horizontal rule made of ``length`` dash characters

    :param length: number of dashes; zero or a negative value gives ''
    :return: the string of dashes
    """
    rule = length * '-'
    return rule
@enum.unique
class Kind(enum.Enum):
    """ Identifies which dictionary build a set of resource paths belongs to

    Each ``get_resource_paths_*`` function stores its own member in the
    ``kind`` key of the paths it returns.
    """

    # Set by get_resource_paths_sbs()
    SBS = enum.auto()
    # Set by get_resource_paths_ru()
    RU = enum.auto()
    # Set by get_resource_paths_dps()
    DPS = enum.auto()
class DataFrames(TypedDict):
    """ The three tables returned by ``parse_data_frames``
    """

    # Read from the 'words_path' resource
    words_df: DataFrame
    # Read from the 'abbrev_path' resource
    abbrev_df: DataFrame
    # Read from the 'help_path' resource
    help_df: DataFrame
class ResourcePaths(TypedDict):
    """ Every file and directory location used by one dictionary build

    Instances are produced by the ``get_resource_paths_*`` functions; the
    grouping below mirrors the grouping used in those functions.
    """

    # Which build these paths belong to
    kind: Kind

    # Project output
    output_dir: Path
    output_html_dir: Path
    output_help_html_dir: Path
    output_share_dir: Path
    gd_json_path: Path
    output_stardict_zip_path: Path
    error_log_dir: Path

    # Project assets
    dict_words_css_path: Path
    dict_help_css_path: Path
    definition_css_path: Path
    buttons_js_path: Path
    abbrev_path: Path
    help_path: Path

    # Project input
    abbreviation_template_path: Path
    inflections_dir: Path
    inflections_html_tables_dir: Path
    words_path: Path
    icon_path: Path
    word_template_path: Path
def _read_tsv(path: Path) -> DataFrame:
    """Read one tab-separated file as an all-string frame without NaN cells.

    :param path: location of the tab-separated file
    :return: data frame whose cells are all ``str``; missing cells are ''
    """
    # keep_default_na=False stops pandas from treating literal text such as
    # "NA", "null", "None" or "nan" as a missing value, which would otherwise
    # be blanked by the fillna() below and silently lose dictionary content.
    frame = pd.read_csv(path, sep="\t", dtype=str, keep_default_na=False)
    # Rows with fewer fields than the header can still yield NaN cells.
    return frame.fillna('')


def parse_data_frames(rsc: ResourcePaths) -> DataFrames:
    """Parse csv files into pandas data frames

    :param rsc: resource paths; 'words_path', 'abbrev_path' and 'help_path'
        are read as tab-separated files
    :return: the three tables with every cell as a string and '' in place of
        missing values
    """
    return DataFrames(
        words_df=_read_tsv(rsc['words_path']),
        abbrev_df=_read_tsv(rsc['abbrev_path']),
        help_df=_read_tsv(rsc['help_path']),
    )
def get_resource_paths_ru() -> ResourcePaths:
    """ Build the resource paths for the ``Kind.RU`` dictionary

    Reads the ``DPS_DIR`` environment variable, fills in every key of
    ``ResourcePaths`` and creates the output, output html, share and error
    log directories when they do not exist yet.

    :return: the populated resource paths
    :raises SystemExit: with status 2 when ``DPS_DIR`` is not set
    """
    env_value = os.getenv('DPS_DIR')
    if env_value is None:
        rich.print(f"{timeis()} [red]ERROR! DPS_DIR is not set.")
        sys.exit(2)
    dps_root = Path(env_value)

    paths = ResourcePaths(
        kind=Kind.RU,

        # Project output
        output_dir=Path('./output/'),
        output_html_dir=Path('./output/html/'),
        output_help_html_dir=Path('./output/help html/'),
        output_share_dir=Path('./share/'),
        gd_json_path=Path('./output/gd.json'),
        output_stardict_zip_path=Path('ru-pali-dictionary.zip'),
        error_log_dir=Path('./errorlogs/'),

        # Project assets
        dict_words_css_path=Path('./assets/words-ru.css'),
        dict_help_css_path=Path('./assets/help.css'),
        definition_css_path=Path('./assets/rpd.css'),
        buttons_js_path=Path('./assets/buttons-ru.js'),
        abbrev_path=Path('./assets/abbreviations.csv'),
        help_path=Path('./assets/help.csv'),

        # Project input
        abbreviation_template_path=Path('./assets/templates/abbreviation-ru.html'),
        inflections_dir=dps_root / 'inflection/',
        inflections_html_tables_dir=dps_root.joinpath('inflection/output/html_tables_dps/'),
        words_path=dps_root / '../dpd-db/dps/csvs/dps_full.csv',
        icon_path=Path('./logo/book.bmp'),
        word_template_path=Path('./assets/templates/word-ru.html'),
    )

    # Make sure the directories that get written to exist
    writable_dirs = (
        paths['output_dir'],
        paths['output_html_dir'],
        paths['output_share_dir'],
        paths['error_log_dir'],
    )
    for directory in writable_dirs:
        directory.mkdir(parents=True, exist_ok=True)

    return paths
def get_resource_paths_dps() -> ResourcePaths:
    """ Build the resource paths for the ``Kind.DPS`` dictionary

    Reads the ``DPS_DIR`` environment variable, fills in every key of
    ``ResourcePaths`` and creates the output, output html, share and error
    log directories when they do not exist yet.

    :return: the populated resource paths
    :raises SystemExit: with status 2 when ``DPS_DIR`` is not set
    """
    env_value = os.getenv('DPS_DIR')
    if env_value is None:
        rich.print(f"{timeis()} [red]ERROR! DPS_DIR is not set.")
        sys.exit(2)
    dps_root = Path(env_value)

    paths = ResourcePaths(
        kind=Kind.DPS,

        # Project output
        output_dir=Path('./output/'),
        output_html_dir=Path('./output/html/'),
        output_help_html_dir=Path('./output/help html/'),
        output_share_dir=Path('./share/'),
        gd_json_path=Path('./output/gd.json'),
        output_stardict_zip_path=Path('dps.zip'),
        error_log_dir=Path('./errorlogs/'),

        # Project assets
        dict_words_css_path=Path('./assets/words-dps.css'),
        dict_help_css_path=Path('./assets/help.css'),
        definition_css_path=Path('./assets/rpd.css'),
        buttons_js_path=Path('./assets/buttons-dps.js'),
        abbrev_path=Path('./assets/abbreviations.csv'),
        help_path=Path('./assets/help.csv'),

        # Project input
        # NOTE(review): the DPS build points at the "-ru" abbreviation
        # template -- confirm this is intentional.
        abbreviation_template_path=Path('./assets/templates/abbreviation-ru.html'),
        inflections_dir=dps_root / 'inflection/',
        inflections_html_tables_dir=dps_root.joinpath('inflection/output/html_tables_dps/'),
        words_path=dps_root / '../dpd-db/dps/csvs/dps_full.csv',
        icon_path=Path('./logo/book.bmp'),
        word_template_path=Path('./assets/templates/word-dps.html'),
    )

    # Make sure the directories that get written to exist
    writable_dirs = (
        paths['output_dir'],
        paths['output_html_dir'],
        paths['output_share_dir'],
        paths['error_log_dir'],
    )
    for directory in writable_dirs:
        directory.mkdir(parents=True, exist_ok=True)

    return paths
def get_resource_paths_sbs() -> ResourcePaths:
    """ Build the resource paths for the ``Kind.SBS`` dictionary

    Reads the ``DPS_DIR`` environment variable, fills in every key of
    ``ResourcePaths`` and creates the output, output html, share and error
    log directories when they do not exist yet.

    :return: the populated resource paths
    :raises SystemExit: with status 2 when ``DPS_DIR`` is not set
    """
    env_value = os.getenv('DPS_DIR')
    if env_value is None:
        rich.print(f"{timeis()} [red]ERROR! DPS_DIR is not set.")
        sys.exit(2)
    dps_root = Path(env_value)

    paths = ResourcePaths(
        kind=Kind.SBS,

        # Project output
        output_dir=Path('./output/'),
        output_html_dir=Path('./output/html/'),
        output_help_html_dir=Path('./output/help html/'),
        output_share_dir=Path('./share/'),
        gd_json_path=Path('./output/gd.json'),
        output_stardict_zip_path=Path('sbs-pd.zip'),
        error_log_dir=Path('./errorlogs/'),

        # Project assets
        dict_words_css_path=Path('./assets/words-sbs.css'),
        dict_help_css_path=Path('./assets/help.css'),
        definition_css_path=Path('./assets/epd_sbs.css'),
        buttons_js_path=Path('./assets/buttons-sbs.js'),
        abbrev_path=Path('./assets/abbreviations.csv'),
        help_path=Path('./assets/help.csv'),

        # Project input
        inflections_dir=dps_root / 'inflection/',
        inflections_html_tables_dir=dps_root.joinpath('inflection/output/html_tables_sbs'),
        words_path=dps_root / '../dpd-db/dps/csvs/sbs_pd.csv',
        icon_path=Path('./logo/head_brown.bmp'),
        abbreviation_template_path=Path('./assets/templates/abbreviation-sbs.html'),
        word_template_path=Path('./assets/templates/word-sbs.html'),
    )

    # Make sure the directories that get written to exist
    writable_dirs = (
        paths['output_dir'],
        paths['output_html_dir'],
        paths['output_share_dir'],
        paths['error_log_dir'],
    )
    for directory in writable_dirs:
        directory.mkdir(parents=True, exist_ok=True)

    return paths
def copy_goldendict(src_path: Path, dest_dir: Path):
    """ Move an archive into ``dest_dir`` under the name ``<stem>.zip``

    Despite the function name the file is moved with ``mv``, not copied, so
    ``src_path`` no longer exists afterwards. ``--backup=numbered`` is a GNU
    coreutils option that keeps an existing destination file as a numbered
    backup instead of overwriting it; other ``mv`` implementations may reject
    the flag.

    :param src_path: file to move
    :param dest_dir: directory that receives the file
    :raises SystemExit: with status 2 when ``mv`` cannot be started or fails
    """
    rich.print(f"{timeis()} [green]copying goldendict to share")

    # Path.stem drops only the final suffix, so a name such as 'a.zip.b.zip'
    # keeps its inner '.zip' (str.replace on the name would remove both).
    dest_path = dest_dir / f"{src_path.stem}.zip"

    try:
        subprocess.run(
            ['mv', '--backup=numbered', src_path, dest_path],
            check=True)
    except (OSError, subprocess.SubprocessError) as e:
        # OSError: mv could not be executed; SubprocessError: non-zero exit
        rich.print(f'{timeis()} [red]{e}[/red]')
        sys.exit(2)
def string_if(condition: Any, string: str) -> str:
    """ Return ``string`` when ``condition`` is truthy, '' otherwise

    :param condition: any value; only its truthiness is used
    :param string: text to return when the condition holds
    :return: ``string`` or an empty string
    """
    return string if condition else ''
def format_if(string: str, template: str) -> str:
    """ Fill ``template`` with ``string`` unless ``string`` is empty
    :param string: text to insert
    :param template: ``str.format`` template with one ``{}`` placeholder
    :return: the filled template, or '' when ``string`` has length zero
    """
    if len(string) == 0:
        return ''
    return template.format(string)