-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgen-combo-novel.py
executable file
·99 lines (76 loc) · 2.25 KB
/
gen-combo-novel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
from itertools import combinations
import sys
import re
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is optional: when it is not installed, substitute a do-nothing
    # wrapper so callers can still write ``for item in tqdm(items)``.
    def tqdm(x):
        """Stand-in for ``tqdm.tqdm``: return the iterable *x* unchanged."""
        return x
# 46010 + 3990 == 50000
# 46010: [('r*C', 215, 214)]
# 3990: [('r*C', 21, 3), ('r*C', 21, 19), ('r*C_duplicates', 19, 3)]
# note that:
# len(combinations(21, 3)) == 1330
# len(combinations(21, 19)) == 210
# len(combinations(215, 2)) == 23005
# len(combinations(215, 214)) == 215
# and that
# 23005 / 1330.0 = 17.296992481203006
def clean(w):
    """Return the first run of word characters found in the token *w*.

    A "word character" here is an ASCII letter, a digit, an underscore or
    an apostrophe; anything before or after the first such run (quotes,
    commas, brackets, ...) is discarded.

    Raises AttributeError if *w* contains no such run, because the
    pattern then fails to match.
    """
    pattern = r"^.*?([_a-zA-Z0-9\']+).*?$"
    found = re.match(pattern, w)
    # The pattern has exactly one capture group: the cleaned word.
    (word,) = found.groups()
    return word
def proc_paragraphs(ps):
    """Join the phrases in *ps* into one sentence-like paragraph body.

    The phrases are joined with ``', '``, the result is lower-cased with
    its first character capitalized, trailing punctuation and spaces
    (``.``, ``,``, ``?``, ``!``, space) are removed, and a fixed list of
    proper nouns gets its capital letter back.  If the text starts with an
    underscore, the character right after the underscore is capitalized
    instead of the underscore itself.

    Returns the processed string *without* a final full stop; the caller
    appends it.  An empty *ps* (or one consisting only of punctuation)
    yields the empty string.
    """
    # rstrip() removes the same trailing characters the old
    # ``while acc[-1] in ...`` loop did, but is safe on an empty string.
    acc = ', '.join(ps).capitalize().rstrip('., ?!')

    # str.capitalize() leaves a leading '_' alone, so capitalize the text
    # behind it.  This must happen *before* the proper-noun pass below:
    # capitalize() lower-cases everything after the first character and
    # would otherwise undo that pass.
    if acc.startswith('_'):
        acc = '_' + acc[1:].capitalize()

    # keep proper nouns proper -- it's only proper
    for proper in ('multivac', 'messerschmitts', 'wellington', 'soho',
                   'ozymandias', "i'll", "crimea", 'thinkpad'):
        acc = acc.replace(proper, proper.capitalize())
    return acc
def main(argv):
    """Generate the combinatorial text and write it to standard output.

    ``argv[1]`` names a text file whose 21 lines are the heading parts
    (each is stripped and capitalized).  ``argv[2]`` names a text file
    holding 215 whitespace-separated words in total (each is passed
    through ``clean``).

    Every 3-combination of heading parts becomes a heading, underlined
    with dashes, and every 2-combination of words becomes a phrase.
    Phrases are dealt out to the headings in order: 18 to every fourth
    heading (starting with the first) and 17 to the others.  Each batch is
    turned into a paragraph by ``proc_paragraphs``.  Phrases still left
    after the last heading form one final paragraph.

    Raises AssertionError if an input file does not have the expected
    number of entries, or if the phrases run out before the headings do.
    """
    # 1. LOAD
    with open(argv[1], 'r') as f:
        part1 = [line.strip().capitalize() for line in f]
    assert len(part1) == 21, len(part1)

    with open(argv[2], 'r') as f:
        part2 = [clean(w) for line in f for w in line.strip().split()]
    assert len(part2) == 215, len(part2)

    # 2. MUNGE
    headings = [' '.join(c) for c in combinations(part1, 3)]
    paragraphs = [' '.join(c) for c in combinations(part2, 2)]

    # 3. GO
    write = sys.stdout.write
    # Walk the phrase list with an offset instead of repeatedly calling
    # pop(0), which shifts the whole list on every call.
    taken = 0    # number of phrases consumed so far
    emitted = 0  # number of headings written so far
    for h in headings:
        if taken >= len(paragraphs):
            break
        write(h + '\n')
        write(('-' * len(h)) + '\n\n')
        # Every fourth heading (the 1st, 5th, 9th, ...) gets one extra phrase.
        d = 18 if emitted % 4 == 0 else 17
        # The slice is simply shorter if fewer than d phrases remain.
        ps = paragraphs[taken:taken + d]
        taken += d
        write(proc_paragraphs(ps) + '.\n\n')
        emitted += 1
    # Every heading must have been used before the phrases ran out.
    assert emitted == len(headings)

    # Whatever phrases remain go into one closing paragraph.
    leftover = paragraphs[taken:]
    if leftover:
        write(proc_paragraphs(leftover) + '.\n\n')
if __name__ == '__main__':
    # sys is already imported at the top of the file; no need to re-import.
    # main() reads argv[1] and argv[2], so fail with a usage line rather
    # than an IndexError traceback when either is missing.
    if len(sys.argv) < 3:
        sys.exit('usage: %s HEADINGS_FILE WORDS_FILE' % sys.argv[0])
    main(sys.argv)