-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathnfa.go
166 lines (147 loc) · 5.22 KB
/
nfa.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
package quamina
import "fmt"
// This groups the functions that traverse, merge, and debug Quamina's nondeterministic finite automata
// faState is used by the valueMatcher automaton - every step through the
// automaton requires a smallTable and for some of them, taking the step means you've matched a value and can
// transition to a new fieldMatcher, in which case the fieldTransitions slice will be non-nil
type faState struct {
table *smallTable
fieldTransitions []*fieldMatcher
}
// struct wrapper to make this comparable to help with pack/unpack
type faNext struct {
states []*faState
}
type nfaMetadata struct {
maxOutDegree int
}
type transmap struct {
set map[*fieldMatcher]bool
}
func (tm *transmap) add(fms []*fieldMatcher) {
for _, fm := range fms {
tm.set[fm] = true
}
}
func (tm *transmap) all() []*fieldMatcher {
var all []*fieldMatcher
for fm := range tm.set {
all = append(all, fm)
}
return all
}
// While some Quamina patterns require the use of NFAs, many (most?) don't, and while we're still using a
// NFA-capable data structure, we can traverse it deterministically if we know in advance that every
// combination of an faState with a byte will transition to at most one other faState.
func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []*fieldMatcher {
for index := 0; index <= len(val); index++ {
var utf8Byte byte
if index < len(val) {
utf8Byte = val[index]
} else {
utf8Byte = valueTerminator
}
next := table.dStep(utf8Byte)
if next == nil {
break
}
transitions = append(transitions, next.fieldTransitions...)
table = next.table
}
return transitions
}
func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *bufpair) []*fieldMatcher {
currentStates := bufs.buf1
currentStates = append(currentStates, &faState{table: table})
nextStates := bufs.buf2
// a lot of the transitions stuff is going to be empty, but on the other hand
// a * entry with a transition could end up getting added a lot.
newTransitions := &transmap{set: make(map[*fieldMatcher]bool, len(transitions))}
newTransitions.add(transitions)
stepResult := &stepOut{}
for index := 0; len(currentStates) != 0 && index <= len(val); index++ {
var utf8Byte byte
if index < len(val) {
utf8Byte = val[index]
} else {
utf8Byte = valueTerminator
}
for _, state := range currentStates {
state.table.step(utf8Byte, stepResult)
for _, nextStep := range stepResult.steps {
newTransitions.add(nextStep.fieldTransitions)
nextStates = append(nextStates, nextStep)
}
for _, nextStep := range stepResult.epsilon {
newTransitions.add(nextStep.fieldTransitions)
nextStates = append(nextStates, nextStep)
}
}
// re-use these
swapStates := currentStates
currentStates = nextStates
nextStates = swapStates[:0]
}
bufs.buf1 = currentStates[:0]
bufs.buf2 = nextStates[:0]
return newTransitions.all()
}
type faStepKey struct {
step1 *faState
step2 *faState
}
// mergeFAs compute the union of two valueMatch automata. If you look up the textbook theory about this,
// they say to compute the set product for automata A and B and build A0B0, A0B1 … A1BN, A1B0 … but if you look
// at that you realize that many of the product states aren't reachable. So you compute A0B0 and then keep
// recursing on the transitions coming out, I'm pretty sure you get a correct result. I don't know if it's
// minimal or even avoids being wasteful.
// INVARIANT: neither argument is nil
// INVARIANT: To be thread-safe, no existing table can be updated except when we're building it
func mergeFAs(table1, table2 *smallTable, printer printer) *smallTable {
state1 := &faState{table: table1}
state2 := &faState{table: table2}
return mergeFAStates(state1, state2, make(map[faStepKey]*faState), printer).table
}
func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, printer printer) *faState {
// try to memo-ize
mKey := faStepKey{state1, state2}
combined, ok := keyMemo[mKey]
if ok {
return combined
}
fieldTransitions := append(state1.fieldTransitions, state2.fieldTransitions...)
combined = &faState{table: newSmallTable(), fieldTransitions: fieldTransitions}
pretty, ok := printer.(*prettyPrinter)
if ok {
printer.labelTable(combined.table, fmt.Sprintf("%d∎%d",
pretty.tableSerial(state1.table), pretty.tableSerial(state2.table)))
}
keyMemo[mKey] = combined
u1 := unpackTable(state1.table)
u2 := unpackTable(state2.table)
var uComb unpackedTable
for i, next1 := range u1 {
next2 := u2[i]
switch {
case next1 == next2: // no need to merge
uComb[i] = next1
case next2 == nil: // u1 must be non-nil
uComb[i] = next1
case next1 == nil: // u2 must be non-nil
uComb[i] = next2
case i > 0 && next1 == u1[i-1] && next2 == u2[i-1]: // dupe of previous step - happens a lot
uComb[i] = uComb[i-1]
default: // have to recurse & merge
var comboNext []*faState
for _, nextStep1 := range next1.states {
for _, nextStep2 := range next2.states {
comboNext = append(comboNext, mergeFAStates(nextStep1, nextStep2, keyMemo, printer))
}
}
uComb[i] = &faNext{states: comboNext}
}
}
combined.table.pack(&uComb)
combined.table.epsilon = append(state1.table.epsilon, state2.table.epsilon...)
return combined
}