-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathjscan.go
344 lines (296 loc) · 9.17 KB
/
jscan.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
package jscan
import (
"fmt"
"strconv"
"sync"
"unicode/utf8"
"github.com/romshark/jscan/v2/internal/keyescape"
)
// Default stack sizes used as the initial stack capacity
// of newly allocated iterators and validators.
const (
// DefaultStackSizeIterator is the initial stack capacity of an Iterator
// created by newIterator.
DefaultStackSizeIterator = 64
// DefaultStackSizeValidator is the initial stack capacity of a Validator
// created by newValidator.
DefaultStackSizeValidator = 128
)
// newIterator allocates an Iterator whose stack is empty but has
// capacity for DefaultStackSizeIterator nodes.
func newIterator[S ~string | ~[]byte]() *Iterator[S] {
	it := new(Iterator[S])
	it.stack = make([]stackNode, 0, DefaultStackSizeIterator)
	return it
}
// newValidator allocates a Validator whose stack is empty but has
// capacity for DefaultStackSizeValidator entries.
func newValidator[S ~string | ~[]byte]() *Validator[S] {
	v := new(Validator[S])
	v.stack = make([]stackNodeType, 0, DefaultStackSizeValidator)
	return v
}
// Pools of reusable iterators and validators, one pool per concrete
// source type (string and []byte). Each pool creates new instances
// through newIterator or newValidator respectively.
var (
iteratorPoolString = sync.Pool{New: func() any { return newIterator[string]() }}
iteratorPoolBytes = sync.Pool{New: func() any { return newIterator[[]byte]() }}
validatorPoolString = sync.Pool{New: func() any { return newValidator[string]() }}
validatorPoolBytes = sync.Pool{New: func() any { return newValidator[[]byte]() }}
)
// stackNodeType identifies the kind of container a stack node represents.
type stackNodeType int8

// Container kinds. The zero value is skipped (assigned to the blank
// identifier) so that neither kind equals the zero value of stackNodeType.
//
// The constants rely on the implicit repetition of `stackNodeType = iota`
// so that each of them is typed stackNodeType. Spelling out `= 1` and `= 2`
// would instead declare untyped integer constants.
const (
	_                   stackNodeType = iota
	stackNodeTypeObject               // 1
	stackNodeTypeArray                // 2
)
// stackNode is an element of the Iterator stack.
// One node exists per nesting level (see (*Iterator[S]).Level).
type stackNode struct {
// ArrLen is used by ScanStack to derive the array index (ArrLen-1)
// reported for nodes of type stackNodeTypeArray.
ArrLen int
// KeyIndex and KeyIndexEnd are the start and end indexes of a member key
// in the source. ScanStack only reports them when KeyIndex is greater
// than -1.
KeyIndex, KeyIndexEnd int
// Type is the kind of the node (stackNodeTypeObject or stackNodeTypeArray).
Type stackNodeType
}
// Iterator provides access to the recently encountered value.
type Iterator[S ~string | ~[]byte] struct {
// stack holds one node per nesting level; its length is reported by Level.
stack []stackNode
// src is the source that Key and Value slice their results from.
src S
// pointer is a scratch buffer reused by ViewPointer to build JSON pointers.
pointer []byte
// valueType is the type reported by ValueType.
valueType ValueType
// valueIndex and valueIndexEnd delimit the current value in src.
// valueIndexEnd is -1 when the end is unknown (see ValueIndexEnd).
valueIndex int
valueIndexEnd int
// keyIndex and keyIndexEnd delimit the member key in src.
// keyIndex is -1 when the current value has no key (see Key).
keyIndex, keyIndexEnd int
// arrayIndex is the value reported by ArrayIndex.
arrayIndex int
}
// Level returns the nesting depth of the current value, which equals
// the number of nodes currently on the iterator's stack.
//
// For example, in the JSON document `[1,2,3]` the array itself is at level 0
// while the integers inside it are at level 1.
func (i *Iterator[S]) Level() int {
	depth := len(i.stack)
	return depth
}
// ArrayIndex returns the index of the current value within its array,
// or -1 if the value isn't an array element.
func (i *Iterator[S]) ArrayIndex() int {
	index := i.arrayIndex
	return index
}
// ValueType returns the type identifier of the current value.
func (i *Iterator[S]) ValueType() ValueType {
	current := i.valueType
	return current
}
// ValueIndex returns the index in the source at which the current value starts.
func (i *Iterator[S]) ValueIndex() int {
	start := i.valueIndex
	return start
}
// ValueIndexEnd returns the index in the source at which the current
// value ends, if known. For object and array values it is -1 because
// their end isn't known yet during traversal.
func (i *Iterator[S]) ValueIndexEnd() int {
	end := i.valueIndexEnd
	return end
}
// KeyIndex returns the index in the source at which the member key string
// starts, or -1 if the current value isn't an object member and therefore
// has no key.
func (i *Iterator[S]) KeyIndex() int {
	start := i.keyIndex
	return start
}
// KeyIndexEnd returns the index in the source at which the member key string
// ends, or -1 if the current value isn't an object member and therefore
// has no key.
func (i *Iterator[S]) KeyIndexEnd() int {
	end := i.keyIndexEnd
	return end
}
// Key returns the object member key of the current value as the source
// range [KeyIndex, KeyIndexEnd). It returns the zero value of S ("" or nil)
// if the value isn't an object member and therefore has no key.
//
// NOTE(review): the returned range presumably includes the enclosing
// quotation marks, given that ViewPointer trims one byte on each side
// of the key; confirm against the scanner.
func (i *Iterator[S]) Key() (key S) {
	if i.keyIndex != -1 {
		key = i.src[i.keyIndex:i.keyIndexEnd]
	}
	return key
}
// Value returns the current value as the source range
// [ValueIndex, ValueIndexEnd). It returns the zero value of S ("" or nil)
// when the end index is -1, i.e. when the end of the value is unknown.
func (i *Iterator[S]) Value() (value S) {
	if i.valueIndexEnd != -1 {
		value = i.src[i.valueIndex:i.valueIndexEnd]
	}
	return value
}
// ScanStack walks the stack from the outermost to the innermost node
// and reports each node to fn:
//
//   - a node that carries a key (KeyIndex > -1) is reported as
//     fn(keyIndex, keyEnd, -1);
//   - a node of array type is reported as fn(-1, -1, arrayIndex)
//     where arrayIndex is the node's ArrLen-1.
//
// A keyed array node therefore triggers two calls, key first.
// Inside fn, keyIndex != -1 means the element is a member value, otherwise
// arrayIndex is the index of the element in the underlying array.
func (i *Iterator[S]) ScanStack(fn func(keyIndex, keyEnd, arrayIndex int)) {
	for _, node := range i.stack {
		if node.KeyIndex >= 0 {
			fn(node.KeyIndex, node.KeyIndexEnd, -1)
		}
		if node.Type != stackNodeTypeArray {
			continue
		}
		fn(-1, -1, node.ArrLen-1)
	}
}
// Pointer returns the JSON pointer of the current value in RFC-6901 format.
//
// The result is always an independent copy of the iterator's internal
// pointer buffer and remains valid after the iterator moves on.
// Use ViewPointer to avoid the copy.
func (i *Iterator[S]) Pointer() (s S) {
	i.ViewPointer(func(p []byte) {
		switch any(s).(type) {
		case string:
			// Converting to a string copies p.
			s = S(p)
		case []byte:
			// S(p) would alias the reusable buffer, hence the explicit copy.
			b := make([]byte, len(p))
			copy(b, p)
			s = S(b)
		default:
			// S is a named type whose underlying type is string or []byte.
			// The cases above only match the exact types string and []byte,
			// so without this branch an empty pointer would be returned.
			// Going through string guarantees a copy for both kinds.
			s = S(string(p))
		}
	})
	return s
}
// ViewPointer builds the JSON pointer (RFC-6901 format) of the current value
// in the iterator's internal buffer and passes that buffer to fn.
// The buffer is truncated for reuse right after fn returns.
// Consider using (*Iterator[S]).Pointer() instead for safety and convenience.
//
// WARNING: do not use or alias p after fn returns,
// only reading and copying p are considered safe!
func (i *Iterator[S]) ViewPointer(fn func(p []byte)) {
	i.ScanStack(func(keyIndex, keyEnd, arrayIndex int) {
		// Every reference token is introduced by the separator.
		i.pointer = append(i.pointer, '/')
		if keyIndex == -1 {
			// Array element: the token is the decimal index.
			i.pointer = strconv.AppendInt(i.pointer, int64(arrayIndex), 10)
			return
		}
		// Object member: the token is the escaped key with its first
		// and last byte trimmed.
		i.pointer = keyescape.Append(i.pointer, i.src[keyIndex+1:keyEnd-1])
	})

	// The key of the current value isn't on the stack, append it last.
	if hasKey := i.keyIndex != -1; hasKey {
		i.pointer = append(i.pointer, '/')
		i.pointer = keyescape.Append(i.pointer, i.src[i.keyIndex+1:i.keyIndexEnd-1])
	}

	fn(i.pointer)
	i.pointer = i.pointer[:0]
}
// getError creates an Error with code c that refers to the iterator's
// source and points at the start index of the current value.
func (i *Iterator[S]) getError(c ErrorCode) Error[S] {
	var e Error[S]
	e.Code = c
	e.Src = i.src
	e.Index = i.valueIndex
	return e
}
// Error is a syntax error encountered during validation or iteration.
// The only exception is ErrorCodeCallback which indicates a callback
// explicitly breaking by returning true instead of a syntax error.
// (Error).IsErr() returning false is equivalent to err == nil.
type Error[S ~string | ~[]byte] struct {
// Src refers to the original source.
Src S
// Index points to the error start index in the source.
Index int
// Code indicates the type of the error.
// The zero value means there is no error (see IsErr).
Code ErrorCode
}
// Compile-time check that Error implements the built-in error interface.
var _ error = Error[string]{}
// IsErr reports whether e represents an actual error,
// i.e. whether its code differs from the zero ErrorCode.
func (e Error[S]) IsErr() bool {
	const noError ErrorCode = 0
	return e.Code != noError
}
// Error stringifies the error implementing the built-in error interface.
// Calling Error should be avoided in performance-critical code as it
// relies on dynamic memory allocation.
//
// The message includes the rune found at Index in Src. If Index lies
// outside of Src (negative, or at or beyond its end) rune 0 is reported
// instead of panicking.
func (e Error[S]) Error() string {
	if e.Index < 0 || e.Index >= len(e.Src) {
		return errorMessage(e.Code, e.Index, 0)
	}

	// Decoding a rune never looks at more than utf8.UTFMax bytes, so only
	// that window is converted. Converting to string works for every type
	// permitted by the constraint, including named types, which a type
	// switch on string and []byte would not match.
	end := e.Index + utf8.UTFMax
	if end > len(e.Src) {
		end = len(e.Src)
	}
	r, _ := utf8.DecodeRuneInString(string(e.Src[e.Index:end]))
	return errorMessage(e.Code, e.Index, r)
}
// reset prepares i for reuse: the stack and the pointer buffer are truncated
// (keeping their capacity), the value type and array index are zeroed and
// the key indexes as well as the value end index are set to -1.
// src and valueIndex are left untouched.
func reset[S ~string | ~[]byte](i *Iterator[S]) {
	i.stack, i.pointer = i.stack[:0], i.pointer[:0]
	i.valueType, i.arrayIndex = 0, 0
	i.valueIndexEnd = -1
	i.keyIndex, i.keyIndexEnd = -1, -1
}
// ErrorCode defines the error type.
// The zero value doesn't denote any error (see (Error).IsErr).
type ErrorCode int8
// Error codes. The zero value is skipped.
const (
_ ErrorCode = iota
// ErrorCodeInvalidEscape indicates the encounter of an invalid escape sequence.
ErrorCodeInvalidEscape
// ErrorCodeIllegalControlChar indicates the encounter of
// an illegal control character in the source.
ErrorCodeIllegalControlChar
// ErrorCodeUnexpectedEOF indicates the encounter of an unexpected end of file.
ErrorCodeUnexpectedEOF
// ErrorCodeUnexpectedToken indicates the encounter of an unexpected token.
ErrorCodeUnexpectedToken
// ErrorCodeMalformedNumber indicates the encounter of a malformed number.
ErrorCodeMalformedNumber
// ErrorCodeCallback indicates return of true from the callback function.
ErrorCodeCallback
)
// ValueType identifies the type of a JSON value.
type ValueType int8

// JSON value types. The zero value doesn't denote any type.
const (
	_ ValueType = iota
	ValueTypeObject
	ValueTypeArray
	ValueTypeNull
	ValueTypeFalse
	ValueTypeTrue
	ValueTypeString
	ValueTypeNumber
)

// String returns the lower-case name of the value type
// or "" if t isn't one of the declared value types.
func (t ValueType) String() string {
	// Index 0 is intentionally left empty, it isn't a valid value type.
	names := [...]string{
		ValueTypeObject: "object",
		ValueTypeArray:  "array",
		ValueTypeNull:   "null",
		ValueTypeFalse:  "false",
		ValueTypeTrue:   "true",
		ValueTypeString: "string",
		ValueTypeNumber: "number",
	}
	if t < 0 || int(t) >= len(names) {
		return ""
	}
	return names[t]
}
// errorMessage renders the human-readable message for error code c.
//
// index is the position of the error in the source and atIndex the rune
// found there. Runes below 0x20 are printed in hexadecimal, all others are
// printed literally in single quotes. ErrorCodeUnexpectedEOF never mentions
// a rune. Unknown codes (including the zero code) yield "".
func errorMessage(c ErrorCode, index int, atIndex rune) string {
	var errMsg string
	switch c {
	case ErrorCodeUnexpectedEOF:
		// There is no rune to report at the end of the input.
		return fmt.Sprintf("error at index %d: unexpected EOF", index)
	case ErrorCodeInvalidEscape:
		errMsg = "invalid escape"
	case ErrorCodeIllegalControlChar:
		errMsg = "illegal control character"
	case ErrorCodeUnexpectedToken:
		errMsg = "unexpected token"
	case ErrorCodeMalformedNumber:
		errMsg = "malformed number"
	case ErrorCodeCallback:
		errMsg = "callback error"
	default:
		return ""
	}

	if atIndex >= 0x20 {
		return fmt.Sprintf(
			"error at index %d ('%s'): %s",
			index, string(atIndex), errMsg,
		)
	}
	return fmt.Sprintf(
		"error at index %d (0x%x): %s",
		index, atIndex, errMsg,
	)
}
// lutSX is a byte lookup table that maps the space characters (space,
// line feed, horizontal tab and carriage return) to 1, the hexadecimal
// digits (0-9, a-f and A-F) to 2 and all other bytes to 0.
var lutSX = [256]byte{
' ': 1, '\n': 1, '\t': 1, '\r': 1,
'0': 2, '1': 2, '2': 2, '3': 2, '4': 2, '5': 2, '6': 2, '7': 2, '8': 2, '9': 2,
'a': 2, 'b': 2, 'c': 2, 'd': 2, 'e': 2, 'f': 2,
'A': 2, 'B': 2, 'C': 2, 'D': 2, 'E': 2, 'F': 2,
}
// lutStr is a byte lookup table used during string traversal.
// Bytes that don't require checking are mapped to 0. The control characters
// (0x00 through 0x1F), the quotation mark (") and the reverse solidus (\)
// are mapped to 1.
var lutStr = [256]byte{
// 0x00 through 0x0F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x10 through 0x1F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
'"': 1, '\\': 1,
}
// lutEscape is a byte lookup table that maps escapable characters to 1,
// all other bytes are mapped to 0.
//
// NOTE(review): 'u' (as in \uXXXX) isn't part of this table and is
// presumably handled separately by the scanner; confirm against the caller.
var lutEscape = [256]byte{
'"': 1,
'\\': 1,
'/': 1,
'b': 1,
'f': 1,
'n': 1,
'r': 1,
't': 1,
}
// getError creates an Error with code c for the source src.
// The error index is computed as len(src) - len(s), hence s is presumably
// the not yet consumed remainder of src at the point of failure.
func getError[S ~string | ~[]byte](c ErrorCode, src S, s S) Error[S] {
	var e Error[S]
	e.Src, e.Code = src, c
	e.Index = len(src) - len(s)
	return e
}