diff --git a/buffer.go b/buffer.go
index 6797e1a..c8bb3fb 100644
--- a/buffer.go
+++ b/buffer.go
@@ -2,6 +2,7 @@ package ajson
 
 import (
 	"io"
+	"strings"
 )
 
 type buffer struct {
@@ -11,28 +12,111 @@ type buffer struct {
 }
 
 const (
-	quotes    byte = '"'
-	quote     byte = '\''
-	coma      byte = ','
-	colon     byte = ':'
-	backslash byte = '\\'
-	skipS     byte = ' '
-	skipN     byte = '\n'
-	skipR     byte = '\r'
-	skipT     byte = '\t'
-	bracketL  byte = '['
-	bracketR  byte = ']'
-	bracesL   byte = '{'
-	bracesR   byte = '}'
-	dollar    byte = '$'
-	dot       byte = '.'
-	//asterisk byte = '*'
+	quotes       byte = '"'
+	quote        byte = '\''
+	coma         byte = ','
+	colon        byte = ':'
+	backslash    byte = '\\'
+	skipS        byte = ' '
+	skipN        byte = '\n'
+	skipR        byte = '\r'
+	skipT        byte = '\t'
+	bracketL     byte = '['
+	bracketR     byte = ']'
+	bracesL      byte = '{'
+	bracesR      byte = '}'
+	parenthesesL byte = '('
+	parenthesesR byte = ')'
+	dollar       byte = '$'
+	at           byte = '@'
+	dot          byte = '.'
+	asterisk     byte = '*'
+	plus         byte = '+'
+	minus        byte = '-'
+	division     byte = '/'
+	exclamation  byte = '!'
+	caret        byte = '^'
+	signL        byte = '<'
+	signG        byte = '>'
+	signE        byte = '='
+	ampersand    byte = '&'
+	pipe         byte = '|'
+	question     byte = '?'
 )
 
 var (
 	_null  = []byte("null")
 	_true  = []byte("true")
 	_false = []byte("false")
+
+	// Operator precedence
+	// From https://golang.org/ref/spec#Operator_precedence
+	//
+	// Precedence    Operator
+	//    5             *  /  %  <<  >>  &  &^
+	//    4             +  -  |  ^
+	//    3             ==  !=  <  <=  >  >=
+	//    2             &&
+	//    1             ||
+	//
+	// Arithmetic operators
+	// From https://golang.org/ref/spec#Arithmetic_operators
+	//
+	// +    sum                    integers, floats, complex values, strings
+	// -    difference             integers, floats, complex values
+	// *    product                integers, floats, complex values
+	// /    quotient               integers, floats, complex values
+	// %    remainder              integers
+	//
+	// &    bitwise AND            integers
+	// |    bitwise OR             integers
+	// ^    bitwise XOR            integers
+	// &^   bit clear (AND NOT)    integers
+	//
+	// <<   left shift             integer << unsigned integer
+	// >>   right shift            integer >> unsigned integer
+	//
+	priority = map[string]int8{
+		"!":  7, // additional: factorial
+		"**": 6, // additional: power
+		"*":  5,
+		"/":  5,
+		"%":  5,
+		"<<": 5,
+		">>": 5,
+		"&":  5,
+		"&^": 5,
+		"+":  4,
+		"-":  4,
+		"|":  4,
+		"^":  4,
+		"==": 3,
+		"!=": 3,
+		"<":  3,
+		"<=": 3,
+		">":  3,
+		">=": 3,
+		"&&": 2,
+		"||": 1,
+	}
+
+	rightOp = map[string]bool{
+		"**": true,
+	}
+
+	// fixme
+	functions = map[string]bool{
+		"sin": true,
+		"cos": true,
+	}
+	// fixme
+	constants = map[string]bool{
+		"pi":    true,
+		"e":     true,
+		"true":  true,
+		"false": true,
+		"null":  true,
+	}
 )
 
 func newBuffer(body []byte) (b *buffer) {
@@ -187,6 +271,248 @@ func (b *buffer) step() error {
 	return io.EOF
 }
 
+// token advances b.index to the end of a JSONPath-like token, e.g.: `@.length`,
+// `@['foo'].bar[(@.length - 1)].baz`. Brackets, parentheses and single-quoted
+// strings are balanced on a stack, so any other symbol may appear inside of them.
+//
+// It returns nil when the token ends before the end of the data (b.index then
+// points at the first symbol after the token), io.EOF when the token runs up to
+// the end of the data, and an error for unbalanced brackets, parentheses or
+// quotes.
+func (b *buffer) token() (err error) {
+	var (
+		c     byte
+		str   bool
+		stack = make([]byte, 0)
+	)
+tokenLoop:
+	for ; b.index < b.length; b.index++ {
+		c = b.data[b.index]
+		switch {
+		case c == quote:
+			if !str {
+				str = true
+				stack = append(stack, c)
+			} else if !b.backslash() {
+				if len(stack) == 0 || stack[len(stack)-1] != quote {
+					return b.errorSymbol()
+				}
+				str = false
+				stack = stack[:len(stack)-1]
+			}
+		case c == bracketL && !str:
+			stack = append(stack, c)
+		case c == bracketR && !str:
+			if len(stack) == 0 || stack[len(stack)-1] != bracketL {
+				return b.errorSymbol()
+			}
+			stack = stack[:len(stack)-1]
+		case c == parenthesesL && !str:
+			stack = append(stack, c)
+		case c == parenthesesR && !str:
+			if len(stack) == 0 || stack[len(stack)-1] != parenthesesL {
+				return b.errorSymbol()
+			}
+			stack = stack[:len(stack)-1]
+		case str:
+			continue
+		// 'A'..'Z' and 'a'..'z' are tested separately: the symbols between them (such as '^') are not part of a name
+		case c == dot || c == at || c == dollar || c == question || c == asterisk || c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'): // standard token name
+			continue
+		case len(stack) != 0:
+			continue
+		default:
+			break tokenLoop
+		}
+	}
+	if len(stack) != 0 {
+		return b.errorEOF()
+	}
+	if b.index < b.length {
+		return nil // stopped on a symbol that does not belong to the token
+	}
+	return io.EOF
+}
+
+// rpn converts the infix formula at the current position of the buffer into
+// reverse Polish notation (shunting-yard algorithm). Operands are numbers,
+// single-quoted strings, `$`/`@` variables and the names listed in constants;
+// operators are the keys of priority and functions are the keys of functions.
+// It returns the tokens in evaluation order, or an error for an unexpected
+// symbol, an unknown name or unbalanced parentheses.
+func (b *buffer) rpn() (result []string, err error) {
+	var (
+		c        byte
+		start    int
+		temp     string
+		current  string
+		found    bool
+		variable bool
+		stack    = make([]string, 0)
+	)
+	for {
+		c, err = b.first()
+		if err != nil {
+			break
+		}
+		switch true {
+		case c == asterisk || c == division || c == minus || c == plus || c == caret || c == ampersand || c == pipe || c == signL || c == signG || c == signE || c == exclamation: // operations
+			if variable {
+				variable = false
+				current = string(c)
+
+				c, err = b.next()
+				if err == nil {
+					temp = current + string(c)
+					if priority[temp] != 0 {
+						current = temp
+					} else {
+						b.index--
+					}
+				} else {
+					err = nil
+				}
+
+				found = false
+				for len(stack) > 0 {
+					temp = stack[len(stack)-1]
+					found = false
+					if functions[temp] { // function
+						found = true
+					} else if priority[temp] != 0 { // operation
+						if priority[temp] > priority[current] {
+							found = true
+						} else if priority[temp] == priority[current] && !rightOp[temp] {
+							found = true
+						}
+					}
+
+					if found {
+						stack = stack[:len(stack)-1]
+						result = append(result, temp)
+					} else {
+						break
+					}
+				}
+				stack = append(stack, current)
+				break
+			}
+			if c != minus && c != plus {
+				return nil, b.errorSymbol()
+			}
+			fallthrough // for numbers like `-1e6`
+		case (c >= '0' && c <= '9') || c == '.': // numbers
+			variable = true
+			start = b.index
+			err = b.numeric()
+			if err != nil && err != io.EOF {
+				return nil, err
+			}
+			current = string(b.data[start:b.index])
+			result = append(result, current)
+			if err != nil {
+				err = nil
+			} else {
+				b.index--
+			}
+		case c == quote: // string
+			variable = true
+			start = b.index
+			err = b.string(quote)
+			if err != nil {
+				return nil, b.errorEOF()
+			}
+			current = string(b.data[start : b.index+1])
+			result = append(result, current)
+		case c == dollar || c == at: // variable : like @.length , $.expensive, etc.
+			variable = true
+			start = b.index
+			err = b.token()
+			if err != nil && err != io.EOF {
+				return nil, err
+			}
+			// slice before stepping back, otherwise the last symbol of the variable is lost
+			current = string(b.data[start:b.index])
+			result = append(result, current)
+			if err != nil {
+				err = nil
+			} else {
+				b.index-- // step() below must land on the symbol right after the token
+			}
+		case c == parenthesesL: // (
+			variable = false
+			current = string(c)
+			stack = append(stack, current)
+		case c == parenthesesR: // )
+			variable = true
+			current = string(c)
+			found = false
+			for len(stack) > 0 {
+				temp = stack[len(stack)-1]
+				stack = stack[:len(stack)-1]
+				if temp == "(" {
+					found = true
+					break
+				}
+				result = append(result, temp)
+			}
+			if !found { // have no parenthesesL
+				return nil, errorRequest("formula has no left parentheses")
+			}
+		default: // prefix functions or etc.
+			start = b.index
+			variable = true
+			for ; b.index < b.length; b.index++ {
+				c = b.data[b.index]
+				if c == parenthesesL { // function detection, example: sin(...), round(...), etc.
+					variable = false
+					break
+				}
+				if !(c >= 'A' && c <= 'Z') && !(c >= 'a' && c <= 'z') && !(c >= '0' && c <= '9') && c != '_' { // constants detection, example: true, false, null, PI, e, etc.
+					break
+				}
+			}
+			if b.index == start { // not a single name symbol was read: report the symbol itself
+				return nil, b.errorSymbol()
+			}
+			current = strings.ToLower(string(b.data[start:b.index]))
+			b.index--
+			if !variable {
+				if _, found = functions[current]; !found {
+					return nil, errorRequest("wrong formula, '%s' is not a function", current)
+				}
+				stack = append(stack, current)
+			} else {
+				if _, found = constants[current]; !found {
+					return nil, errorRequest("wrong formula, '%s' is not a constant", current)
+				}
+				result = append(result, current)
+			}
+		}
+		err = b.step()
+		if err != nil {
+			break
+		}
+	}
+
+	if err != io.EOF {
+		return
+	}
+	err = nil
+
+	for len(stack) > 0 {
+		temp = stack[len(stack)-1]
+		_, ok := functions[temp]
+		if priority[temp] == 0 && !ok { // operations only
+			return nil, errorRequest("wrong formula, '%s' is not an operation or function", temp)
+		}
+		result = append(result, temp)
+		stack = stack[:len(stack)-1]
+	}
+
+	return
+}
+
 func (b *buffer) errorEOF() error {
 	return errorEOF(b)
 }
diff --git a/buffer_test.go b/buffer_test.go
new file mode 100644
index 0000000..46f51fe
--- /dev/null
+++ b/buffer_test.go
@@ -0,0 +1,70 @@
+package ajson
+
+import (
+	"io"
+	"testing"
+)
+
+func TestBuffer_Token(t *testing.T) {
+	tests := []struct {
+		name  string
+		value string
+		index int
+		fail  bool
+	}{
+		{name: "simple", value: "@.length", index: 8, fail: false},
+		{name: "combined", value: "@['foo'].0.bar", index: 14, fail: false},
+		{name: "formula", value: "@['foo'].[(@.length - 1)].*", index: 27, fail: false},
+		{name: "filter", value: "@['foo'].[?(@.bar == 1 && @.baz < @.length)].*", index: 46, fail: false},
+		{name: "string", value: "@['foo)(]][[[.[?(@.bar \\' == 1 && < @.length)'].*", index: 49, fail: false},
+
+		{name: "part 1", value: "@.foo+@.bar", index: 5, fail: false},
+		{name: "part 2", value: "@.foo && @.bar", index: 5, fail: false},
+		{name: "part 3", value: "@.foo^2", index: 5, fail: false},
+
+		{name: "fail 1", value: "@.foo[", fail: true},
+		{name: "fail 2", value: "@.foo[(]", fail: true},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			buf := newBuffer([]byte(test.value))
+			err := buf.token()
+			if !test.fail && err != nil && err != io.EOF {
+				t.Errorf("Unexpected error: %s", err.Error())
+			} else if test.fail && (err == nil || err == io.EOF) {
+				t.Errorf("Expected error, got nothing")
+			} else if !test.fail && test.index != buf.index {
+				t.Errorf("Wrong index: expected %d, got %d", test.index, buf.index)
+			}
+		})
+	}
+}
+
+func TestBuffer_RPN(t *testing.T) {
+	tests := []struct {
+		name     string
+		value    string
+		expected []string
+	}{
+		{name: "example_1", value: "@.length", expected: []string{"@.length"}},
+		{name: "example_2", value: "1 + 2", expected: []string{"1", "2", "+"}},
+		{name: "example_3", value: "3 + 4 * 2 / (1 - 5)**2", expected: []string{"3", "4", "2", "*", "1", "5", "-", "2", "**", "/", "+"}},
+		{name: "example_4", value: "'foo' == pi", expected: []string{"'foo'", "pi", "=="}},
+		{name: "example_5", value: "pi != 'bar'", expected: []string{"pi", "'bar'", "!="}},
+		{name: "example_6", value: "3 + 4 * -2 / (-1 - 5)**-2", expected: []string{"3", "4", "-2", "*", "-1", "5", "-", "-2", "**", "/", "+"}},
+		{name: "example_7", value: "1.3e2 + sin(2*pi/3)", expected: []string{"1.3e2", "2", "pi", "*", "3", "/", "sin", "+"}},
+		{name: "example_8", value: "@.foo + @.bar", expected: []string{"@.foo", "@.bar", "+"}},
+		{name: "example_9", value: "1 ^ 2 * 3", expected: []string{"1", "2", "3", "*", "^"}},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			buf := newBuffer([]byte(test.value))
+			result, err := buf.rpn()
+			if err != nil {
+				t.Errorf("Unexpected error: %s", err.Error())
+			} else if !sliceEqual(test.expected, result) {
+				t.Errorf("Error on RPN(%s): result doesn't match\nExpected: %s\nActual: %s", test.value, sliceString(test.expected), sliceString(result))
+			}
+		})
+	}
+}
diff --git a/decode.go b/decode.go
index 9e8b922..b54ab3d 100644
--- a/decode.go
+++ b/decode.go
@@ -88,7 +88,7 @@ func Unmarshal(data []byte) (root *Node, err error) {
 				found = true
 				current = previous(current)
 			}
-		case (b >= '0' && b <= '9') || b == '.' || b == '+' || b == '-' || b == 'e' || b == 'E':
+		case (b >= '0' && b <= '9') || b == '.' || b == '+' || b == '-':
 			// Detected: Numeric
 			current, err = newNode(current, buf, Numeric, &key)
 			if err != nil {
diff --git a/errors.go b/errors.go
index ec95179..0bb8b3e 100644
--- a/errors.go
+++ b/errors.go
@@ -4,9 +4,10 @@ import "fmt"
 
 //Error is common struct to provide internal errors
 type Error struct {
-	Type  ErrorType
-	Index int
-	Char  byte
+	Type    ErrorType
+	Index   int
+	Char    byte
+	Message string
 }
 
 //ErrorType is container for reflection type of error
@@ -24,7 +25,11 @@ const (
 )
 
 func errorSymbol(b *buffer) error {
-	return &Error{Type: WrongSymbol, Index: b.index, Char: b.data[b.index]}
+	c, err := b.current()
+	if err != nil {
+		c = 0
+	}
+	return &Error{Type: WrongSymbol, Index: b.index, Char: c}
 }
 
 func errorEOF(b *buffer) error {
@@ -35,8 +40,8 @@ func errorType() error {
 	return &Error{Type: WrongType}
 }
 
-func errorRequest() error {
-	return &Error{Type: WrongRequest}
+func errorRequest(format string, args ...interface{}) error {
+	return &Error{Type: WrongRequest, Message: fmt.Sprintf(format, args...)}
 }
 
 //Error interface implementation
@@ -49,7 +54,7 @@ func (err *Error) Error() string {
 	case WrongType:
 		return fmt.Sprintf("wrong type of Node")
 	case WrongRequest:
-		return fmt.Sprintf("wrong request")
+		return fmt.Sprintf("wrong request: %s", err.Message)
 	}
 	return fmt.Sprintf("unknown error: '%s' at %d", []byte{err.Char}, err.Index)
 }
diff --git a/jsonpath.go b/jsonpath.go
index 606e57c..21d9acd 100644
--- a/jsonpath.go
+++ b/jsonpath.go
@@ -58,14 +58,14 @@ func JSONPath(data []byte, path string) (result []*Node, err error) {
 			case strings.Contains(cmd, ":"): // array slice operator
 				keys = strings.Split(cmd, ":")
 				if len(keys) > 3 {
-					return nil, errorRequest()
+					return nil, errorRequest("slice must contains no more than 2 colons, got '%s'", cmd)
 				}
 				if keys[0] == "" {
 					from = 0
 				} else {
 					from, err = strconv.Atoi(keys[0])
 					if err != nil {
-						return nil, errorRequest()
+						return nil, errorRequest("start of slice must be number, got '%s'", keys[0])
 					}
 				}
 				if keys[1] == "" {
@@ -73,7 +73,7 @@ func JSONPath(data []byte, path string) (result []*Node, err error) {
 				} else {
 					to, err = strconv.Atoi(keys[1])
 					if err != nil {
-						return nil, errorRequest()
+						return nil, errorRequest("stop of slice must be number, got '%s'", keys[1])
 					}
 				}
 				step = 1
@@ -81,7 +81,7 @@ func JSONPath(data []byte, path string) (result []*Node, err error) {
 					if keys[2] != "" {
 						step, err = strconv.Atoi(keys[2])
 						if err != nil {
-							return nil, errorRequest()
+							return nil, errorRequest("step of slice must be number, got '%s'", keys[2])
 						}
 					}
 				}
diff --git a/node.go b/node.go
index 8ea55c8..32cad6e 100644
--- a/node.go
+++ b/node.go
@@ -391,7 +391,7 @@ func (n *Node) GetIndex(index int) (*Node, error) {
 	}
 	child, ok := n.children[strconv.Itoa(index)]
 	if !ok {
-		return nil, errorRequest()
+		return nil, errorRequest("out of index %d", index)
 	}
 	return child, nil
 }
@@ -412,7 +412,7 @@ func (n *Node) GetKey(key string) (*Node, error) {
 	}
 	value, ok := n.children[key]
 	if !ok {
-		return nil, errorRequest()
+		return nil, errorRequest("wrong key '%s'", key)
 	}
 	return value, nil
 }