Skip to content

Commit

Permalink
Merge pull request #2 from JunNishimura/feature/add_lexer
Browse files Browse the repository at this point in the history
add lexer to handle simple math operations
  • Loading branch information
JunNishimura authored Sep 20, 2024
2 parents 16e12a8 + ba87afe commit 2541bef
Show file tree
Hide file tree
Showing 3 changed files with 426 additions and 0 deletions.
128 changes: 128 additions & 0 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package lexer

import "github.com/JunNishimura/jsop/token"

// Lexer walks an input string one byte at a time and splits it into tokens.
type Lexer struct {
	// input is the complete source text being scanned.
	input string

	// curPos is the index in input of curChar; nextPos is the index of the
	// byte that the next read will consume.
	curPos, nextPos int

	// curChar is the byte currently under examination, or 0 once the read
	// position has moved past the end of input.
	curChar byte
}

// New returns a Lexer for input that has already read the first byte, so the
// returned value is ready for NextToken.
func New(input string) *Lexer {
	lex := new(Lexer)
	lex.input = input
	lex.readChar() // prime curChar with the first byte (or 0 for empty input)
	return lex
}

// readChar advances the lexer by one byte. curChar becomes the byte at
// nextPos, or 0 when nextPos is at or beyond the end of the input; curPos
// then takes the old nextPos and nextPos is incremented.
func (l *Lexer) readChar() {
	// Start from the end-of-input marker and overwrite it only when a real
	// byte is available.
	l.curChar = 0
	if l.nextPos < len(l.input) {
		l.curChar = l.input[l.nextPos]
	}
	l.curPos = l.nextPos
	l.nextPos++
}

// NextToken skips leading whitespace and returns the next token of the input.
//
// Single-byte punctuation and operators map to their own token types. A '+'
// or '-' immediately followed by a digit is lexed as a signed integer: the
// '+' is dropped from the literal, the '-' is kept. A run of digits becomes a
// token.INT, and a run of ASCII letters becomes a token whose type is decided
// by token.LookupStringTokenType. A zero byte (which is also what curChar
// holds once the input is exhausted) yields token.EOF with an empty literal.
// Any other byte yields token.ILLEGAL.
func (l *Lexer) NextToken() token.Token {
	var tok token.Token

	l.skipWhitespace()

	switch l.curChar {
	case '{':
		tok = newToken(token.LBRACE, l.curChar)
	case '}':
		tok = newToken(token.RBRACE, l.curChar)
	case '[':
		tok = newToken(token.LBRACKET, l.curChar)
	case ']':
		tok = newToken(token.RBRACKET, l.curChar)
	case '"':
		tok = newToken(token.DOUBLE_QUOTE, l.curChar)
	case ':':
		tok = newToken(token.COLON, l.curChar)
	case ',':
		tok = newToken(token.COMMA, l.curChar)
	case '+':
		if isDigit(l.peekChar()) {
			l.readChar() // step over the sign; it is not part of the literal
			tok.Type = token.INT
			tok.Literal = l.readNumber()
			// readNumber leaves curChar on the first byte after the digits.
			// Return here rather than falling through to the trailing
			// readChar, which would silently drop that byte (for example
			// the ',' in "[+1,2]").
			return tok
		}
		tok = newToken(token.PLUS, l.curChar)
	case '-':
		if isDigit(l.peekChar()) {
			l.readChar() // step over the sign; it is re-added to the literal below
			tok.Type = token.INT
			tok.Literal = "-" + l.readNumber()
			// Same reasoning as for '+': the byte after the number has not
			// been consumed yet and must not be skipped.
			return tok
		}
		tok = newToken(token.MINUS, l.curChar)
	case '*':
		tok = newToken(token.ASTERISK, l.curChar)
	case '/':
		tok = newToken(token.SLASH, l.curChar)
	case 0:
		tok.Literal = ""
		tok.Type = token.EOF
	default:
		// Multi-byte tokens: the read helpers already stop on the first byte
		// that does not belong to the token, so these return directly.
		switch {
		case isDigit(l.curChar):
			tok.Type = token.INT
			tok.Literal = l.readNumber()
			return tok
		case isLetter(l.curChar):
			tok.Literal = l.readString()
			tok.Type = token.LookupStringTokenType(tok.Literal)
			return tok
		}
		tok = newToken(token.ILLEGAL, l.curChar)
	}

	// Single-byte tokens: consume the byte that was just classified.
	l.readChar()
	return tok
}

// skipWhitespace advances past any run of spaces, tabs, line feeds and
// carriage returns, stopping on the first byte that is none of those.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.curChar {
		case ' ', '\t', '\n', '\r':
			l.readChar()
		default:
			return
		}
	}
}

// newToken builds a token of the given type whose literal is the string
// conversion of ch (for an ASCII byte, the one-character string).
func newToken(tokenType token.TokenType, ch byte) token.Token {
	var tok token.Token
	tok.Type = tokenType
	tok.Literal = string(ch)
	return tok
}

// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}

// isLetter reports whether ch is an ASCII letter, lower or upper case.
// Digits, underscores and all other bytes are not letters.
func isLetter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return false
	}
}

// readNumber consumes the run of digits starting at the current position and
// returns it. On return curChar is the first byte after the digits.
func (l *Lexer) readNumber() string {
	begin := l.curPos
	for ; isDigit(l.curChar); l.readChar() {
	}
	end := l.curPos
	return l.input[begin:end]
}

// readString consumes the run of letters starting at the current position
// and returns it. On return curChar is the first byte after the letters.
func (l *Lexer) readString() string {
	begin := l.curPos
	for ; isLetter(l.curChar); l.readChar() {
	}
	end := l.curPos
	return l.input[begin:end]
}

// peekChar returns the byte at nextPos without advancing the lexer, or 0
// when nextPos is at or beyond the end of the input.
func (l *Lexer) peekChar() byte {
	if l.nextPos < len(l.input) {
		return l.input[l.nextPos]
	}
	return 0
}
251 changes: 251 additions & 0 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
package lexer

import (
"testing"

"github.com/JunNishimura/jsop/token"
)

// TestSingleProgram feeds small JSON-shaped programs to the lexer and checks
// that successive NextToken calls yield exactly the expected token types and
// literals, ending with EOF.
func TestSingleProgram(t *testing.T) {
	// Fixed tokens shared by every expected sequence.
	var (
		lbrace   = token.Token{Type: token.LBRACE, Literal: "{"}
		rbrace   = token.Token{Type: token.RBRACE, Literal: "}"}
		lbracket = token.Token{Type: token.LBRACKET, Literal: "["}
		rbracket = token.Token{Type: token.RBRACKET, Literal: "]"}
		quote    = token.Token{Type: token.DOUBLE_QUOTE, Literal: "\""}
		colon    = token.Token{Type: token.COLON, Literal: ":"}
		comma    = token.Token{Type: token.COMMA, Literal: ","}
		eof      = token.Token{Type: token.EOF, Literal: ""}
	)

	// integer is the expected token for an integer with the given literal.
	integer := func(literal string) token.Token {
		return token.Token{Type: token.INT, Literal: literal}
	}

	// quoted returns the three tokens expected for a double-quoted word or
	// operator: opening quote, the word itself, closing quote.
	quoted := func(typ token.TokenType, literal string) []token.Token {
		return []token.Token{quote, {Type: typ, Literal: literal}, quote}
	}

	// atomTokens is the expected stream for an object holding a single
	// "atom" key whose value lexes to the integer literal intLiteral.
	atomTokens := func(intLiteral string) []token.Token {
		want := []token.Token{lbrace}
		want = append(want, quoted(token.ATOM, "atom")...)
		return append(want, colon, integer(intLiteral), rbrace, eof)
	}

	// commandTokens is the expected stream for a "command" object whose
	// "symbol" is the given operator and whose "args" are [1, 2].
	commandTokens := func(op token.TokenType, opLiteral string) []token.Token {
		want := []token.Token{lbrace}
		want = append(want, quoted(token.COMMAND, "command")...)
		want = append(want, colon, lbrace)
		want = append(want, quoted(token.SYMBOL, "symbol")...)
		want = append(want, colon)
		want = append(want, quoted(op, opLiteral)...)
		want = append(want, comma)
		want = append(want, quoted(token.ARGS, "args")...)
		return append(want,
			colon,
			lbracket, integer("1"), comma, integer("2"), rbracket,
			rbrace,
			rbrace,
			eof,
		)
	}

	cases := []struct {
		name     string
		input    string
		expected []token.Token
	}{
		{
			name: "integer atom",
			input: `
{
"atom": 1
}`,
			expected: atomTokens("1"),
		},
		{
			name: "integer more than 1 digit",
			input: `
{
"atom": 123
}`,
			expected: atomTokens("123"),
		},
		{
			name: "integer with plus sign",
			input: `
{
"atom": +123
}`,
			// The leading '+' is not part of the literal.
			expected: atomTokens("123"),
		},
		{
			name: "negative integer",
			input: `
{
"atom": -123
}`,
			expected: atomTokens("-123"),
		},
		{
			name: "mathematical operation: addition",
			input: `
{
"command": {
"symbol": "+",
"args": [1, 2]
}
}`,
			expected: commandTokens(token.PLUS, "+"),
		},
		{
			name: "mathematical operation: subtraction",
			input: `
{
"command": {
"symbol": "-",
"args": [1, 2]
}
}`,
			expected: commandTokens(token.MINUS, "-"),
		},
		{
			name: "mathematical operation: multiplication",
			input: `
{
"command": {
"symbol": "*",
"args": [1, 2]
}
}`,
			expected: commandTokens(token.ASTERISK, "*"),
		},
		{
			name: "mathematical operation: division",
			input: `
{
"command": {
"symbol": "/",
"args": [1, 2]
}
}`,
			expected: commandTokens(token.SLASH, "/"),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			lex := New(tc.input)
			for i, want := range tc.expected {
				got := lex.NextToken()
				if got.Type != want.Type {
					t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", i, want.Type, got.Type)
				}
				if got.Literal != want.Literal {
					t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q", i, want.Literal, got.Literal)
				}
			}
		})
	}
}
Loading

0 comments on commit 2541bef

Please sign in to comment.