Commit

Use doken
yishn committed Feb 3, 2020
1 parent 7aab80e commit 1975f01
Showing 4 changed files with 27 additions and 63 deletions.
8 changes: 4 additions & 4 deletions README.md
````diff
@@ -127,10 +127,10 @@ A generator function that yields SGF tokens, objects of the following form:
 ```
 
 `type` is one of `"parenthesis"`, `"semicolon"`, `"prop_ident"`,
-`"c_value_type", "invalid"`. `row` is the zero-based index of the row where the
-token starts, `col` the zero-based index of column where the token starts, and
-`pos` denotes the index in `contents` where the token starts. `progress` is a
-number between `0` and `1` denoting the percental position of the token.
+`"c_value_type"`, `"invalid"`. `row` is the zero-based index of the row where
+the token starts, `col` the zero-based index of column where the token starts,
+and `pos` denotes the index in `contents` where the token starts. `progress` is
+a number between `0` and `1` denoting the percental position of the token.
 
 #### `sgf.tokenize(contents)`
````

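For orientation (not part of the commit): given the corrected wording above, a token yielded for the 8-character input `(;B[dd])` might look like the sketch below. The concrete values are illustrative, not captured library output.

```js
// Hypothetical token shape per the README text above.
let token = {
  type: 'prop_ident',
  value: 'B',
  row: 0, // zero-based row where the token starts
  col: 2, // zero-based column where the token starts
  pos: 2, // index in `contents` where the token starts
  progress: 2 / 7 // between 0 and 1; pos / (length - 1), per src/tokenize.js
}
```
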
5 changes: 5 additions & 0 deletions package-lock.json

(Generated lockfile; diff not rendered.)

4 changes: 3 additions & 1 deletion package.json
```diff
@@ -27,7 +27,9 @@
     "url": "https://github.com/SabakiHQ/sgf/issues"
   },
   "homepage": "https://github.com/SabakiHQ/sgf",
-  "dependencies": {},
+  "dependencies": {
+    "doken": "^1.0.0"
+  },
   "devDependencies": {
     "iconv-lite": "^0.4.24",
     "jschardet": "^2.1.0",
```
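For readers new to the dependency added above: the sketch below shows the doken API surface as this commit uses it (`createTokenizer` over named `regexRule`s with sticky `/y` regexes). It is inferred from the src/tokenize.js diff that follows, not from doken's own documentation, so treat the details as assumptions.

```js
// doken usage sketch, inferred from its use in this commit (doken@^1.0.0).
const {createTokenizer, regexRule} = require('doken')

const tokenize = createTokenizer({
  rules: [
    // Assumption: a leading underscore marks rules whose matches are
    // consumed but never emitted as tokens (whitespace here).
    regexRule('_whitespace', /\s+/y, {lineBreaks: true}),
    regexRule('word', /[A-Za-z]+/y)
  ]
})

// Yielded tokens appear to carry type, value, pos, row, col, and length;
// the commit below deletes `length` and derives `progress` from `pos`.
for (let token of tokenize('ab cd')) {
  console.log(token.type, token.pos)
}
```
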
73 changes: 15 additions & 58 deletions src/tokenize.js
```diff
@@ -1,71 +1,28 @@
+const {createTokenizer, regexRule} = require('doken')
 const iconv = require('./iconv-lite')
 const jschardet = require('./jschardet')
 const {unescapeString} = require('./helper')
 
-const rules = {
-  whitespace: /^\s+/,
-  parenthesis: /^(\(|\))/,
-  semicolon: /^;/,
-  prop_ident: /^[A-Za-z]+/,
-  c_value_type: /^\[([^\\\]]|\\[^])*\]/
-}
+const tokenizeInner = createTokenizer({
+  rules: [
+    regexRule('_whitespace', /\s+/y, {lineBreaks: true}),
+    regexRule('parenthesis', /(\(|\))/y),
+    regexRule('semicolon', /;/y),
+    regexRule('prop_ident', /[A-Za-z]+/y),
+    regexRule('c_value_type', /\[([^\\\]]|\\[^])*\]/y, {lineBreaks: true})
+  ]
+})
 
 exports.tokenizeIter = function*(contents) {
   let length = contents.length
-  let [row, col, pos] = [0, 0, 0]
 
-  while (contents.length > 0) {
-    let value = null
-
-    for (let type in rules) {
-      let match = rules[type].exec(contents)
-      if (match == null) continue
-
-      value = match[0]
-
-      if (type !== 'whitespace') {
-        yield {
-          type,
-          value,
-          row,
-          col,
-          pos,
-          progress: pos / (length - 1)
-        }
-      }
-
-      break
-    }
+  for (let token of tokenizeInner(contents)) {
+    token.progress = token.pos / (length - 1)
+    delete token.length
 
-    if (value == null) {
-      value = contents[0]
-
-      yield {
-        type: 'invalid',
-        value,
-        row,
-        col,
-        pos,
-        progress: pos / (length - 1)
-      }
-    }
-
-    // Update source position
-
-    let newlineIndices = Array.from(value)
-      .map((c, i) => (c === '\n' ? i : null))
-      .filter(x => x != null)
-
-    row += newlineIndices.length
-
-    if (newlineIndices.length > 0) {
-      col = value.length - newlineIndices.slice(-1)[0] - 1
-    } else {
-      col += value.length
-    }
+    if (token.type == null) token.type = 'invalid'
 
-    pos += value.length
-    contents = contents.slice(value.length)
+    yield token
   }
 }
```

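For context (not part of the commit): a short sketch of consuming the rewritten generator. It assumes the file's exported `tokenizeIter` and that doken tokens expose a `value` field, as the README documents; the commented output is what the rules above imply for this input.

```js
// Usage sketch for the new doken-based tokenizer.
const {tokenizeIter} = require('./src/tokenize')

for (let token of tokenizeIter('(;FF[4]B[dd])')) {
  console.log(token.type, JSON.stringify(token.value), token.pos)
}
// Implied by the rules above:
//   parenthesis "(" 0
//   semicolon ";" 1
//   prop_ident "FF" 2
//   c_value_type "[4]" 4
//   prop_ident "B" 7
//   c_value_type "[dd]" 8
//   parenthesis ")" 12
```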
