From 1975f013d919e6ee8e2f4b7c76551277717b856b Mon Sep 17 00:00:00 2001 From: Yichuan Shen Date: Mon, 3 Feb 2020 13:58:56 +0100 Subject: [PATCH] Use doken --- README.md | 8 +++--- package-lock.json | 5 ++++ package.json | 4 ++- src/tokenize.js | 73 ++++++++++------------------------------------- 4 files changed, 27 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 6692a2c..b237ca5 100644 --- a/README.md +++ b/README.md @@ -127,10 +127,10 @@ A generator function that yields SGF tokens, objects of the following form: ``` `type` is one of `"parenthesis"`, `"semicolon"`, `"prop_ident"`, -`"c_value_type", "invalid"`. `row` is the zero-based index of the row where the -token starts, `col` the zero-based index of column where the token starts, and -`pos` denotes the index in `contents` where the token starts. `progress` is a -number between `0` and `1` denoting the percental position of the token. +`"c_value_type"`, `"invalid"`. `row` is the zero-based index of the row where +the token starts, `col` the zero-based index of column where the token starts, +and `pos` denotes the index in `contents` where the token starts. `progress` is +a number between `0` and `1` denoting the percental position of the token. #### `sgf.tokenize(contents)` diff --git a/package-lock.json b/package-lock.json index c245f59..1d3a5f4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -550,6 +550,11 @@ "integrity": "sha512-6/v2PC/6UTGcWPPetb9acL8foberUg/CtPdALeJUdD1B/weHNvzftoo00gYznqHGRhHEbykUGzqfG9RWOSr5yw==", "dev": true }, + "doken": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/doken/-/doken-1.0.0.tgz", + "integrity": "sha512-KoWqzS6SrHWKqbRU8DlXAEja0IorWvSN4O37ptueAc/unSnbd4lOYGFKNaRrOH2CayIrdkQje3EoTlg6Xpn3xg==" + }, "ecc-jsbn": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", diff --git a/package.json b/package.json index 442e930..7a12c27 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,9 @@ "url": "https://github.com/SabakiHQ/sgf/issues" }, "homepage": "https://github.com/SabakiHQ/sgf", - "dependencies": {}, + "dependencies": { + "doken": "^1.0.0" + }, "devDependencies": { "iconv-lite": "^0.4.24", "jschardet": "^2.1.0", diff --git a/src/tokenize.js b/src/tokenize.js index 002a3fd..860e3c6 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -1,71 +1,28 @@ +const {createTokenizer, regexRule} = require('doken') const iconv = require('./iconv-lite') const jschardet = require('./jschardet') const {unescapeString} = require('./helper') -const rules = { - whitespace: /^\s+/, - parenthesis: /^(\(|\))/, - semicolon: /^;/, - prop_ident: /^[A-Za-z]+/, - c_value_type: /^\[([^\\\]]|\\[^])*\]/ -} +const tokenizeInner = createTokenizer({ + rules: [ + regexRule('_whitespace', /\s+/y, {lineBreaks: true}), + regexRule('parenthesis', /(\(|\))/y), + regexRule('semicolon', /;/y), + regexRule('prop_ident', /[A-Za-z]+/y), + regexRule('c_value_type', /\[([^\\\]]|\\[^])*\]/y, {lineBreaks: true}) + ] +}) exports.tokenizeIter = function*(contents) { let length = contents.length - let [row, col, pos] = [0, 0, 0] - - while (contents.length > 0) { - let value = null - - for (let type in rules) { - let match = rules[type].exec(contents) - if (match == null) continue - - value = match[0] - if (type !== 'whitespace') { - yield { - type, - value, - row, - col, - pos, - progress: pos / (length - 1) - } - } - - break - } + for (let token of tokenizeInner(contents)) { + token.progress = token.pos / (length - 1) + delete token.length - if (value == null) { - value = contents[0] - - yield { - type: 'invalid', - value, - row, - col, - pos, - progress: pos / (length - 1) - } - } - - // Update source position - - let newlineIndices = Array.from(value) - .map((c, i) => (c === '\n' ? i : null)) - .filter(x => x != null) - - row += newlineIndices.length - - if (newlineIndices.length > 0) { - col = value.length - newlineIndices.slice(-1)[0] - 1 - } else { - col += value.length - } + if (token.type == null) token.type = 'invalid' - pos += value.length - contents = contents.slice(value.length) + yield token } }