Commit

Use doken
yishn committed Feb 3, 2020
1 parent 7aab80e commit 1975f01
Showing 4 changed files with 27 additions and 63 deletions.
8 changes: 4 additions & 4 deletions README.md
````diff
@@ -127,10 +127,10 @@ A generator function that yields SGF tokens, objects of the following form:
 ```
 
 `type` is one of `"parenthesis"`, `"semicolon"`, `"prop_ident"`,
-`"c_value_type", "invalid"`. `row` is the zero-based index of the row where the
-token starts, `col` the zero-based index of column where the token starts, and
-`pos` denotes the index in `contents` where the token starts. `progress` is a
-number between `0` and `1` denoting the percental position of the token.
+`"c_value_type"`, `"invalid"`. `row` is the zero-based index of the row where
+the token starts, `col` the zero-based index of column where the token starts,
+and `pos` denotes the index in `contents` where the token starts. `progress` is
+a number between `0` and `1` denoting the percental position of the token.
 
 #### `sgf.tokenize(contents)`
````

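For orientation (not part of the commit): given the corrected wording above, a token yielded for the 8-character input `(;B[dd])` might look like the sketch below. The concrete values are illustrative, not captured library output.

```js
// Hypothetical token shape per the README text above.
let token = {
  type: 'prop_ident',
  value: 'B',
  row: 0, // zero-based row where the token starts
  col: 2, // zero-based column where the token starts
  pos: 2, // index in `contents` where the token starts
  progress: 2 / 7 // between 0 and 1; pos / (length - 1), per src/tokenize.js
}
```
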
5 changes: 5 additions & 0 deletions package-lock.json

(Generated lockfile; diff not rendered.)

4 changes: 3 additions & 1 deletion package.json
```diff
@@ -27,7 +27,9 @@
     "url": "https://github.com/SabakiHQ/sgf/issues"
   },
   "homepage": "https://github.com/SabakiHQ/sgf",
-  "dependencies": {},
+  "dependencies": {
+    "doken": "^1.0.0"
+  },
   "devDependencies": {
     "iconv-lite": "^0.4.24",
     "jschardet": "^2.1.0",
```
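For readers new to the dependency added above: the sketch below shows the doken API surface as this commit uses it (`createTokenizer` over named `regexRule`s with sticky `/y` regexes). It is inferred from the src/tokenize.js diff that follows, not from doken's own documentation, so treat the details as assumptions.

```js
// doken usage sketch, inferred from its use in this commit (doken@^1.0.0).
const {createTokenizer, regexRule} = require('doken')

const tokenize = createTokenizer({
  rules: [
    // Assumption: a leading underscore marks rules whose matches are
    // consumed but never emitted as tokens (whitespace here).
    regexRule('_whitespace', /\s+/y, {lineBreaks: true}),
    regexRule('word', /[A-Za-z]+/y)
  ]
})

// Yielded tokens appear to carry type, value, pos, row, col, and length;
// the commit below deletes `length` and derives `progress` from `pos`.
for (let token of tokenize('ab cd')) {
  console.log(token.type, token.pos)
}
```
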
73 changes: 15 additions & 58 deletions src/tokenize.js
```diff
@@ -1,71 +1,28 @@
+const {createTokenizer, regexRule} = require('doken')
 const iconv = require('./iconv-lite')
 const jschardet = require('./jschardet')
 const {unescapeString} = require('./helper')
 
-const rules = {
-  whitespace: /^\s+/,
-  parenthesis: /^(\(|\))/,
-  semicolon: /^;/,
-  prop_ident: /^[A-Za-z]+/,
-  c_value_type: /^\[([^\\\]]|\\[^])*\]/
-}
+const tokenizeInner = createTokenizer({
+  rules: [
+    regexRule('_whitespace', /\s+/y, {lineBreaks: true}),
+    regexRule('parenthesis', /(\(|\))/y),
+    regexRule('semicolon', /;/y),
+    regexRule('prop_ident', /[A-Za-z]+/y),
+    regexRule('c_value_type', /\[([^\\\]]|\\[^])*\]/y, {lineBreaks: true})
+  ]
+})
 
 exports.tokenizeIter = function*(contents) {
   let length = contents.length
-  let [row, col, pos] = [0, 0, 0]
 
-  while (contents.length > 0) {
-    let value = null
-
-    for (let type in rules) {
-      let match = rules[type].exec(contents)
-      if (match == null) continue
-
-      value = match[0]
-
-      if (type !== 'whitespace') {
-        yield {
-          type,
-          value,
-          row,
-          col,
-          pos,
-          progress: pos / (length - 1)
-        }
-      }
-
-      break
-    }
+  for (let token of tokenizeInner(contents)) {
+    token.progress = token.pos / (length - 1)
+    delete token.length
 
-    if (value == null) {
-      value = contents[0]
-
-      yield {
-        type: 'invalid',
-        value,
-        row,
-        col,
-        pos,
-        progress: pos / (length - 1)
-      }
-    }
-
-    // Update source position
-
-    let newlineIndices = Array.from(value)
-      .map((c, i) => (c === '\n' ? i : null))
-      .filter(x => x != null)
-
-    row += newlineIndices.length
-
-    if (newlineIndices.length > 0) {
-      col = value.length - newlineIndices.slice(-1)[0] - 1
-    } else {
-      col += value.length
-    }
+    if (token.type == null) token.type = 'invalid'
 
-    pos += value.length
-    contents = contents.slice(value.length)
+    yield token
   }
 }
```

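For context (not part of the commit): a short sketch of consuming the rewritten generator. It assumes the file's exported `tokenizeIter` and that doken tokens expose a `value` field, as the README documents; the commented output is what the rules above imply for this input.

```js
// Usage sketch for the new doken-based tokenizer.
const {tokenizeIter} = require('./src/tokenize')

for (let token of tokenizeIter('(;FF[4]B[dd])')) {
  console.log(token.type, JSON.stringify(token.value), token.pos)
}
// Implied by the rules above:
//   parenthesis "(" 0
//   semicolon ";" 1
//   prop_ident "FF" 2
//   c_value_type "[4]" 4
//   prop_ident "B" 7
//   c_value_type "[dd]" 8
//   parenthesis ")" 12
```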
