Skip to content

Commit

Permalink
feat: chars, codes
Browse files Browse the repository at this point in the history
Signed-off-by: Lexus Drumgold <[email protected]>
  • Loading branch information
unicornware committed Jun 14, 2024
1 parent c9aecaa commit c949bdc
Show file tree
Hide file tree
Showing 15 changed files with 426 additions and 46 deletions.
1 change: 1 addition & 0 deletions .dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ vates
vfile
vitest
yarnrc
zwnj
9 changes: 9 additions & 0 deletions .eslintrc.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@
*/
// ESLint configuration object for this repository.
const config = {
// Inherit every setting from the shared base configuration.
extends: ['./.eslintrc.base.cjs'],
overrides: [
// Re-list the overrides exported by the base config, then append the
// file-specific override below.
// NOTE(review): assumes the base config exports an `overrides` array;
// spreading `undefined` here would throw — confirm.
...require('./.eslintrc.base.cjs').overrides,
{
// Applies only to the character code dictionary module.
files: ['src/codes.ts'],
rules: {
// 0 turns the rule off.
// NOTE(review): presumably lets the dictionary keys be listed in an
// order other than alphabetical — confirm.
'sort-keys': 0
}
}
],
// Stop ESLint from searching parent directories for further config files.
root: true
}

Expand Down
47 changes: 35 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
- [`CharacterReader#peekMatch(test)`](#characterreaderpeekmatchtest)
- [`CodeReader(file[, start])`](#codereaderfile-start)
- [`CodeReader#serialize(...codes)`](#codereaderserializecodes)
- [`chars`](#chars)
- [`codes`](#codes)
- [`CharacterMatch`](#charactermatch)
- [`Character`](#character)
- [`Code`](#code)
Expand All @@ -50,12 +52,13 @@

## What is this?

This package implements an input reader that can be used to read characters and code points from a file.
This package implements an input reader that can be used to read characters and character codes (code points) from a
file.

## When should I use this?

This package is useful when characters or code points need to be processed individually or as a group, such as when
building a parser or tokenizer.
This package is useful when characters or codes need to be read individually or as a group, such as when building a
parser or tokenizer.

## Install

Expand All @@ -77,14 +80,24 @@ yarn add @flex-development/vfile-reader
In Deno with [`esm.sh`][esmsh]:

```ts
import { CharacterReader, CodeReader } from 'https://esm.sh/@flex-development/vfile-reader'
import {
CharacterReader,
CodeReader,
chars,
codes
} from 'https://esm.sh/@flex-development/vfile-reader'
```

In browsers with [`esm.sh`][esmsh]:

```html
<script type="module">
import { CharacterReader, CodeReader } from 'https://esm.sh/@flex-development/vfile-reader'
import {
CharacterReader,
CodeReader,
chars,
codes
} from 'https://esm.sh/@flex-development/vfile-reader'
</script>
```

Expand Down Expand Up @@ -130,6 +143,8 @@ This package exports the following identifiers:
- [`CharacterReader`](#characterreaderfile-start)
- [`Reader`](#readerfile-start)
- [`chars`](#chars)
- [`codes`](#codes)
There is no default export.
Expand Down Expand Up @@ -284,19 +299,27 @@ Get the next match from the file without changing the position of the reader, wi
> **extends**: `Reader<Code>`
Create a new code point reader.
Create a new character code reader.
#### `CodeReader#serialize(...codes)`
Convert the specified sequence of code points to a string.
Convert the specified sequence of character codes to a string.
##### `Parameters`
- `...codes` ([`Code[]`](#code)) &mdash; code points sequence
- `...codes` ([`Code[]`](#code)) &mdash; character code sequence
##### `Returns`
(`string`) String created from code point sequence.
(`string`) String created from character code sequence.
### `chars`
Character dictionary.
### `codes`
Character code dictionary.
### `CharacterMatch`
Expand All @@ -316,8 +339,7 @@ type Character = string | null
### `Code`
An integer between `0` and `0x10FFFF` (inclusive) representing a Unicode code point in a source file, with `null`
denoting end of file (TypeScript type).
Character code ([code point][codepointat]) in a source file, with `null` denoting end of file (TypeScript type).
```ts
type Code = number | null
Expand Down Expand Up @@ -346,7 +368,7 @@ type ReaderIteratorResult<
### `ReaderValue`
Character or code point in a source file, with `null` denoting the end of file (TypeScript type).
Character or character code in a source file, with `null` denoting the end of file (TypeScript type).
```ts
type ReaderValue = Character | Code
Expand All @@ -367,6 +389,7 @@ See [`CONTRIBUTING.md`](CONTRIBUTING.md).
This project has a [code of conduct](CODE_OF_CONDUCT.md). By interacting with this repository, organization, or
community you agree to abide by its terms.
[codepointat]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[esmsh]: https://esm.sh/
[location]: https://github.com/flex-development/vfile-location#locationfile-start
Expand Down
2 changes: 1 addition & 1 deletion src/__snapshots__/code.reader.functional.snap
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

exports[`functional:CodeReader > iterator > should iterate over all code points 1`] = `
exports[`functional:CodeReader > iterator > should iterate over all codes 1`] = `
[
128525,
128077,
Expand Down
13 changes: 13 additions & 0 deletions src/__tests__/chars.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
* @file Unit Tests - chars
* @module vfile-reader/tests/unit/chars
*/

import testSubject from '../chars'
import codes from '../codes'

describe('unit:chars', () => {
  it('should have same keys as codes', () => {
    // Arrange: every name in the code dictionary must exist in `chars`
    const expected = Object.keys(codes)

    // Act + Expect
    expect(testSubject).to.have.keys(expected)
  })
})
2 changes: 1 addition & 1 deletion src/__tests__/code.reader.functional.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ describe('functional:CodeReader', () => {
})

describe('iterator', () => {
it('should iterate over all code points', () => {
it('should iterate over all codes', () => {
// Arrange
const codes: Code[] = []

Expand Down
4 changes: 2 additions & 2 deletions src/__tests__/code.reader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ describe('unit:CodeReader', () => {
})

describe('#output', () => {
it('should return current code point without changing position', () => {
it('should return current code without changing position', () => {
expect(subject.output).to.equal(subject.peek(0))
expect(subject.index).to.eq(index)
})
})

describe('#previous', () => {
it('should return previous code point without changing position', () => {
it('should return previous code without changing position', () => {
expect(subject.previous).to.eq(subject.peek(-1))
expect(subject.index).to.eq(index)
})
Expand Down
19 changes: 19 additions & 0 deletions src/__tests__/codes.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
* @file Unit Tests - codes
* @module vfile-reader/tests/unit/codes
*/

import chars from '../chars'
import testSubject from '../codes'

describe('unit:codes', () => {
  it('should define codes for all keys in chars', () => {
    Object.entries(chars).forEach(([key, char]) => {
      // Code point of the character's first position; when there is none
      // (e.g. the `null` end-of-file marker) fall back to the value itself.
      const point = char?.codePointAt(0)
      const expected = point ?? char

      expect(testSubject).to.have.property(key, expected)
    })
  })

  it('should have same keys as chars', () => {
    // Arrange: every name in the character dictionary must exist in `codes`
    const expected = Object.keys(chars)

    // Act + Expect
    expect(testSubject).to.have.keys(expected)
  })
})
4 changes: 3 additions & 1 deletion src/__tests__/index.e2e.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ describe('e2e:vfile-reader', () => {
expect(testSubject).to.have.keys([
'CharacterReader',
'CodeReader',
'Reader'
'Reader',
'chars',
'codes'
])
})
})
153 changes: 153 additions & 0 deletions src/chars.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/**
* @file chars
* @module vfile-reader/chars
*/

import type { Character } from '@flex-development/vfile-reader'

/**
 * Character dictionary.
 *
 * Maps a descriptive name to the single character it denotes. Most entries
 * come from the Basic Latin block; `bom`, `ls`, `ps`, `replacement`, `zwj`,
 * and `zwnj` lie outside of it. `eof` is `null` rather than a string and
 * denotes end of file.
 *
 * Non-printable and non-ASCII characters are written as escape sequences so
 * that they stay visible in source and survive re-encoding of the file.
 *
 * @see https://symbl.cc/en/unicode/blocks/basic-latin
 *
 * @enum {Character}
 */
const chars = {
  ack: '\u0006',
  ampersand: '&',
  apostrophe: '\'',
  asterisk: '*',
  at: '@',
  backslash: '\\',
  backtick: '`',
  bar: '|',
  bel: '\u0007',
  bom: '\uFEFF',
  bs: '\u0008',
  can: '\u0018',
  caret: '^',
  colon: ':',
  comma: ',',
  cr: '\r',
  dc1: '\u0011',
  dc2: '\u0012',
  dc3: '\u0013',
  dc4: '\u0014',
  del: '\u007F',
  digit0: '0',
  digit1: '1',
  digit2: '2',
  digit3: '3',
  digit4: '4',
  digit5: '5',
  digit6: '6',
  digit7: '7',
  digit8: '8',
  digit9: '9',
  dle: '\u0010',
  dollar: '$',
  dot: '.',
  em: '\u0019',
  enq: '\u0005',
  eof: null,
  eot: '\u0004',
  equal: '=',
  esc: '\u001B',
  etb: '\u0017',
  etx: '\u0003',
  exclamation: '!',
  ff: '\f',
  fs: '\u001C',
  gs: '\u001D',
  gt: '>',
  hash: '#',
  ht: '\t',
  leftBrace: '{',
  leftBracket: '[',
  leftParen: '(',
  lf: '\n',
  lowercaseA: 'a',
  lowercaseB: 'b',
  lowercaseC: 'c',
  lowercaseD: 'd',
  lowercaseE: 'e',
  lowercaseF: 'f',
  lowercaseG: 'g',
  lowercaseH: 'h',
  lowercaseI: 'i',
  lowercaseJ: 'j',
  lowercaseK: 'k',
  lowercaseL: 'l',
  lowercaseM: 'm',
  lowercaseN: 'n',
  lowercaseO: 'o',
  lowercaseP: 'p',
  lowercaseQ: 'q',
  lowercaseR: 'r',
  lowercaseS: 's',
  lowercaseT: 't',
  lowercaseU: 'u',
  lowercaseV: 'v',
  lowercaseW: 'w',
  lowercaseX: 'x',
  lowercaseY: 'y',
  lowercaseZ: 'z',
  ls: '\u2028',
  lt: '<',
  minus: '-',
  nak: '\u0015',
  nul: '\0',
  percent: '%',
  plus: '+',
  ps: '\u2029',
  question: '?',
  quotation: '"',
  // Escaped rather than a raw glyph: a literal U+FFFD in source cannot be
  // told apart from the marker decoders insert for corrupted bytes.
  replacement: '\uFFFD',
  rightBrace: '}',
  rightBracket: ']',
  rightParen: ')',
  rs: '\u001E',
  semicolon: ';',
  si: '\u000F',
  slash: '/',
  so: '\u000E',
  soh: '\u0001',
  space: ' ',
  stx: '\u0002',
  sub: '\u001A',
  syn: '\u0016',
  tilde: '~',
  underscore: '_',
  uppercaseA: 'A',
  uppercaseB: 'B',
  uppercaseC: 'C',
  uppercaseD: 'D',
  uppercaseE: 'E',
  uppercaseF: 'F',
  uppercaseG: 'G',
  uppercaseH: 'H',
  uppercaseI: 'I',
  uppercaseJ: 'J',
  uppercaseK: 'K',
  uppercaseL: 'L',
  uppercaseM: 'M',
  uppercaseN: 'N',
  uppercaseO: 'O',
  uppercaseP: 'P',
  uppercaseQ: 'Q',
  uppercaseR: 'R',
  uppercaseS: 'S',
  uppercaseT: 'T',
  uppercaseU: 'U',
  uppercaseV: 'V',
  uppercaseW: 'W',
  uppercaseX: 'X',
  uppercaseY: 'Y',
  uppercaseZ: 'Z',
  us: '\u001F',
  vt: '\v',
  zwj: '\u200D',
  zwnj: '\u200C'
} as const

export default chars
Loading

0 comments on commit c949bdc

Please sign in to comment.