diff --git a/deps/cjs-module-lexer/LICENSE b/deps/cjs-module-lexer/LICENSE index b31c17aa33cdd9..935b357962d08b 100644 --- a/deps/cjs-module-lexer/LICENSE +++ b/deps/cjs-module-lexer/LICENSE @@ -1,10 +1,10 @@ -MIT License ------------ - -Copyright (C) 2018-2020 Guy Bedford - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +MIT License +----------- + +Copyright (C) 2018-2020 Guy Bedford + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/cjs-module-lexer/README.md b/deps/cjs-module-lexer/README.md index 2af01279ed2f38..cc7ca50cc72243 100644 --- a/deps/cjs-module-lexer/README.md +++ b/deps/cjs-module-lexer/README.md @@ -1,462 +1,464 @@ -# CJS Module Lexer - -[![Build Status][travis-image]][travis-url] - -A [very fast](#benchmarks) JS CommonJS module syntax lexer used to detect the most likely list of named exports of a CommonJS module. - -Outputs the list of named exports (`exports.name = ...`) and possible module reexports (`module.exports = require('...')`), including the common transpiler variations of these cases. - -Forked from https://github.com/guybedford/es-module-lexer. - -_Comprehensively handles the JS language grammar while remaining small and fast. - ~90ms per MB of JS cold and ~15ms per MB of JS warm, [see benchmarks](#benchmarks) for more info._ - -### Project Status - -This project is used in Node.js core for detecting the named exports available when importing a CJS module into ESM, and is maintained for this purpose. - -PRs will be accepted and upstreamed for parser bugs, performance improvements or new syntax support only. - -_Detection patterns for this project are **frozen**_. This is because adding any new export detection patterns would result in fragmented backwards-compatibility. Specifically, it would be very difficult to figure out why an ES module named export for CommonJS might work in newer Node.js versions but not older versions. 
This problem would only be discovered downstream of module authors, with the fix for module authors being to then have to understand which patterns in this project provide full backwards-compatibily. Rather, by fully freezing the detected patterns, if it works in any Node.js version it will work in any other. Build tools can also reliably treat the supported syntax for this project as a part of their output target for ensuring syntax support. - -### Usage - -``` -npm install cjs-module-lexer -``` - -For use in CommonJS: - -```js -const { parse } = require('cjs-module-lexer'); - -// `init` return a promise for parity with the ESM API, but you do not have to call it - -const { exports, reexports } = parse(` - // named exports detection - module.exports.a = 'a'; - (function () { - exports.b = 'b'; - })(); - Object.defineProperty(exports, 'c', { value: 'c' }); - /* exports.d = 'not detected'; */ - - // reexports detection - if (maybe) module.exports = require('./dep1.js'); - if (another) module.exports = require('./dep2.js'); - - // literal exports assignments - module.exports = { a, b: c, d, 'e': f } - - // __esModule detection - Object.defineProperty(module.exports, '__esModule', { value: true }) -`); - -// exports === ['a', 'b', 'c', '__esModule'] -// reexports === ['./dep1.js', './dep2.js'] -``` - -When using the ESM version, Wasm is supported instead: - -```js -import { parse, init } from 'cjs-module-lexer'; -// init() needs to be called and waited upon, or use initSync() to compile -// Wasm blockingly and synchronously. -await init(); -const { exports, reexports } = parse(source); -``` - -The Wasm build is around 1.5x faster and without a cold start. - -### Grammar - -CommonJS exports matches are run against the source token stream. 
- -The token grammar is: - -``` -IDENTIFIER: As defined by ECMA-262, without support for identifier `\` escapes, filtered to remove strict reserved words: - "implements", "interface", "let", "package", "private", "protected", "public", "static", "yield", "enum" - -STRING_LITERAL: A `"` or `'` bounded ECMA-262 string literal. - -MODULE_EXPORTS: `module` `.` `exports` - -EXPORTS_IDENTIFIER: MODULE_EXPORTS_IDENTIFIER | `exports` - -EXPORTS_DOT_ASSIGN: EXPORTS_IDENTIFIER `.` IDENTIFIER `=` - -EXPORTS_LITERAL_COMPUTED_ASSIGN: EXPORTS_IDENTIFIER `[` STRING_LITERAL `]` `=` - -EXPORTS_LITERAL_PROP: (IDENTIFIER (`:` IDENTIFIER)?) | (STRING_LITERAL `:` IDENTIFIER) - -EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE) - -EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN - -EXPORTS_DEFINE: `Object` `.` `defineProperty `(` EXPORTS_IDENFITIER `,` STRING_LITERAL - -EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {` - (`enumerable: true,`)? - ( - `value:` | - `get` (`: function` IDENTIFIER? )? `() {` return IDENTIFIER (`.` IDENTIFIER | `[` STRING_LITERAL `]`)? `;`? `}` `,`? - ) - `})` - -EXPORTS_LITERAL: MODULE_EXPORTS `=` `{` (EXPORTS_LITERAL_PROP | EXPORTS_SPREAD) `,`)+ `}` - -REQUIRE: `require` `(` STRING_LITERAL `)` - -EXPORTS_ASSIGN: (`var` | `const` | `let`) IDENTIFIER `=` (`_interopRequireWildcard (`)? REQUIRE - -MODULE_EXPORTS_ASSIGN: MODULE_EXPORTS `=` REQUIRE - -EXPORT_STAR: (`__export` | `__exportStar`) `(` REQUIRE - -EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2 `) {` - ( - ( - `if (` IDENTIFIER$2 `===` ( `'default'` | `"default"` ) `||` IDENTIFIER$2 `===` ( '__esModule' | `"__esModule"` ) `) return` `;`? - ( - (`if (Object` `.prototype`? `.hasOwnProperty.call(` IDENTIFIER `, ` IDENTIFIER$2 `)) return` `;`?)? - (`if (` IDENTIFIER$2 `in` EXPORTS_IDENTIFIER `&&` EXPORTS_IDENTIFIER `[` IDENTIFIER$2 `] ===` IDENTIFIER$1 `[` IDENTIFIER$2 `]) return` `;`)? - )? 
- ) | - `if (` IDENTIFIER$2 `!==` ( `'default'` | `"default"` ) (`&& !` (`Object` `.prototype`? `.hasOwnProperty.call(` IDENTIFIER `, ` IDENTIFIER$2 `)` | IDENTIFIER `.hasOwnProperty(` IDENTIFIER$2 `)`))? `)` - ) - ( - EXPORTS_IDENTIFIER `[` IDENTIFIER$2 `] =` IDENTIFIER$1 `[` IDENTIFIER$2 `]` `;`? | - `Object.defineProperty(` EXPORTS_IDENTIFIER `, ` IDENTIFIER$2 `, { enumerable: true, get` (`: function` IDENTIFIER? )? `() { return ` IDENTIFIER$1 `[` IDENTIFIER$2 `]` `;`? `}` `,`? `})` `;`? - ) - `})` -``` - -Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment. - -* The returned export names are taken to be the combination of: - 1. All `IDENTIFIER` and `STRING_LITERAL` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches. - 2. The first `STRING_LITERAL` slot for all `EXPORTS_DEFINE_VALUE` matches where that same string is not an `EXPORTS_DEFINE` match that is not also an `EXPORTS_DEFINE_VALUE` match. -* The reexport specifiers are taken to be the combination of: - 1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`. - 2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`. - -### Parsing Examples - -#### Named Exports Parsing - -The basic matching rules for named exports are `exports.name`, `exports['name']` or `Object.defineProperty(exports, 'name', ...)`. 
This matching is done without scope analysis and regardless of the expression position: - -```js -// DETECTS EXPORTS: a, b -(function (exports) { - exports.a = 'a'; - exports['b'] = 'b'; -})(exports); -``` - -Because there is no scope analysis, the above detection may overclassify: - -```js -// DETECTS EXPORTS: a, b, c -(function (exports, Object) { - exports.a = 'a'; - exports['b'] = 'b'; - if (false) - exports.c = 'c'; -})(NOT_EXPORTS, NOT_OBJECT); -``` - -It will in turn underclassify in cases where the identifiers are renamed: - -```js -// DETECTS: NO EXPORTS -(function (e) { - e.a = 'a'; - e['b'] = 'b'; -})(exports); -``` - -#### Getter Exports Parsing - -`Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression: - -```js -// DETECTS: a, b, c, d, __esModule -Object.defineProperty(exports, 'a', { - enumerable: true, - get: function () { - return q.p; - } -}); -Object.defineProperty(exports, 'b', { - enumerable: true, - get: function () { - return q['p']; - } -}); -Object.defineProperty(exports, 'c', { - enumerable: true, - get () { - return b; - } -}); -Object.defineProperty(exports, 'd', { value: 'd' }); -Object.defineProperty(exports, '__esModule', { value: true }); -``` - -Value properties are also detected specifically: - -```js -Object.defineProperty(exports, 'a', { - value: 'no problem' -}); -``` - -To avoid matching getters that have side effects, any getter for an export name that does not support the forms above will -opt-out of the getter matching: - -```js -// DETECTS: NO EXPORTS -Object.defineProperty(exports, 'a', { - get () { - return 'nope'; - } -}); - -if (false) { - Object.defineProperty(module.exports, 'a', { - get () { - return dynamic(); - } - }) -} -``` - -Alternative object definition structures or getter function bodies are not detected: - -```js -// DETECTS: NO EXPORTS -Object.defineProperty(exports, 'a', { - enumerable: false, - get () { - return p; - } -}); 
-Object.defineProperty(exports, 'b', { - configurable: true, - get () { - return p; - } -}); -Object.defineProperty(exports, 'c', { - get: () => p -}); -Object.defineProperty(exports, 'd', { - enumerable: true, - get: function () { - return dynamic(); - } -}); -Object.defineProperty(exports, 'e', { - enumerable: true, - get () { - return 'str'; - } -}); -``` - -`Object.defineProperties` is also not supported. - -#### Exports Object Assignment - -A best-effort is made to detect `module.exports` object assignments, but because this is not a full parser, arbitrary expressions are not handled in the -object parsing process. - -Simple object definitions are supported: - -```js -// DETECTS EXPORTS: a, b, c -module.exports = { - a, - 'b': b, - c: c, - ...d -}; -``` - -Object properties that are not identifiers or string expressions will bail out of the object detection, while spreads are ignored: - -```js -// DETECTS EXPORTS: a, b -module.exports = { - a, - ...d, - b: require('c'), - c: "not detected since require('c') above bails the object detection" -} -``` - -`Object.defineProperties` is not currently supported either. - -#### module.exports reexport assignment - -Any `module.exports = require('mod')` assignment is detected as a reexport, but only the last one is returned: - -```js -// DETECTS REEXPORTS: c -module.exports = require('a'); -(module => module.exports = require('b'))(NOT_MODULE); -if (false) module.exports = require('c'); -``` - -This is to avoid over-classification in Webpack bundles with externals which include `module.exports = require('external')` in their source for every external dependency. - -In exports object assignment, any spread of `require()` are detected as multiple separate reexports: - -```js -// DETECTS REEXPORTS: a, b -module.exports = require('ignored'); -module.exports = { - ...require('a'), - ...require('b') -}; -``` - -#### Transpiler Re-exports - -For named exports, transpiler output works well with the rules described above. 
- -But for star re-exports, special care is taken to support common patterns of transpiler outputs from Babel and TypeScript as well as bundlers like RollupJS. -These reexport and star reexport patterns are restricted to only be detected at the top-level as provided by the direct output of these tools. - -For example, `export * from 'external'` is output by Babel as: - -```js -"use strict"; - -exports.__esModule = true; - -var _external = require("external"); - -Object.keys(_external).forEach(function (key) { - if (key === "default" || key === "__esModule") return; - exports[key] = _external[key]; -}); -``` - -Where the `var _external = require("external")` is specifically detected as well as the `Object.keys(_external)` statement, down to the exact -for of that entire expression including minor variations of the output. The `_external` and `key` identifiers are carefully matched in this -detection. - -Similarly for TypeScript, `export * from 'external'` is output as: - -```js -"use strict"; -function __export(m) { - for (var p in m) if (!exports.hasOwnProperty(p)) exports[p] = m[p]; -} -Object.defineProperty(exports, "__esModule", { value: true }); -__export(require("external")); -``` - -Where the `__export(require("external"))` statement is explicitly detected as a reexport, including variations `tslib.__export` and `__exportStar`. - -### Environment Support - -Node.js 10+, and [all browsers with Web Assembly support](https://caniuse.com/#feat=wasm). - -### JS Grammar Support - -* Token state parses all line comments, block comments, strings, template strings, blocks, parens and punctuators. -* Division operator / regex token ambiguity is handled via backtracking checks against punctuator prefixes, including closing brace or paren backtracking. -* Always correctly parses valid JS source, but may parse invalid JS source without errors. - -### Benchmarks - -Benchmarks can be run with `npm run bench`. 
- -Current results: - -JS Build: - -``` -Module load time -> 4ms -Cold Run, All Samples -test/samples/*.js (3635 KiB) -> 299ms - -Warm Runs (average of 25 runs) -test/samples/angular.js (1410 KiB) -> 13.96ms -test/samples/angular.min.js (303 KiB) -> 4.72ms -test/samples/d3.js (553 KiB) -> 6.76ms -test/samples/d3.min.js (250 KiB) -> 4ms -test/samples/magic-string.js (34 KiB) -> 0.64ms -test/samples/magic-string.min.js (20 KiB) -> 0ms -test/samples/rollup.js (698 KiB) -> 8.48ms -test/samples/rollup.min.js (367 KiB) -> 5.36ms - -Warm Runs, All Samples (average of 25 runs) -test/samples/*.js (3635 KiB) -> 40.28ms -``` - -Wasm Build: -``` -Module load time -> 10ms -Cold Run, All Samples -test/samples/*.js (3635 KiB) -> 43ms - -Warm Runs (average of 25 runs) -test/samples/angular.js (1410 KiB) -> 9.32ms -test/samples/angular.min.js (303 KiB) -> 3.16ms -test/samples/d3.js (553 KiB) -> 5ms -test/samples/d3.min.js (250 KiB) -> 2.32ms -test/samples/magic-string.js (34 KiB) -> 0.16ms -test/samples/magic-string.min.js (20 KiB) -> 0ms -test/samples/rollup.js (698 KiB) -> 6.28ms -test/samples/rollup.min.js (367 KiB) -> 3.6ms - -Warm Runs, All Samples (average of 25 runs) -test/samples/*.js (3635 KiB) -> 27.76ms -``` - -### Wasm Build Steps - -To build download the WASI SDK from https://github.com/WebAssembly/wasi-sdk/releases. - -The Makefile assumes the existence of "wasi-sdk-11.0" and "wabt" (optional) as sibling folders to this project. - -The build through the Makefile is then run via `make lib/lexer.wasm`, which can also be triggered via `npm run build-wasm` to create `dist/lexer.js`. - -On Windows it may be preferable to use the Linux subsystem. - -After the Web Assembly build, the CJS build can be triggered via `npm run build`. - -Optimization passes are run with [Binaryen](https://github.com/WebAssembly/binaryen) prior to publish to reduce the Web Assembly footprint. 
- -### License - -MIT - -[travis-url]: https://travis-ci.org/guybedford/es-module-lexer -[travis-image]: https://travis-ci.org/guybedford/es-module-lexer.svg?branch=master +# CJS Module Lexer + +[![Build Status][travis-image]][travis-url] + +A [very fast](#benchmarks) JS CommonJS module syntax lexer used to detect the most likely list of named exports of a CommonJS module. + +Outputs the list of named exports (`exports.name = ...`) and possible module reexports (`module.exports = require('...')`), including the common transpiler variations of these cases. + +Forked from https://github.com/guybedford/es-module-lexer. + +_Comprehensively handles the JS language grammar while remaining small and fast. - ~90ms per MB of JS cold and ~15ms per MB of JS warm, [see benchmarks](#benchmarks) for more info._ + +### Project Status + +This project is used in Node.js core for detecting the named exports available when importing a CJS module into ESM, and is maintained for this purpose. + +PRs will be accepted and upstreamed for parser bugs, performance improvements or new syntax support only. + +_Detection patterns for this project are **frozen**_. This is because adding any new export detection patterns would result in fragmented backwards-compatibility. Specifically, it would be very difficult to figure out why an ES module named export for CommonJS might work in newer Node.js versions but not older versions. This problem would only be discovered downstream of module authors, with the fix for module authors being to then have to understand which patterns in this project provide full backwards-compatibility. Rather, by fully freezing the detected patterns, if it works in any Node.js version it will work in any other. Build tools can also reliably treat the supported syntax for this project as a part of their output target for ensuring syntax support. 
+ +### Usage + +``` +npm install cjs-module-lexer +``` + +For use in CommonJS: + +```js +const { parse } = require('cjs-module-lexer'); + +// `init` return a promise for parity with the ESM API, but you do not have to call it + +const { exports, reexports } = parse(` + // named exports detection + module.exports.a = 'a'; + (function () { + exports.b = 'b'; + })(); + Object.defineProperty(exports, 'c', { value: 'c' }); + /* exports.d = 'not detected'; */ + + // reexports detection + if (maybe) module.exports = require('./dep1.js'); + if (another) module.exports = require('./dep2.js'); + + // literal exports assignments + module.exports = { a, b: c, d, 'e': f } + + // __esModule detection + Object.defineProperty(module.exports, '__esModule', { value: true }) +`); + +// exports === ['a', 'b', 'c', '__esModule'] +// reexports === ['./dep1.js', './dep2.js'] +``` + +When using the ESM version, Wasm is supported instead: + +```js +import { parse, init } from 'cjs-module-lexer'; +// init() needs to be called and waited upon, or use initSync() to compile +// Wasm blockingly and synchronously. +await init(); +const { exports, reexports } = parse(source); +``` + +The Wasm build is around 1.5x faster and without a cold start. + +### Grammar + +CommonJS exports matches are run against the source token stream. + +The token grammar is: + +``` +IDENTIFIER: As defined by ECMA-262, without support for identifier `\` escapes, filtered to remove strict reserved words: + "implements", "interface", "let", "package", "private", "protected", "public", "static", "yield", "enum" + +STRING_LITERAL: A `"` or `'` bounded ECMA-262 string literal. + +MODULE_EXPORTS: `module` `.` `exports` + +EXPORTS_IDENTIFIER: MODULE_EXPORTS_IDENTIFIER | `exports` + +EXPORTS_DOT_ASSIGN: EXPORTS_IDENTIFIER `.` IDENTIFIER `=` + +EXPORTS_LITERAL_COMPUTED_ASSIGN: EXPORTS_IDENTIFIER `[` STRING_LITERAL `]` `=` + +EXPORTS_LITERAL_PROP: (IDENTIFIER (`:` IDENTIFIER)?) 
| (STRING_LITERAL `:` IDENTIFIER) + +EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE) + +EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN + +EXPORTS_DEFINE: `Object` `.` `defineProperty `(` EXPORTS_IDENFITIER `,` STRING_LITERAL + +EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {` + (`enumerable: true,`)? + ( + `value:` | + `get` (`: function` IDENTIFIER? )? `() {` return IDENTIFIER (`.` IDENTIFIER | `[` STRING_LITERAL `]`)? `;`? `}` `,`? + ) + `})` + +EXPORTS_LITERAL: MODULE_EXPORTS `=` `{` (EXPORTS_LITERAL_PROP | EXPORTS_SPREAD) `,`)+ `}` + +REQUIRE: `require` `(` STRING_LITERAL `)` + +EXPORTS_ASSIGN: (`var` | `const` | `let`) IDENTIFIER `=` (`_interopRequireWildcard (`)? REQUIRE + +MODULE_EXPORTS_ASSIGN: MODULE_EXPORTS `=` REQUIRE + +EXPORT_STAR: (`__export` | `__exportStar`) `(` REQUIRE + +EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2 `) {` + ( + ( + `if (` IDENTIFIER$2 `===` ( `'default'` | `"default"` ) `||` IDENTIFIER$2 `===` ( '__esModule' | `"__esModule"` ) `) return` `;`? + ( + (`if (Object` `.prototype`? `.hasOwnProperty.call(` IDENTIFIER `, ` IDENTIFIER$2 `)) return` `;`?)? + (`if (` IDENTIFIER$2 `in` EXPORTS_IDENTIFIER `&&` EXPORTS_IDENTIFIER `[` IDENTIFIER$2 `] ===` IDENTIFIER$1 `[` IDENTIFIER$2 `]) return` `;`)? + )? + ) | + `if (` IDENTIFIER$2 `!==` ( `'default'` | `"default"` ) (`&& !` (`Object` `.prototype`? `.hasOwnProperty.call(` IDENTIFIER `, ` IDENTIFIER$2 `)` | IDENTIFIER `.hasOwnProperty(` IDENTIFIER$2 `)`))? `)` + ) + ( + EXPORTS_IDENTIFIER `[` IDENTIFIER$2 `] =` IDENTIFIER$1 `[` IDENTIFIER$2 `]` `;`? | + `Object.defineProperty(` EXPORTS_IDENTIFIER `, ` IDENTIFIER$2 `, { enumerable: true, get` (`: function` IDENTIFIER? )? `() { return ` IDENTIFIER$1 `[` IDENTIFIER$2 `]` `;`? `}` `,`? `})` `;`? + ) + `})` +``` + +Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment. + +* The returned export names are taken to be the combination of: + 1. 
All `IDENTIFIER` and `STRING_LITERAL` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches. + 2. The first `STRING_LITERAL` slot for all `EXPORTS_DEFINE_VALUE` matches where that same string is not an `EXPORTS_DEFINE` match that is not also an `EXPORTS_DEFINE_VALUE` match. +* The reexport specifiers are taken to be the combination of: + 1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`. + 2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`. + +### Parsing Examples + +#### Named Exports Parsing + +The basic matching rules for named exports are `exports.name`, `exports['name']` or `Object.defineProperty(exports, 'name', ...)`. This matching is done without scope analysis and regardless of the expression position: + +```js +// DETECTS EXPORTS: a, b +(function (exports) { + exports.a = 'a'; + exports['b'] = 'b'; +})(exports); +``` + +Because there is no scope analysis, the above detection may overclassify: + +```js +// DETECTS EXPORTS: a, b, c +(function (exports, Object) { + exports.a = 'a'; + exports['b'] = 'b'; + if (false) + exports.c = 'c'; +})(NOT_EXPORTS, NOT_OBJECT); +``` + +It will in turn underclassify in cases where the identifiers are renamed: + +```js +// DETECTS: NO EXPORTS +(function (e) { + e.a = 'a'; + e['b'] = 'b'; +})(exports); +``` + +#### Getter Exports Parsing + +`Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression: + +```js +// DETECTS: a, b, c, d, __esModule +Object.defineProperty(exports, 'a', { + enumerable: true, + get: function () { + return q.p; + } +}); +Object.defineProperty(exports, 'b', { + enumerable: true, + get: function () { + return q['p']; + } +}); +Object.defineProperty(exports, 'c', { + enumerable: true, + get () { + return b; + } +}); +Object.defineProperty(exports, 'd', { value: 'd' }); 
+Object.defineProperty(exports, '__esModule', { value: true }); +``` + +Value properties are also detected specifically: + +```js +Object.defineProperty(exports, 'a', { + value: 'no problem' +}); +``` + +To avoid matching getters that have side effects, any getter for an export name that does not support the forms above will +opt-out of the getter matching: + +```js +// DETECTS: NO EXPORTS +Object.defineProperty(exports, 'a', { + get () { + return 'nope'; + } +}); + +if (false) { + Object.defineProperty(module.exports, 'a', { + get () { + return dynamic(); + } + }) +} +``` + +Alternative object definition structures or getter function bodies are not detected: + +```js +// DETECTS: NO EXPORTS +Object.defineProperty(exports, 'a', { + enumerable: false, + get () { + return p; + } +}); +Object.defineProperty(exports, 'b', { + configurable: true, + get () { + return p; + } +}); +Object.defineProperty(exports, 'c', { + get: () => p +}); +Object.defineProperty(exports, 'd', { + enumerable: true, + get: function () { + return dynamic(); + } +}); +Object.defineProperty(exports, 'e', { + enumerable: true, + get () { + return 'str'; + } +}); +``` + +`Object.defineProperties` is also not supported. + +#### Exports Object Assignment + +A best-effort is made to detect `module.exports` object assignments, but because this is not a full parser, arbitrary expressions are not handled in the +object parsing process. + +Simple object definitions are supported: + +```js +// DETECTS EXPORTS: a, b, c +module.exports = { + a, + 'b': b, + c: c, + ...d +}; +``` + +Object properties that are not identifiers or string expressions will bail out of the object detection, while spreads are ignored: + +```js +// DETECTS EXPORTS: a, b +module.exports = { + a, + ...d, + b: require('c'), + c: "not detected since require('c') above bails the object detection" +} +``` + +`Object.defineProperties` is not currently supported either. 
+ +#### module.exports reexport assignment + +Any `module.exports = require('mod')` assignment is detected as a reexport, but only the last one is returned: + +```js +// DETECTS REEXPORTS: c +module.exports = require('a'); +(module => module.exports = require('b'))(NOT_MODULE); +if (false) module.exports = require('c'); +``` + +This is to avoid over-classification in Webpack bundles with externals which include `module.exports = require('external')` in their source for every external dependency. + +In exports object assignment, any spreads of `require()` are detected as multiple separate reexports: + +```js +// DETECTS REEXPORTS: a, b +module.exports = require('ignored'); +module.exports = { + ...require('a'), + ...require('b') +}; +``` + +#### Transpiler Re-exports + +For named exports, transpiler output works well with the rules described above. + +But for star re-exports, special care is taken to support common patterns of transpiler outputs from Babel and TypeScript as well as bundlers like RollupJS. +These reexport and star reexport patterns are restricted to only be detected at the top-level as provided by the direct output of these tools. + +For example, `export * from 'external'` is output by Babel as: + +```js +"use strict"; + +exports.__esModule = true; + +var _external = require("external"); + +Object.keys(_external).forEach(function (key) { + if (key === "default" || key === "__esModule") return; + exports[key] = _external[key]; +}); +``` + +Where the `var _external = require("external")` is specifically detected as well as the `Object.keys(_external)` statement, down to the exact +form of that entire expression including minor variations of the output. The `_external` and `key` identifiers are carefully matched in this +detection. 
+ +Similarly for TypeScript, `export * from 'external'` is output as: + +```js +"use strict"; +function __export(m) { + for (var p in m) if (!exports.hasOwnProperty(p)) exports[p] = m[p]; +} +Object.defineProperty(exports, "__esModule", { value: true }); +__export(require("external")); +``` + +Where the `__export(require("external"))` statement is explicitly detected as a reexport, including variations `tslib.__export` and `__exportStar`. + +### Environment Support + +Node.js 10+, and [all browsers with Web Assembly support](https://caniuse.com/#feat=wasm). + +### JS Grammar Support + +* Token state parses all line comments, block comments, strings, template strings, blocks, parens and punctuators. +* Division operator / regex token ambiguity is handled via backtracking checks against punctuator prefixes, including closing brace or paren backtracking. +* Always correctly parses valid JS source, but may parse invalid JS source without errors. + +### Benchmarks + +Benchmarks can be run with `npm run bench`. 
+ +Current results: + +JS Build: + +``` +Module load time +> 4ms +Cold Run, All Samples +test/samples/*.js (3635 KiB) +> 299ms + +Warm Runs (average of 25 runs) +test/samples/angular.js (1410 KiB) +> 13.96ms +test/samples/angular.min.js (303 KiB) +> 4.72ms +test/samples/d3.js (553 KiB) +> 6.76ms +test/samples/d3.min.js (250 KiB) +> 4ms +test/samples/magic-string.js (34 KiB) +> 0.64ms +test/samples/magic-string.min.js (20 KiB) +> 0ms +test/samples/rollup.js (698 KiB) +> 8.48ms +test/samples/rollup.min.js (367 KiB) +> 5.36ms + +Warm Runs, All Samples (average of 25 runs) +test/samples/*.js (3635 KiB) +> 40.28ms +``` + +Wasm Build: +``` +Module load time +> 10ms +Cold Run, All Samples +test/samples/*.js (3635 KiB) +> 43ms + +Warm Runs (average of 25 runs) +test/samples/angular.js (1410 KiB) +> 9.32ms +test/samples/angular.min.js (303 KiB) +> 3.16ms +test/samples/d3.js (553 KiB) +> 5ms +test/samples/d3.min.js (250 KiB) +> 2.32ms +test/samples/magic-string.js (34 KiB) +> 0.16ms +test/samples/magic-string.min.js (20 KiB) +> 0ms +test/samples/rollup.js (698 KiB) +> 6.28ms +test/samples/rollup.min.js (367 KiB) +> 3.6ms + +Warm Runs, All Samples (average of 25 runs) +test/samples/*.js (3635 KiB) +> 27.76ms +``` + +### Wasm Build Steps + +The build uses docker and make, they must be installed first. + +To build the lexer wasm run `npm run build-wasm`. + +Optimization passes are run with [Binaryen](https://github.com/WebAssembly/binaryen) +prior to publish to reduce the Web Assembly footprint. + +After building the lexer wasm, build the final distribution components +(lexer.js and lexer.mjs) by running `npm run build`. + +If you need to build lib/lexer.wat (optional) you must first install +[wabt](https://github.com/WebAssembly/wabt) as a sibling folder to this +project. 
The wat file is then built by running `make lib/lexer.wat`. + +### License + +MIT + +[travis-url]: https://travis-ci.org/guybedford/es-module-lexer +[travis-image]: https://travis-ci.org/guybedford/es-module-lexer.svg?branch=master diff --git a/deps/cjs-module-lexer/dist/lexer-external.js b/deps/cjs-module-lexer/dist/lexer-external.js deleted file mode 100644 index 1e5f49de6221c1..00000000000000 --- a/deps/cjs-module-lexer/dist/lexer-external.js +++ /dev/null @@ -1,91 +0,0 @@ -"use strict"; - -exports.init = init; -exports.parse = parse; -let wasm; -const isLE = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1; -function parse(source, name = '@') { - if (!wasm) throw new Error('Not initialized'); - const len = source.length + 1; - - // need 2 bytes per code point plus analysis space so we double again - const extraMem = (wasm.__heap_base.value || wasm.__heap_base) + len * 4 - wasm.memory.buffer.byteLength; - if (extraMem > 0) wasm.memory.grow(Math.ceil(extraMem / 65536)); - const addr = wasm.sa(len); - (isLE ? 
copyLE : copyBE)(source, new Uint16Array(wasm.memory.buffer, addr, len)); - const err_code = wasm.parseCJS(addr, source.length, 0, 0, 0); - if (err_code) { - const err = new Error(`Parse error ${name}${wasm.e()}:${source.slice(0, wasm.e()).split('\n').length}:${wasm.e() - source.lastIndexOf('\n', wasm.e() - 1)}`); - Object.assign(err, { - idx: wasm.e() - }); - if (err_code === 5 || err_code === 6 || err_code === 7) Object.assign(err, { - code: 'ERR_LEXER_ESM_SYNTAX' - }); - throw err; - } - let exports = new Set(), - reexports = new Set(), - unsafeGetters = new Set(); - while (wasm.rre()) { - const reexptStr = decode(source.slice(wasm.res(), wasm.ree())); - if (reexptStr) reexports.add(reexptStr); - } - while (wasm.ru()) unsafeGetters.add(decode(source.slice(wasm.us(), wasm.ue()))); - while (wasm.re()) { - let exptStr = decode(source.slice(wasm.es(), wasm.ee())); - if (exptStr !== undefined && !unsafeGetters.has(exptStr)) exports.add(exptStr); - } - return { - exports: [...exports], - reexports: [...reexports] - }; -} -function decode(str) { - if (str[0] === '"' || str[0] === '\'') { - try { - const decoded = (0, eval)(str); - // Filter to exclude non-matching UTF-16 surrogate strings - for (let i = 0; i < decoded.length; i++) { - const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00; - if (surrogatePrefix < 0xD800) { - // Not a surrogate - continue; - } else if (surrogatePrefix === 0xD800) { - // Validate surrogate pair - if ((decoded.charCodeAt(++i) & 0xFC00) !== 0xDC00) return; - } else { - // Out-of-range surrogate code (above 0xD800) - return; - } - } - return decoded; - } catch {} - } else { - return str; - } -} -function copyBE(src, outBuf16) { - const len = src.length; - let i = 0; - while (i < len) { - const ch = src.charCodeAt(i); - outBuf16[i++] = (ch & 0xff) << 8 | ch >>> 8; - } -} -function copyLE(src, outBuf16) { - const len = src.length; - let i = 0; - while (i < len) outBuf16[i] = src.charCodeAt(i++); -} -let initPromise; -function init() { - if 
(initPromise) return initPromise; - return initPromise = (async () => { - const compiled = await WebAssembly.compile((await import('node:fs')).readFileSync(new URL(import.meta.resolve('../lib/lexer.wasm')))); - const { - exports - } = await WebAssembly.instantiate(compiled); - wasm = exports; - })(); -} \ No newline at end of file diff --git a/deps/cjs-module-lexer/dist/lexer.js b/deps/cjs-module-lexer/dist/lexer.js index 7c9b7124ae224d..d59aaf1bc47851 100644 --- a/deps/cjs-module-lexer/dist/lexer.js +++ b/deps/cjs-module-lexer/dist/lexer.js @@ -1 +1 @@ -"use strict";exports.init=init;exports.initSync=initSync;exports.parse=parse;let A;const Q=1===new Uint8Array(new Uint16Array([1]).buffer)[0];function parse(g,I="@"){if(!A)throw new Error("Not initialized");const D=g.length+1,N=(A.__heap_base.value||A.__heap_base)+4*D-A.memory.buffer.byteLength;N>0&&A.memory.grow(Math.ceil(N/65536));const k=A.sa(D);(Q?C:E)(g,new Uint16Array(A.memory.buffer,k,D));const w=A.parseCJS(k,g.length,0,0,0);if(w){const Q=new Error(`Parse error ${I}${A.e()}:${g.slice(0,A.e()).split("\n").length}:${A.e()-g.lastIndexOf("\n",A.e()-1)}`);throw Object.assign(Q,{idx:A.e()}),5!==w&&6!==w&&7!==w||Object.assign(Q,{code:"ERR_LEXER_ESM_SYNTAX"}),Q}let H=new Set,J=new Set,o=new Set;for(;A.rre();){const Q=B(g.slice(A.res(),A.ree()));Q&&J.add(Q)}for(;A.ru();)o.add(B(g.slice(A.us(),A.ue())));for(;A.re();){let Q=B(g.slice(A.es(),A.ee()));void 0===Q||o.has(Q)||H.add(Q)}return{exports:[...H],reexports:[...J]}}function B(A){if('"'!==A[0]&&"'"!==A[0])return A;try{const Q=(0,eval)(A);for(let A=0;A>>8}}function C(A,Q){const B=A.length;let E=0;for(;EA.charCodeAt(0))}let I;function init(){return I||(I=(async()=>{const Q=await WebAssembly.compile(g()),{exports:B}=await WebAssembly.instantiate(Q);A=B})())}function initSync(){if(A)return;const Q=new WebAssembly.Module(g()),{exports:B}=new WebAssembly.Instance(Q);A=B} \ No newline at end of file +"use 
strict";exports.init=init;exports.initSync=initSync;exports.parse=parse;let A;const B=1===new Uint8Array(new Uint16Array([1]).buffer)[0];function parse(I,C="@"){if(!A)throw new Error("Not initialized");const w=I.length+1,D=(A.__heap_base.value||A.__heap_base)+4*w-A.memory.buffer.byteLength;D>0&&A.memory.grow(Math.ceil(D/65536));const G=A.sa(w);(B?g:E)(I,new Uint16Array(A.memory.buffer,G,w));const S=A.parseCJS(G,I.length,0,0,0);if(S){const B=new Error(`Parse error ${C}${A.e()}:${I.slice(0,A.e()).split("\n").length}:${A.e()-I.lastIndexOf("\n",A.e()-1)}`);throw Object.assign(B,{idx:A.e()}),5!==S&&6!==S&&7!==S||Object.assign(B,{code:"ERR_LEXER_ESM_SYNTAX"}),B}let R=new Set,o=new Set,H=new Set;for(;A.rre();){const B=Q(I.slice(A.res(),A.ree()));B&&o.add(B)}for(;A.ru();)H.add(Q(I.slice(A.us(),A.ue())));for(;A.re();){let B=Q(I.slice(A.es(),A.ee()));void 0===B||H.has(B)||R.add(B)}return{exports:[...R],reexports:[...o]}}function Q(A){if('"'!==A[0]&&"'"!==A[0])return A;try{const B=(0,eval)(A);for(let A=0;A>>8}}function g(A,B){const Q=A.length;let E=0;for(;EA.charCodeAt(0))}let C;function init(){return C||(C=(async()=>{const B=await WebAssembly.compile(I()),{exports:Q}=await WebAssembly.instantiate(B);A=Q})())}function initSync(){if(A)return;const B=new WebAssembly.Module(I()),{exports:Q}=new WebAssembly.Instance(B);A=Q} \ No newline at end of file diff --git a/deps/cjs-module-lexer/dist/lexer.mjs b/deps/cjs-module-lexer/dist/lexer.mjs index 964b74cfa781b6..806bb46287259d 100644 --- a/deps/cjs-module-lexer/dist/lexer.mjs +++ b/deps/cjs-module-lexer/dist/lexer.mjs @@ -1,2 +1,2 @@ -/* cjs-module-lexer 1.4.1 */ -let A;const Q=1===new Uint8Array(new Uint16Array([1]).buffer)[0];export function parse(g,I="@"){if(!A)throw new Error("Not initialized");const D=g.length+1,N=(A.__heap_base.value||A.__heap_base)+4*D-A.memory.buffer.byteLength;N>0&&A.memory.grow(Math.ceil(N/65536));const k=A.sa(D);(Q?C:E)(g,new Uint16Array(A.memory.buffer,k,D));const 
w=A.parseCJS(k,g.length,0,0,0);if(w){const Q=new Error(`Parse error ${I}${A.e()}:${g.slice(0,A.e()).split("\n").length}:${A.e()-g.lastIndexOf("\n",A.e()-1)}`);throw Object.assign(Q,{idx:A.e()}),5!==w&&6!==w&&7!==w||Object.assign(Q,{code:"ERR_LEXER_ESM_SYNTAX"}),Q}let H=new Set,J=new Set,o=new Set;for(;A.rre();){const Q=B(g.slice(A.res(),A.ree()));Q&&J.add(Q)}for(;A.ru();)o.add(B(g.slice(A.us(),A.ue())));for(;A.re();){let Q=B(g.slice(A.es(),A.ee()));void 0===Q||o.has(Q)||H.add(Q)}return{exports:[...H],reexports:[...J]}}function B(A){if('"'!==A[0]&&"'"!==A[0])return A;try{const Q=(0,eval)(A);for(let A=0;A>>8}}function C(A,Q){const B=A.length;let E=0;for(;EA.charCodeAt(0))}let I;export function init(){return I||(I=(async()=>{const Q=await WebAssembly.compile(g()),{exports:B}=await WebAssembly.instantiate(Q);A=B})())}export function initSync(){if(A)return;const Q=new WebAssembly.Module(g()),{exports:B}=new WebAssembly.Instance(Q);A=B} \ No newline at end of file +/* cjs-module-lexer 2.0.0 */ +let A;const B=1===new Uint8Array(new Uint16Array([1]).buffer)[0];export function parse(I,C="@"){if(!A)throw new Error("Not initialized");const w=I.length+1,D=(A.__heap_base.value||A.__heap_base)+4*w-A.memory.buffer.byteLength;D>0&&A.memory.grow(Math.ceil(D/65536));const G=A.sa(w);(B?g:E)(I,new Uint16Array(A.memory.buffer,G,w));const S=A.parseCJS(G,I.length,0,0,0);if(S){const B=new Error(`Parse error ${C}${A.e()}:${I.slice(0,A.e()).split("\n").length}:${A.e()-I.lastIndexOf("\n",A.e()-1)}`);throw Object.assign(B,{idx:A.e()}),5!==S&&6!==S&&7!==S||Object.assign(B,{code:"ERR_LEXER_ESM_SYNTAX"}),B}let R=new Set,o=new Set,H=new Set;for(;A.rre();){const B=Q(I.slice(A.res(),A.ree()));B&&o.add(B)}for(;A.ru();)H.add(Q(I.slice(A.us(),A.ue())));for(;A.re();){let B=Q(I.slice(A.es(),A.ee()));void 0===B||H.has(B)||R.add(B)}return{exports:[...R],reexports:[...o]}}function Q(A){if('"'!==A[0]&&"'"!==A[0])return A;try{const B=(0,eval)(A);for(let A=0;A>>8}}function g(A,B){const Q=A.length;let 
E=0;for(;EA.charCodeAt(0))}let C;export function init(){return C||(C=(async()=>{const B=await WebAssembly.compile(I()),{exports:Q}=await WebAssembly.instantiate(B);A=Q})())}export function initSync(){if(A)return;const B=new WebAssembly.Module(I()),{exports:Q}=new WebAssembly.Instance(B);A=Q} \ No newline at end of file diff --git a/deps/cjs-module-lexer/lexer.js b/deps/cjs-module-lexer/lexer.js old mode 100644 new mode 100755 diff --git a/deps/cjs-module-lexer/src/.babelrc b/deps/cjs-module-lexer/src/.babelrc new file mode 100755 index 00000000000000..ac89f952694c6e --- /dev/null +++ b/deps/cjs-module-lexer/src/.babelrc @@ -0,0 +1,10 @@ +{ + "plugins": [ + [ + "@babel/plugin-transform-modules-commonjs", + { + "strict": true + }, + ] + ] +} diff --git a/deps/cjs-module-lexer/src/CHANGELOG.md b/deps/cjs-module-lexer/src/CHANGELOG.md new file mode 100644 index 00000000000000..5a24d193fd487f --- /dev/null +++ b/deps/cjs-module-lexer/src/CHANGELOG.md @@ -0,0 +1,40 @@ +1.2.2 +- Fix RollupJS reexports bug (https://github.com/nodejs/cjs-module-lexer/pull/59) + +1.2.1 +- Support Unicode escapes in strings (https://github.com/nodejs/cjs-module-lexer/pull/55) +- Filter export strings to valid surrogate pairs (https://github.com/nodejs/cjs-module-lexer/pull/56) + +1.2.0 +- Support for non-identifier exports (https://github.com/nodejs/cjs-module-lexer/pull/54, @nicolo-ribaudo) + +1.1.1 +- Better support for Babel reexport getter function forms (https://github.com/nodejs/cjs-module-lexer/issues/50) +- Support Babel interopRequireWildcard reexports patterns (https://github.com/nodejs/cjs-module-lexer/issues/52) + +1.1.0 +- Support for Babel reexport conflict filter (https://github.com/nodejs/cjs-module-lexer/issues/36, @nicolo-ribaudo) +- Support trailing commas in getter patterns (https://github.com/nodejs/cjs-module-lexer/issues/31) +- Support for RollupJS reexports property checks (https://github.com/nodejs/cjs-module-lexer/issues/38) + +1.0.0 +- Unsafe getter tracking 
(https://github.com/nodejs/cjs-module-lexer/pull/29) + +0.6.0 +- API-only breaking change: Unify JS and Wasm interfaces (https://github.com/nodejs/cjs-module-lexer/pull/27) +- Add type definitions (https://github.com/nodejs/cjs-module-lexer/pull/28) + +0.5.2 +- Support named getter functions (https://github.com/nodejs/cjs-module-lexer/pull/26) + +0.5.1: +- Feature: Implement specific reexport getter forms (https://github.com/nodejs/cjs-module-lexer/pull/25) + +0.5.0 +- Breaking Change: No longer emit Object.defineProperty exports (https://github.com/nodejs/cjs-module-lexer/pull/24) +- Doc: Update link to WASI SDK (https://github.com/nodejs/cjs-module-lexer/pull/19) + +0.4.3 +- Support for Babel 7.12 reexports (https://github.com/nodejs/cjs-module-lexer/pull/16) +- Support module.exports = { ...require('x') } reexports (https://github.com/nodejs/cjs-module-lexer/pull/18) +- "if" keyword space parsing in exports matching (https://github.com/nodejs/cjs-module-lexer/pull/17) diff --git a/deps/cjs-module-lexer/src/LICENSE b/deps/cjs-module-lexer/src/LICENSE new file mode 100755 index 00000000000000..935b357962d08b --- /dev/null +++ b/deps/cjs-module-lexer/src/LICENSE @@ -0,0 +1,10 @@ +MIT License +----------- + +Copyright (C) 2018-2020 Guy Bedford + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/cjs-module-lexer/src/Makefile b/deps/cjs-module-lexer/src/Makefile new file mode 100755 index 00000000000000..9a75850f73ae65 --- /dev/null +++ b/deps/cjs-module-lexer/src/Makefile @@ -0,0 +1,16 @@ +WASM2WAT := ../wabt/bin/wasm2wat +WASM_OPT := ../binaryen/bin/wasm-opt + +.PHONY: optimize clean + +lib/lexer.wat: lib/lexer.wasm + $(WASM2WAT) lib/lexer.wasm -o lib/lexer.wat + +lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c | lib/ + node build/wasm.js --docker + +lib/: + @mkdir -p $@ + +clean: + $(RM) lib/* diff --git a/deps/cjs-module-lexer/src/README.md b/deps/cjs-module-lexer/src/README.md new file mode 100755 index 00000000000000..cc7ca50cc72243 --- /dev/null +++ b/deps/cjs-module-lexer/src/README.md @@ -0,0 +1,464 @@ +# CJS Module Lexer + +[![Build Status][travis-image]][travis-url] + +A [very fast](#benchmarks) JS CommonJS module syntax lexer used to detect the most likely list of named exports of a CommonJS module. + +Outputs the list of named exports (`exports.name = ...`) and possible module reexports (`module.exports = require('...')`), including the common transpiler variations of these cases. + +Forked from https://github.com/guybedford/es-module-lexer. + +_Comprehensively handles the JS language grammar while remaining small and fast. 
- ~90ms per MB of JS cold and ~15ms per MB of JS warm, [see benchmarks](#benchmarks) for more info._ + +### Project Status + +This project is used in Node.js core for detecting the named exports available when importing a CJS module into ESM, and is maintained for this purpose. + +PRs will be accepted and upstreamed for parser bugs, performance improvements or new syntax support only. + +_Detection patterns for this project are **frozen**_. This is because adding any new export detection patterns would result in fragmented backwards-compatibility. Specifically, it would be very difficult to figure out why an ES module named export for CommonJS might work in newer Node.js versions but not older versions. This problem would only be discovered downstream of module authors, with the fix for module authors being to then have to understand which patterns in this project provide full backwards-compatibility. Rather, by fully freezing the detected patterns, if it works in any Node.js version it will work in any other. Build tools can also reliably treat the supported syntax for this project as a part of their output target for ensuring syntax support. 
+ +### Usage + +``` +npm install cjs-module-lexer +``` + +For use in CommonJS: + +```js +const { parse } = require('cjs-module-lexer'); + +// `init` returns a promise for parity with the ESM API, but you do not have to call it + +const { exports, reexports } = parse(` + // named exports detection + module.exports.a = 'a'; + (function () { + exports.b = 'b'; + })(); + Object.defineProperty(exports, 'c', { value: 'c' }); + /* exports.d = 'not detected'; */ + + // reexports detection + if (maybe) module.exports = require('./dep1.js'); + if (another) module.exports = require('./dep2.js'); + + // literal exports assignments + module.exports = { a, b: c, d, 'e': f } + + // __esModule detection + Object.defineProperty(module.exports, '__esModule', { value: true }) +`); + +// exports === ['a', 'b', 'c', '__esModule'] +// reexports === ['./dep1.js', './dep2.js'] +``` + +When using the ESM version, Wasm is supported instead: + +```js +import { parse, init } from 'cjs-module-lexer'; +// init() needs to be called and waited upon, or use initSync() to compile +// Wasm blockingly and synchronously. +await init(); +const { exports, reexports } = parse(source); +``` + +The Wasm build is around 1.5x faster and without a cold start. + +### Grammar + +CommonJS exports matches are run against the source token stream. + +The token grammar is: + +``` +IDENTIFIER: As defined by ECMA-262, without support for identifier `\` escapes, filtered to remove strict reserved words: + "implements", "interface", "let", "package", "private", "protected", "public", "static", "yield", "enum" + +STRING_LITERAL: A `"` or `'` bounded ECMA-262 string literal. + +MODULE_EXPORTS: `module` `.` `exports` + +EXPORTS_IDENTIFIER: MODULE_EXPORTS | `exports` + +EXPORTS_DOT_ASSIGN: EXPORTS_IDENTIFIER `.` IDENTIFIER `=` + +EXPORTS_LITERAL_COMPUTED_ASSIGN: EXPORTS_IDENTIFIER `[` STRING_LITERAL `]` `=` + +EXPORTS_LITERAL_PROP: (IDENTIFIER (`:` IDENTIFIER)?) 
| (STRING_LITERAL `:` IDENTIFIER) + +EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE) + +EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN + +EXPORTS_DEFINE: `Object` `.` `defineProperty` `(` EXPORTS_IDENTIFIER `,` STRING_LITERAL + +EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {` + (`enumerable: true,`)? + ( + `value:` | + `get` (`: function` IDENTIFIER? )? `() {` return IDENTIFIER (`.` IDENTIFIER | `[` STRING_LITERAL `]`)? `;`? `}` `,`? + ) + `})` + +EXPORTS_LITERAL: MODULE_EXPORTS `=` `{` ((EXPORTS_LITERAL_PROP | EXPORTS_SPREAD) `,`)+ `}` + +REQUIRE: `require` `(` STRING_LITERAL `)` + +EXPORTS_ASSIGN: (`var` | `const` | `let`) IDENTIFIER `=` (`_interopRequireWildcard (`)? REQUIRE + +MODULE_EXPORTS_ASSIGN: MODULE_EXPORTS `=` REQUIRE + +EXPORT_STAR: (`__export` | `__exportStar`) `(` REQUIRE + +EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2 `) {` + ( + ( + `if (` IDENTIFIER$2 `===` ( `'default'` | `"default"` ) `||` IDENTIFIER$2 `===` ( `'__esModule'` | `"__esModule"` ) `) return` `;`? + ( + (`if (Object` `.prototype`? `.hasOwnProperty.call(` IDENTIFIER `, ` IDENTIFIER$2 `)) return` `;`?)? + (`if (` IDENTIFIER$2 `in` EXPORTS_IDENTIFIER `&&` EXPORTS_IDENTIFIER `[` IDENTIFIER$2 `] ===` IDENTIFIER$1 `[` IDENTIFIER$2 `]) return` `;`)? + )? + ) | + `if (` IDENTIFIER$2 `!==` ( `'default'` | `"default"` ) (`&& !` (`Object` `.prototype`? `.hasOwnProperty.call(` IDENTIFIER `, ` IDENTIFIER$2 `)` | IDENTIFIER `.hasOwnProperty(` IDENTIFIER$2 `)`))? `)` + ) + ( + EXPORTS_IDENTIFIER `[` IDENTIFIER$2 `] =` IDENTIFIER$1 `[` IDENTIFIER$2 `]` `;`? | + `Object.defineProperty(` EXPORTS_IDENTIFIER `, ` IDENTIFIER$2 `, { enumerable: true, get` (`: function` IDENTIFIER? )? `() { return ` IDENTIFIER$1 `[` IDENTIFIER$2 `]` `;`? `}` `,`? `})` `;`? + ) + `})` +``` + +Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment. + +* The returned export names are taken to be the combination of: + 1. 
All `IDENTIFIER` and `STRING_LITERAL` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches. + 2. The first `STRING_LITERAL` slot for all `EXPORTS_DEFINE_VALUE` matches where that same string is not an `EXPORTS_DEFINE` match that is not also an `EXPORTS_DEFINE_VALUE` match. +* The reexport specifiers are taken to be the combination of: + 1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`. + 2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`. + +### Parsing Examples + +#### Named Exports Parsing + +The basic matching rules for named exports are `exports.name`, `exports['name']` or `Object.defineProperty(exports, 'name', ...)`. This matching is done without scope analysis and regardless of the expression position: + +```js +// DETECTS EXPORTS: a, b +(function (exports) { + exports.a = 'a'; + exports['b'] = 'b'; +})(exports); +``` + +Because there is no scope analysis, the above detection may overclassify: + +```js +// DETECTS EXPORTS: a, b, c +(function (exports, Object) { + exports.a = 'a'; + exports['b'] = 'b'; + if (false) + exports.c = 'c'; +})(NOT_EXPORTS, NOT_OBJECT); +``` + +It will in turn underclassify in cases where the identifiers are renamed: + +```js +// DETECTS: NO EXPORTS +(function (e) { + e.a = 'a'; + e['b'] = 'b'; +})(exports); +``` + +#### Getter Exports Parsing + +`Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression: + +```js +// DETECTS: a, b, c, d, __esModule +Object.defineProperty(exports, 'a', { + enumerable: true, + get: function () { + return q.p; + } +}); +Object.defineProperty(exports, 'b', { + enumerable: true, + get: function () { + return q['p']; + } +}); +Object.defineProperty(exports, 'c', { + enumerable: true, + get () { + return b; + } +}); +Object.defineProperty(exports, 'd', { value: 'd' }); 
+Object.defineProperty(exports, '__esModule', { value: true }); +``` + +Value properties are also detected specifically: + +```js +Object.defineProperty(exports, 'a', { + value: 'no problem' +}); +``` + +To avoid matching getters that have side effects, any getter for an export name that does not support the forms above will +opt-out of the getter matching: + +```js +// DETECTS: NO EXPORTS +Object.defineProperty(exports, 'a', { + get () { + return 'nope'; + } +}); + +if (false) { + Object.defineProperty(module.exports, 'a', { + get () { + return dynamic(); + } + }) +} +``` + +Alternative object definition structures or getter function bodies are not detected: + +```js +// DETECTS: NO EXPORTS +Object.defineProperty(exports, 'a', { + enumerable: false, + get () { + return p; + } +}); +Object.defineProperty(exports, 'b', { + configurable: true, + get () { + return p; + } +}); +Object.defineProperty(exports, 'c', { + get: () => p +}); +Object.defineProperty(exports, 'd', { + enumerable: true, + get: function () { + return dynamic(); + } +}); +Object.defineProperty(exports, 'e', { + enumerable: true, + get () { + return 'str'; + } +}); +``` + +`Object.defineProperties` is also not supported. + +#### Exports Object Assignment + +A best-effort is made to detect `module.exports` object assignments, but because this is not a full parser, arbitrary expressions are not handled in the +object parsing process. + +Simple object definitions are supported: + +```js +// DETECTS EXPORTS: a, b, c +module.exports = { + a, + 'b': b, + c: c, + ...d +}; +``` + +Object properties that are not identifiers or string expressions will bail out of the object detection, while spreads are ignored: + +```js +// DETECTS EXPORTS: a, b +module.exports = { + a, + ...d, + b: require('c'), + c: "not detected since require('c') above bails the object detection" +} +``` + +`Object.defineProperties` is not currently supported either. 
+ +#### module.exports reexport assignment + +Any `module.exports = require('mod')` assignment is detected as a reexport, but only the last one is returned: + +```js +// DETECTS REEXPORTS: c +module.exports = require('a'); +(module => module.exports = require('b'))(NOT_MODULE); +if (false) module.exports = require('c'); +``` + +This is to avoid over-classification in Webpack bundles with externals which include `module.exports = require('external')` in their source for every external dependency. + +In exports object assignment, any spreads of `require()` are detected as multiple separate reexports: + +```js +// DETECTS REEXPORTS: a, b +module.exports = require('ignored'); +module.exports = { + ...require('a'), + ...require('b') +}; +``` + +#### Transpiler Re-exports + +For named exports, transpiler output works well with the rules described above. + +But for star re-exports, special care is taken to support common patterns of transpiler outputs from Babel and TypeScript as well as bundlers like RollupJS. +These reexport and star reexport patterns are restricted to only be detected at the top-level as provided by the direct output of these tools. + +For example, `export * from 'external'` is output by Babel as: + +```js +"use strict"; + +exports.__esModule = true; + +var _external = require("external"); + +Object.keys(_external).forEach(function (key) { + if (key === "default" || key === "__esModule") return; + exports[key] = _external[key]; +}); +``` + +Where the `var _external = require("external")` is specifically detected as well as the `Object.keys(_external)` statement, down to the exact +form of that entire expression including minor variations of the output. The `_external` and `key` identifiers are carefully matched in this +detection. 
+ +Similarly for TypeScript, `export * from 'external'` is output as: + +```js +"use strict"; +function __export(m) { + for (var p in m) if (!exports.hasOwnProperty(p)) exports[p] = m[p]; +} +Object.defineProperty(exports, "__esModule", { value: true }); +__export(require("external")); +``` + +Where the `__export(require("external"))` statement is explicitly detected as a reexport, including variations `tslib.__export` and `__exportStar`. + +### Environment Support + +Node.js 10+, and [all browsers with Web Assembly support](https://caniuse.com/#feat=wasm). + +### JS Grammar Support + +* Token state parses all line comments, block comments, strings, template strings, blocks, parens and punctuators. +* Division operator / regex token ambiguity is handled via backtracking checks against punctuator prefixes, including closing brace or paren backtracking. +* Always correctly parses valid JS source, but may parse invalid JS source without errors. + +### Benchmarks + +Benchmarks can be run with `npm run bench`. 
+ +Current results: + +JS Build: + +``` +Module load time +> 4ms +Cold Run, All Samples +test/samples/*.js (3635 KiB) +> 299ms + +Warm Runs (average of 25 runs) +test/samples/angular.js (1410 KiB) +> 13.96ms +test/samples/angular.min.js (303 KiB) +> 4.72ms +test/samples/d3.js (553 KiB) +> 6.76ms +test/samples/d3.min.js (250 KiB) +> 4ms +test/samples/magic-string.js (34 KiB) +> 0.64ms +test/samples/magic-string.min.js (20 KiB) +> 0ms +test/samples/rollup.js (698 KiB) +> 8.48ms +test/samples/rollup.min.js (367 KiB) +> 5.36ms + +Warm Runs, All Samples (average of 25 runs) +test/samples/*.js (3635 KiB) +> 40.28ms +``` + +Wasm Build: +``` +Module load time +> 10ms +Cold Run, All Samples +test/samples/*.js (3635 KiB) +> 43ms + +Warm Runs (average of 25 runs) +test/samples/angular.js (1410 KiB) +> 9.32ms +test/samples/angular.min.js (303 KiB) +> 3.16ms +test/samples/d3.js (553 KiB) +> 5ms +test/samples/d3.min.js (250 KiB) +> 2.32ms +test/samples/magic-string.js (34 KiB) +> 0.16ms +test/samples/magic-string.min.js (20 KiB) +> 0ms +test/samples/rollup.js (698 KiB) +> 6.28ms +test/samples/rollup.min.js (367 KiB) +> 3.6ms + +Warm Runs, All Samples (average of 25 runs) +test/samples/*.js (3635 KiB) +> 27.76ms +``` + +### Wasm Build Steps + +The build uses docker and make, they must be installed first. + +To build the lexer wasm run `npm run build-wasm`. + +Optimization passes are run with [Binaryen](https://github.com/WebAssembly/binaryen) +prior to publish to reduce the Web Assembly footprint. + +After building the lexer wasm, build the final distribution components +(lexer.js and lexer.mjs) by running `npm run build`. + +If you need to build lib/lexer.wat (optional) you must first install +[wabt](https://github.com/WebAssembly/wabt) as a sibling folder to this +project. 
The wat file is then built by running `make lib/lexer.wat` + +### License + +MIT + +[travis-url]: https://travis-ci.org/guybedford/es-module-lexer +[travis-image]: https://travis-ci.org/guybedford/es-module-lexer.svg?branch=master diff --git a/deps/cjs-module-lexer/src/build.js b/deps/cjs-module-lexer/src/build.js new file mode 100755 index 00000000000000..da024d1a6cb780 --- /dev/null +++ b/deps/cjs-module-lexer/src/build.js @@ -0,0 +1,25 @@ +const fs = require('fs'); +const terser = require('terser'); + +const MINIFY = true; + +try { fs.mkdirSync('./dist'); } +catch (e) {} + +const wasmBuffer = fs.readFileSync('./lib/lexer.wasm'); +const jsSource = fs.readFileSync('./src/lexer.js').toString(); +const pjson = JSON.parse(fs.readFileSync('./package.json').toString()); + +const jsSourceProcessed = jsSource.replace('WASM_BINARY', wasmBuffer.toString('base64')); + +const minified = MINIFY && terser.minify(jsSourceProcessed, { + module: true, + output: { + preamble: `/* cjs-module-lexer ${pjson.version} */` + } +}); + +if (minified.error) + throw minified.error; + +fs.writeFileSync('./dist/lexer.mjs', minified ? 
minified.code : jsSourceProcessed); diff --git a/deps/cjs-module-lexer/src/build/Makefile b/deps/cjs-module-lexer/src/build/Makefile new file mode 100755 index 00000000000000..f13c390b8cdb96 --- /dev/null +++ b/deps/cjs-module-lexer/src/build/Makefile @@ -0,0 +1,13 @@ +lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c + @mkdir -p lib + clang --sysroot=/usr/share/wasi-sysroot -target wasm32-unknown-wasi src/lexer.c -I include-wasm -o lib/lexer.wasm -nostartfiles \ + -Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,--export=__heap_base,\ + --export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ru,--export=us,--export=ue \ + -Wno-logical-op-parentheses -Wno-parentheses \ + -Oz + +optimize: lib/lexer.wasm + ${WASM_OPT} -Oz lib/lexer.wasm -o lib/lexer.wasm + +clean: + rm lib/* diff --git a/deps/cjs-module-lexer/src/build/wasm.js b/deps/cjs-module-lexer/src/build/wasm.js new file mode 100644 index 00000000000000..58fb64041d7691 --- /dev/null +++ b/deps/cjs-module-lexer/src/build/wasm.js @@ -0,0 +1,54 @@ +'use strict' + +const WASM_BUILDER_CONTAINER = 'ghcr.io/nodejs/wasm-builder@sha256:975f391d907e42a75b8c72eb77c782181e941608687d4d8694c3e9df415a0970' // v0.0.9 + +const WASM_OPT = './wasm-opt' + +const { execSync } = require('node:child_process') +const { writeFileSync, readFileSync, existsSync, mkdirSync } = require('node:fs') +const { join, resolve } = require('node:path') + +const ROOT = resolve(__dirname, '../') + +let platform = process.env.WASM_PLATFORM +if (!platform && process.argv[2]) { + platform = execSync('docker info -f "{{.OSType}}/{{.Architecture}}"').toString().trim() +} + +if (process.argv[2] === '--docker') { + let cmd = `docker run --rm --platform=${platform.toString().trim()} ` + if (process.platform === 'linux') { + cmd += ` --user ${process.getuid()}:${process.getegid()}` + } + + if (!existsSync(`${ROOT}/dist`)){ + mkdirSync(`${ROOT}/dist`); + } + + 
cmd += ` --mount type=bind,source=${ROOT}/lib,target=/home/node/build/lib \ + --mount type=bind,source=${ROOT}/src,target=/home/node/build/src \ + --mount type=bind,source=${ROOT}/dist,target=/home/node/build/dist \ + --mount type=bind,source=${ROOT}/node_modules,target=/home/node/build/node_modules \ + --mount type=bind,source=${ROOT}/build/wasm.js,target=/home/node/build/wasm.js \ + --mount type=bind,source=${ROOT}/build/Makefile,target=/home/node/build/Makefile \ + --mount type=bind,source=${ROOT}/build.js,target=/home/node/build/build.js \ + --mount type=bind,source=${ROOT}/package.json,target=/home/node/build/package.json \ + --mount type=bind,source=${ROOT}/include-wasm,target=/home/node/build/include-wasm \ + -t ${WASM_BUILDER_CONTAINER} node wasm.js` + console.log(`> ${cmd}\n\n`) + execSync(cmd, { stdio: 'inherit' }) + process.exit(0) +} + +const hasOptimizer = (function () { + try { execSync(`${WASM_OPT} --version`); return true } catch (error) { return false } +})() + +// Build wasm binary +console.log('Building wasm'); +execSync(`make lib/lexer.wasm`, { stdio: 'inherit' }) +if (hasOptimizer) { + console.log('Optimizing wasm'); + execSync(`make optimize`, { stdio: 'inherit' }) +} +execSync(`node build.js`, { stdio: 'inherit' }) diff --git a/deps/cjs-module-lexer/src/include-wasm/cjs-module-lexer.h b/deps/cjs-module-lexer/src/include-wasm/cjs-module-lexer.h new file mode 100755 index 00000000000000..ab2adca3c751ab --- /dev/null +++ b/deps/cjs-module-lexer/src/include-wasm/cjs-module-lexer.h @@ -0,0 +1,238 @@ +#include +#include +#include +#include +#include + +extern unsigned char __heap_base; + +const uint16_t* source = (void*)&__heap_base; +uint32_t parse_error; + +struct Slice { + const uint16_t* start; + const uint16_t* end; + struct Slice* next; +}; +typedef struct Slice Slice; + +struct StarExportBinding { + const uint16_t* specifier_start; + const uint16_t* specifier_end; + const uint16_t* id_start; + const uint16_t* id_end; +}; +typedef struct 
StarExportBinding StarExportBinding; + +Slice* first_export = NULL; +Slice* export_read_head = NULL; +Slice* export_write_head = NULL; +Slice* first_reexport = NULL; +Slice* reexport_read_head = NULL; +Slice* reexport_write_head = NULL; +Slice* first_unsafe_getter = NULL; +Slice* unsafe_getter_read_head = NULL; +Slice* unsafe_getter_write_head = NULL; +void* analysis_base; +void* analysis_head; + +void bail (uint32_t err); + +// allocateSource +const uint16_t* sa (uint32_t utf16Len) { + const uint16_t* sourceEnd = source + utf16Len + 1; + // ensure source is null terminated + *(uint16_t*)(source + utf16Len) = '\0'; + analysis_base = (void*)sourceEnd; + analysis_head = analysis_base; + first_export = NULL; + export_write_head = NULL; + export_read_head = NULL; + first_reexport = NULL; + reexport_write_head = NULL; + reexport_read_head = NULL; + first_unsafe_getter = NULL; + unsafe_getter_write_head = NULL; + unsafe_getter_read_head = NULL; + return source; +} + +// getErr +uint32_t e () { + return parse_error; +} + +// getExportStart +uint32_t es () { + return export_read_head->start - source; +} +// getExportEnd +uint32_t ee () { + return export_read_head->end - source; +} +// getReexportStart +uint32_t res () { + return reexport_read_head->start - source; +} +// getReexportEnd +uint32_t ree () { + return reexport_read_head->end - source; +} +// getUnsafeGetterStart +uint32_t us () { + return unsafe_getter_read_head->start - source; +} +// getUnsafeGetterEnd +uint32_t ue () { + return unsafe_getter_read_head->end - source; +} +// readExport +bool re () { + if (export_read_head == NULL) + export_read_head = first_export; + else + export_read_head = export_read_head->next; + if (export_read_head == NULL) + return false; + return true; +} +// readReexport +bool rre () { + if (reexport_read_head == NULL) + reexport_read_head = first_reexport; + else + reexport_read_head = reexport_read_head->next; + if (reexport_read_head == NULL) + return false; + return true; +} +// 
readUnsafeGetter +bool ru () { + if (unsafe_getter_read_head == NULL) + unsafe_getter_read_head = first_unsafe_getter; + else + unsafe_getter_read_head = unsafe_getter_read_head->next; + if (unsafe_getter_read_head == NULL) + return false; + return true; +} + +void _addExport (const uint16_t* start, const uint16_t* end) { + Slice* export = (Slice*)(analysis_head); + analysis_head = analysis_head + sizeof(Slice); + if (export_write_head == NULL) + first_export = export; + else + export_write_head->next = export; + export_write_head = export; + export->start = start; + export->end = end; + export->next = NULL; +} +void _addReexport (const uint16_t* start, const uint16_t* end) { + Slice* reexport = (Slice*)(analysis_head); + analysis_head = analysis_head + sizeof(Slice); + if (reexport_write_head == NULL) + first_reexport = reexport; + else + reexport_write_head->next = reexport; + reexport_write_head = reexport; + reexport->start = start; + reexport->end = end; + reexport->next = NULL; +} +void _addUnsafeGetter (const uint16_t* start, const uint16_t* end) { + Slice* unsafe_getter = (Slice*)(analysis_head); + analysis_head = analysis_head + sizeof(Slice); + if (unsafe_getter_write_head == NULL) + first_unsafe_getter = unsafe_getter; + else + unsafe_getter_write_head->next = unsafe_getter; + unsafe_getter_write_head = unsafe_getter; + unsafe_getter->start = start; + unsafe_getter->end = end; + unsafe_getter->next = NULL; +} +void _clearReexports () { + reexport_write_head = NULL; + first_reexport = NULL; +} +void (*addExport)(const uint16_t*, const uint16_t*) = &_addExport; +void (*addReexport)(const uint16_t*, const uint16_t*) = &_addReexport; +void (*addUnsafeGetter)(const uint16_t*, const uint16_t*) = &_addUnsafeGetter; +void (*clearReexports)() = &_clearReexports; +uint32_t parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void 
(*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)()); + +enum RequireType { + Import, + ExportAssign, + ExportStar +}; + +void tryBacktrackAddStarExportBinding (uint16_t* pos); +bool tryParseRequire (enum RequireType requireType); +void tryParseLiteralExports (); +bool readExportsOrModuleDotExports (uint16_t ch); +void tryParseModuleExportsDotAssign (); +void tryParseExportsDotAssign (bool assign); +void tryParseObjectDefineOrKeys (bool keys); +bool identifier (uint16_t ch); + +void throwIfImportStatement (); +void throwIfExportStatement (); + +void readImportString (const uint16_t* ss, uint16_t ch); +uint16_t readExportAs (uint16_t* startPos, uint16_t* endPos); + +uint16_t commentWhitespace (); +void stringLiteral (uint16_t quote); +void regularExpression (); +void templateString (); +void blockComment (); +void lineComment (); + +uint16_t readToWsOrPunctuator (uint16_t ch); + +uint32_t fullCharCode (uint16_t ch); +uint32_t fullCharCodeAtLast (uint16_t* pos); +bool isIdentifierStart (uint32_t code); +bool isIdentifierChar (uint32_t code); +int charCodeByteLen (uint32_t ch); + +bool isBr (uint16_t c); +bool isBrOrWs (uint16_t c); +bool isBrOrWsOrPunctuator (uint16_t c); +bool isBrOrWsOrPunctuatorNotDot (uint16_t c); + +bool str_eq2 (uint16_t* pos, uint16_t c1, uint16_t c2); +bool str_eq3 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3); +bool str_eq4 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4); +bool str_eq5 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5); +bool str_eq6 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6); +bool str_eq7 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7); +bool str_eq8 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7, uint16_t c8); +bool str_eq9 (uint16_t* pos, uint16_t c1, uint16_t c2, 
uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7, uint16_t c8, uint16_t c9); +bool str_eq10 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7, uint16_t c8, uint16_t c9, uint16_t c10); +bool str_eq13 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7, uint16_t c8, uint16_t c9, uint16_t c10, uint16_t c11, uint16_t c12, uint16_t c13); +bool str_eq18 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7, uint16_t c8, uint16_t c9, uint16_t c10, uint16_t c11, uint16_t c12, uint16_t c13, uint16_t c14, uint16_t c15, uint16_t c16, uint16_t c17, uint16_t c18); +bool str_eq22 (uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7, uint16_t c8, uint16_t c9, uint16_t c10, uint16_t c11, uint16_t c12, uint16_t c13, uint16_t c14, uint16_t c15, uint16_t c16, uint16_t c17, uint16_t c18, uint16_t c19, uint16_t c20, uint16_t c21, uint16_t c22); + +bool readPrecedingKeyword2(uint16_t* pos, uint16_t c1, uint16_t c2); +bool readPrecedingKeyword3(uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3); +bool readPrecedingKeyword4(uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4); +bool readPrecedingKeyword5(uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5); +bool readPrecedingKeyword6(uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6); +bool readPrecedingKeyword7(uint16_t* pos, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7); + +bool keywordStart (uint16_t* pos); +bool isExpressionKeyword (uint16_t* pos); +bool isParenKeyword (uint16_t* pos); +bool isPunctuator (uint16_t charCode); +bool isExpressionPunctuator (uint16_t charCode); +bool isExpressionTerminator (uint16_t* pos); + +void nextChar (uint16_t ch); +void nextCharSurrogate 
(uint16_t ch); +uint16_t readChar (); + +void syntaxError (uint32_t code); diff --git a/deps/cjs-module-lexer/src/lexer.d.mts b/deps/cjs-module-lexer/src/lexer.d.mts new file mode 100644 index 00000000000000..98e01424641336 --- /dev/null +++ b/deps/cjs-module-lexer/src/lexer.d.mts @@ -0,0 +1 @@ +export * from './lexer.js' diff --git a/deps/cjs-module-lexer/lexer.d.ts b/deps/cjs-module-lexer/src/lexer.d.ts old mode 100644 new mode 100755 similarity index 96% rename from deps/cjs-module-lexer/lexer.d.ts rename to deps/cjs-module-lexer/src/lexer.d.ts index c7d8a523f36bc6..cda07d80af9d58 --- a/deps/cjs-module-lexer/lexer.d.ts +++ b/deps/cjs-module-lexer/src/lexer.d.ts @@ -1,8 +1,8 @@ -export interface Exports { - exports: string[]; - reexports: string[]; -} - -export declare function parse(source: string, name?: string): Exports; -export declare function init(): Promise; -export declare function initSync(): void; +export interface Exports { + exports: string[]; + reexports: string[]; +} + +export declare function parse(source: string, name?: string): Exports; +export declare function init(): Promise; +export declare function initSync(): void; diff --git a/deps/cjs-module-lexer/src/lexer.js b/deps/cjs-module-lexer/src/lexer.js new file mode 100755 index 00000000000000..aaf7dde807f98e --- /dev/null +++ b/deps/cjs-module-lexer/src/lexer.js @@ -0,0 +1,1443 @@ +let source, pos, end; +let openTokenDepth, + templateDepth, + lastTokenPos, + lastSlashWasDivision, + templateStack, + templateStackDepth, + openTokenPosStack, + openClassPosStack, + nextBraceIsClass, + starExportMap, + lastStarExportSpecifier, + _exports, + unsafeGetters, + reexports; + +function resetState () { + openTokenDepth = 0; + templateDepth = -1; + lastTokenPos = -1; + lastSlashWasDivision = false; + templateStack = new Array(1024); + templateStackDepth = 0; + openTokenPosStack = new Array(1024); + openClassPosStack = new Array(1024); + nextBraceIsClass = false; + starExportMap = Object.create(null); + 
lastStarExportSpecifier = null; + + _exports = new Set(); + unsafeGetters = new Set(); + reexports = new Set(); +} + +// RequireType +const Import = 0; +const ExportAssign = 1; +const ExportStar = 2; + +function parseCJS (source, name = '@') { + resetState(); + try { + parseSource(source); + } + catch (e) { + e.message += `\n at ${name}:${source.slice(0, pos).split('\n').length}:${pos - source.lastIndexOf('\n', pos - 1)}`; + e.loc = pos; + throw e; + } + const result = { exports: [..._exports].filter(expt => expt !== undefined && !unsafeGetters.has(expt)), reexports: [...reexports].filter(reexpt => reexpt !== undefined) }; + resetState(); + return result; +} + +function decode (str) { + if (str[0] === '"' || str[0] === '\'') { + try { + const decoded = (0, eval)(str); + // Filter to exclude non-matching UTF-16 surrogate strings + for (let i = 0; i < decoded.length; i++) { + const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00; + if (surrogatePrefix < 0xD800) { + // Not a surrogate + continue; + } + else if (surrogatePrefix === 0xD800) { + // Validate surrogate pair + if ((decoded.charCodeAt(++i) & 0xFC00) !== 0xDC00) + return; + } + else { + // Out-of-range surrogate code (above 0xD800) + return; + } + } + return decoded; + } + catch {} + } + else { + return str; + } +} + +function parseSource (cjsSource) { + source = cjsSource; + pos = -1; + end = source.length - 1; + let ch = 0; + + // Handle #! 
+ if (source.charCodeAt(0) === 35/*#*/ && source.charCodeAt(1) === 33/*!*/) { + if (source.length === 2) + return true; + pos += 2; + while (pos++ < end) { + ch = source.charCodeAt(pos); + if (ch === 10/*\n*/ || ch === 13/*\r*/) + break; + } + } + + while (pos++ < end) { + ch = source.charCodeAt(pos); + + if (ch === 32 || ch < 14 && ch > 8) + continue; + + if (openTokenDepth === 0) { + switch (ch) { + case 105/*i*/: + if (source.startsWith('mport', pos + 1) && keywordStart(pos)) + throwIfImportStatement(); + lastTokenPos = pos; + continue; + case 114/*r*/: + const startPos = pos; + if (tryParseRequire(Import) && keywordStart(startPos)) + tryBacktrackAddStarExportBinding(startPos - 1); + lastTokenPos = pos; + continue; + case 95/*_*/: + if (source.startsWith('interopRequireWildcard', pos + 1) && (keywordStart(pos) || source.charCodeAt(pos - 1) === 46/*.*/)) { + const startPos = pos; + pos += 23; + if (source.charCodeAt(pos) === 40/*(*/) { + pos++; + openTokenPosStack[openTokenDepth++] = lastTokenPos; + if (tryParseRequire(Import) && keywordStart(startPos)) { + tryBacktrackAddStarExportBinding(startPos - 1); + } + } + } + else if (source.startsWith('_export', pos + 1) && (keywordStart(pos) || source.charCodeAt(pos - 1) === 46/*.*/)) { + pos += 8; + if (source.startsWith('Star', pos)) + pos += 4; + if (source.charCodeAt(pos) === 40/*(*/) { + openTokenPosStack[openTokenDepth++] = lastTokenPos; + if (source.charCodeAt(++pos) === 114/*r*/) + tryParseRequire(ExportStar); + } + } + lastTokenPos = pos; + continue; + } + } + + switch (ch) { + case 101/*e*/: + if (source.startsWith('xport', pos + 1) && keywordStart(pos)) { + if (source.charCodeAt(pos + 6) === 115/*s*/) + tryParseExportsDotAssign(false); + else if (openTokenDepth === 0) + throwIfExportStatement(); + } + break; + case 99/*c*/: + if (keywordStart(pos) && source.startsWith('lass', pos + 1) && isBrOrWs(source.charCodeAt(pos + 5))) + nextBraceIsClass = true; + break; + case 109/*m*/: + if 
(source.startsWith('odule', pos + 1) && keywordStart(pos)) + tryParseModuleExportsDotAssign(); + break; + case 79/*O*/: + if (source.startsWith('bject', pos + 1) && keywordStart(pos)) + tryParseObjectDefineOrKeys(openTokenDepth === 0); + break; + case 40/*(*/: + openTokenPosStack[openTokenDepth++] = lastTokenPos; + break; + case 41/*)*/: + if (openTokenDepth === 0) + throw new Error('Unexpected closing bracket.'); + openTokenDepth--; + break; + case 123/*{*/: + openClassPosStack[openTokenDepth] = nextBraceIsClass; + nextBraceIsClass = false; + openTokenPosStack[openTokenDepth++] = lastTokenPos; + break; + case 125/*}*/: + if (openTokenDepth === 0) + throw new Error('Unexpected closing brace.'); + if (openTokenDepth-- === templateDepth) { + templateDepth = templateStack[--templateStackDepth]; + templateString(); + } + else { + if (templateDepth !== -1 && openTokenDepth < templateDepth) + throw new Error('Unexpected closing brace.'); + } + break; + case 60/*>*/: + // TODO: