From f03e191bdad1b9615e62f067e948c4764f419efe Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sun, 14 Apr 2024 15:19:45 +0900 Subject: [PATCH 1/5] Add support for ES2025 RegExp Duplicate named capturing groups --- acorn/src/acorn.d.ts | 2 +- acorn/src/regexp.js | 37 +++++++++++++++++++++++++++++++- bin/test262.unsupported-features | 1 - test/run.js | 1 + test/tests-regexp-2025.js | 17 +++++++++++++++ 5 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 test/tests-regexp-2025.js diff --git a/acorn/src/acorn.d.ts b/acorn/src/acorn.d.ts index cf72b3704..4f7b383a3 100644 --- a/acorn/src/acorn.d.ts +++ b/acorn/src/acorn.d.ts @@ -573,7 +573,7 @@ export function tokenizer(input: string, options: Options): { [Symbol.iterator](): Iterator } -export type ecmaVersion = 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | "latest" +export type ecmaVersion = 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | 2025 | "latest" export interface Options { /** diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 71babf7ae..be11e2538 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -23,6 +23,8 @@ export class RegExpValidationState { this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames = [] + this.groupNamesInDisjunction = [] + this.groupNamesInAlternative = [] this.backReferenceNames = [] } @@ -169,6 +171,8 @@ pp.regexp_pattern = function(state) { state.numCapturingParens = 0 state.maxBackReference = 0 state.groupNames.length = 0 + state.groupNamesInDisjunction.length = 0 + state.groupNamesInAlternative.length = 0 state.backReferenceNames.length = 0 this.regexp_disjunction(state) @@ -194,11 +198,27 @@ pp.regexp_pattern = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction pp.regexp_disjunction = function(state) { + let upperGroupNamesInDisjunction + if (this.options.ecmaVersion >= 16) { + upperGroupNamesInDisjunction = state.groupNamesInDisjunction + state.groupNamesInDisjunction = [] + } + this.regexp_alternative(state) while (state.eat(0x7C /* | */)) { this.regexp_alternative(state) } + if (this.options.ecmaVersion >= 16) { + // Adds the group name that appears in current Disjunction + // as the group name of the current Alternative and upper Disjunction. + for (const groupName of state.groupNamesInDisjunction) { + upperGroupNamesInDisjunction.push(groupName) + state.groupNamesInAlternative.push(groupName) + } + state.groupNamesInDisjunction = upperGroupNamesInDisjunction + } + // Make the same message as V8. if (this.regexp_eatQuantifier(state, true)) { state.raise("Nothing to repeat") @@ -210,8 +230,18 @@ pp.regexp_disjunction = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative pp.regexp_alternative = function(state) { + let upperGroupNamesInAlternative + if (this.options.ecmaVersion >= 16) { + upperGroupNamesInAlternative = state.groupNamesInAlternative + state.groupNamesInAlternative = [...state.groupNamesInAlternative] + } + while (state.pos < state.source.length && this.regexp_eatTerm(state)) ; + + if (this.options.ecmaVersion >= 16) { + state.groupNamesInAlternative = upperGroupNamesInAlternative + } } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term @@ -448,10 +478,15 @@ pp.regexp_eatExtendedPatternCharacter = function(state) { pp.regexp_groupSpecifier = function(state) { if (state.eat(0x3F /* ? */)) { if (this.regexp_eatGroupName(state)) { - if (state.groupNames.indexOf(state.lastStringValue) !== -1) { + const groupNames = this.options.ecmaVersion >= 16 ? state.groupNamesInAlternative : state.groupNames + if (groupNames.indexOf(state.lastStringValue) !== -1) { state.raise("Duplicate capture group name") } state.groupNames.push(state.lastStringValue) + if (this.options.ecmaVersion >= 16) { + state.groupNamesInAlternative.push(state.lastStringValue) + state.groupNamesInDisjunction.push(state.lastStringValue) + } return } state.raise("Invalid group") diff --git a/bin/test262.unsupported-features b/bin/test262.unsupported-features index 5ab02064c..383077264 100644 --- a/bin/test262.unsupported-features +++ b/bin/test262.unsupported-features @@ -1,3 +1,2 @@ decorators import-assertions -regexp-duplicate-named-groups diff --git a/test/run.js b/test/run.js index 05087c483..0587bc571 100644 --- a/test/run.js +++ b/test/run.js @@ -15,6 +15,7 @@ require("./tests-regexp-2020.js"); require("./tests-regexp-2022.js"); require("./tests-regexp-2024.js"); + require("./tests-regexp-2025.js"); require("./tests-json-superset.js"); require("./tests-optional-catch-binding.js"); require("./tests-bigint.js"); diff --git a/test/tests-regexp-2025.js b/test/tests-regexp-2025.js new file mode 100644 index 000000000..dd010ef42 --- /dev/null +++ b/test/tests-regexp-2025.js @@ -0,0 +1,17 @@ +if (typeof exports !== "undefined") { + var test = require("./driver.js").test + var testFail = require("./driver.js").testFail +} + +test("/(?a)|(?b)/", {}, { ecmaVersion: 2025 }) +testFail("/(?a)|(?b)/", "Invalid regular expression: /(?a)|(?b)/: Duplicate capture group name (1:1)", { ecmaVersion: 2024 }) +testFail("/(?a)(?b)/", "Invalid regular expression: /(?a)(?b)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +test("/(?:(?a)|(?b))\\k/", {}, { ecmaVersion: 2025 }) +testFail("/(?:(?a)|(?b))\\k/", "Invalid regular expression: /(?:(?a)|(?b))\\k/: Duplicate capture group name (1:1)", { ecmaVersion: 2024 }) +testFail("/(?:(?a)(?b))\\k/", "Invalid regular expression: /(?:(?a)(?b))\\k/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +test("/(?a)(?a)|(?b)(?b)/", {}, { ecmaVersion: 2025 }) +test("/(?a)|(?b)|(?c)/", {}, { ecmaVersion: 2025 }) +test("/(?a)|\\k/", {}, { ecmaVersion: 2025 }) +testFail("/(?a)|(?b)(?c)/", "Invalid regular expression: /(?a)|(?b)(?c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +testFail("/(?:(?a)|(?b))(?c)/", "Invalid regular expression: /(?:(?a)|(?b))(?c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +testFail("/(?:(?:(?a)|(?b))|(?:))(?c)/", "Invalid regular expression: /(?:(?:(?a)|(?b))|(?:))(?c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) From f7a9609d438fbd560bf0b541afaf651a390062f7 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Mon, 15 Apr 2024 11:45:59 +0900 Subject: [PATCH 2/5] add test case --- test/tests-regexp-2025.js | 1 + 1 file changed, 1 insertion(+) diff --git a/test/tests-regexp-2025.js b/test/tests-regexp-2025.js index dd010ef42..6eedc49e9 100644 --- a/test/tests-regexp-2025.js +++ b/test/tests-regexp-2025.js @@ -14,4 +14,5 @@ test("/(?a)|(?b)|(?c)/", {}, { ecmaVersion: 2025 }) test("/(?a)|\\k/", {}, { ecmaVersion: 2025 }) testFail("/(?a)|(?b)(?c)/", "Invalid regular expression: /(?a)|(?b)(?c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) testFail("/(?:(?a)|(?b))(?c)/", "Invalid regular expression: /(?:(?a)|(?b))(?c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +testFail("/(?a)(?:(?b)|(?c))/", "Invalid regular expression: /(?a)(?:(?b)|(?c))/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) testFail("/(?:(?:(?a)|(?b))|(?:))(?c)/", "Invalid regular expression: /(?:(?:(?a)|(?b))|(?:))(?c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) From 971bcffa94dea8973793d5e72b4861fca1aa39e5 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Mon, 15 Apr 2024 19:22:35 +0900 Subject: [PATCH 3/5] refactor --- acorn/src/regexp.js | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index be11e2538..dd72516ee 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -23,8 +23,7 @@ export class RegExpValidationState { this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames = [] - this.groupNamesInDisjunction = [] - this.groupNamesInAlternative = [] + this.groupNamesToAddToUpperScope = [] this.backReferenceNames = [] } @@ -171,8 +170,7 @@ pp.regexp_pattern = function(state) { state.numCapturingParens = 0 state.maxBackReference = 0 state.groupNames.length = 0 - state.groupNamesInDisjunction.length = 0 - state.groupNamesInAlternative.length = 0 + state.groupNamesToAddToUpperScope.length = 0 state.backReferenceNames.length = 0 this.regexp_disjunction(state) @@ -198,10 +196,11 @@ pp.regexp_pattern = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction pp.regexp_disjunction = function(state) { - let upperGroupNamesInDisjunction + let groupNamesToAddToUpperUpperScope if (this.options.ecmaVersion >= 16) { - upperGroupNamesInDisjunction = state.groupNamesInDisjunction - state.groupNamesInDisjunction = [] + groupNamesToAddToUpperUpperScope = state.groupNamesToAddToUpperScope + // Clear groupNamesToAddToUpperScope to store the groupName added in this Disjunction. + state.groupNamesToAddToUpperScope = [] } this.regexp_alternative(state) @@ -210,13 +209,13 @@ pp.regexp_disjunction = function(state) { } if (this.options.ecmaVersion >= 16) { - // Adds the group name that appears in current Disjunction - // as the group name of the current Alternative and upper Disjunction. - for (const groupName of state.groupNamesInDisjunction) { - upperGroupNamesInDisjunction.push(groupName) - state.groupNamesInAlternative.push(groupName) + for (const groupName of state.groupNamesToAddToUpperScope) { + // Adds the groupName added in Disjunction to groupNames. + state.groupNames.push(groupName) + // Adds the groupName added with Disjunction to the upper scope. + groupNamesToAddToUpperUpperScope.push(groupName) } - state.groupNamesInDisjunction = upperGroupNamesInDisjunction + state.groupNamesToAddToUpperScope = groupNamesToAddToUpperUpperScope } // Make the same message as V8. @@ -230,17 +229,23 @@ pp.regexp_disjunction = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative pp.regexp_alternative = function(state) { - let upperGroupNamesInAlternative + let upperGroupNames if (this.options.ecmaVersion >= 16) { - upperGroupNamesInAlternative = state.groupNamesInAlternative - state.groupNamesInAlternative = [...state.groupNamesInAlternative] + upperGroupNames = [...state.groupNames] } while (state.pos < state.source.length && this.regexp_eatTerm(state)) ; if (this.options.ecmaVersion >= 16) { - state.groupNamesInAlternative = upperGroupNamesInAlternative + // Adds the groupName added with Alternative to the upper scope. + for (const groupName of state.groupNames) { + if (upperGroupNames.indexOf(groupName) === -1) { + state.groupNamesToAddToUpperScope.push(groupName) + } + } + // Reverts the groupNames so that the next adjacent Alt does not report duplicates. + state.groupNames = upperGroupNames } } @@ -478,15 +483,10 @@ pp.regexp_eatExtendedPatternCharacter = function(state) { pp.regexp_groupSpecifier = function(state) { if (state.eat(0x3F /* ? */)) { if (this.regexp_eatGroupName(state)) { - const groupNames = this.options.ecmaVersion >= 16 ? state.groupNamesInAlternative : state.groupNames - if (groupNames.indexOf(state.lastStringValue) !== -1) { + if (state.groupNames.indexOf(state.lastStringValue) !== -1) { state.raise("Duplicate capture group name") } state.groupNames.push(state.lastStringValue) - if (this.options.ecmaVersion >= 16) { - state.groupNamesInAlternative.push(state.lastStringValue) - state.groupNamesInDisjunction.push(state.lastStringValue) - } return } state.raise("Invalid group") From 589b29851d29478691bc09a7a388d9937c8dfb64 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Mon, 15 Apr 2024 19:29:34 +0900 Subject: [PATCH 4/5] fix comment --- acorn/src/regexp.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index dd72516ee..46bd5e7e2 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -212,7 +212,7 @@ pp.regexp_disjunction = function(state) { for (const groupName of state.groupNamesToAddToUpperScope) { // Adds the groupName added in Disjunction to groupNames. state.groupNames.push(groupName) - // Adds the groupName added with Disjunction to the upper scope. + // Adds the groupName added in Disjunction to the upper scope. groupNamesToAddToUpperUpperScope.push(groupName) } state.groupNamesToAddToUpperScope = groupNamesToAddToUpperUpperScope @@ -238,7 +238,7 @@ pp.regexp_alternative = function(state) { ; if (this.options.ecmaVersion >= 16) { - // Adds the groupName added with Alternative to the upper scope. + // Adds the groupName added in Alternative to the upper scope. for (const groupName of state.groupNames) { if (upperGroupNames.indexOf(groupName) === -1) { state.groupNamesToAddToUpperScope.push(groupName) From d376f70558f15a9bbae0b2fbc1dd193cd488a42a Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Tue, 16 Apr 2024 08:12:21 +0900 Subject: [PATCH 5/5] fix comment --- acorn/src/regexp.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 46bd5e7e2..c44ea7b84 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -244,7 +244,7 @@ pp.regexp_alternative = function(state) { state.groupNamesToAddToUpperScope.push(groupName) } } - // Reverts the groupNames so that the next adjacent Alt does not report duplicates. + // Reverts the groupNames so that the next adjacent Alternative does not report duplicates. state.groupNames = upperGroupNames } }