From f6c63b92add1b9b1a217d0cbb7a1bc4c19603e30 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Fri, 15 Dec 2023 18:33:22 +0100 Subject: [PATCH] More efficient regex replace (#2411) --- src/common/types/chainner-builtin.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/common/types/chainner-builtin.ts b/src/common/types/chainner-builtin.ts index ca0c34436..def838799 100644 --- a/src/common/types/chainner-builtin.ts +++ b/src/common/types/chainner-builtin.ts @@ -189,25 +189,23 @@ const regexReplaceImpl = ( // rregex currently only supports byte offsets in matches. So we have to // match spans on UTF8 and then convert it back to Unicode. - const utf8 = Buffer.from(text, 'utf8'); - const toUTF16 = (offset: number) => { - return utf8.toString('utf8', 0, offset).length; - }; + const utf8 = new TextEncoder().encode(text); + const decoder = new TextDecoder(); let result = ''; - let lastIndex = 0; + let lastByteIndex = 0; for (const match of matches) { const full = match.get[0]; - result += text.slice(lastIndex, toUTF16(full.start)); + result += decoder.decode(utf8.slice(lastByteIndex, full.start)); const replacements = new Map(); match.get.forEach((m, i) => replacements.set(String(i), m.value)); Object.entries(match.name).forEach(([name, m]) => replacements.set(name, m.value)); result += replacement.replace(replacements); - lastIndex = toUTF16(full.end); + lastByteIndex = full.end; } - result += text.slice(lastIndex); + result += decoder.decode(utf8.slice(lastByteIndex)); return result; };