Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

replace graphemer with unicode-segmenter #12617

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@
"filesize": "10.1.2",
"github-markdown-css": "^5.5.1",
"glob-to-regexp": "^0.4.1",
"graphemer": "^1.4.0",
"highlight.js": "^11.3.1",
"html-entities": "^2.0.0",
"is-ip": "^3.1.0",
Expand Down Expand Up @@ -133,6 +132,7 @@
"sanitize-html": "2.13.0",
"tar-js": "^0.3.0",
"ua-parser-js": "^1.0.2",
"unicode-segmenter": "^0.9.0",
"uuid": "^10.0.0",
"what-input": "^5.2.10"
},
Expand Down
11 changes: 5 additions & 6 deletions src/HtmlUtils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/matrix";
import { Optional } from "matrix-events-sdk";
import escapeHtml from "escape-html";
import GraphemeSplitter from "graphemer";
import { GraphemeCategory, graphemeSegments } from "unicode-segmenter";
import { getEmojiFromUnicode } from "@matrix-org/emojibase-bindings";

import { IExtendedSanitizeOptions } from "./@types/sanitize-html";
Expand Down Expand Up @@ -265,17 +265,16 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea
let text = "";
let key = 0;

const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(message)) {
if (EMOJIBASE_REGEX.test(char)) {
for (const { segment, _catBegin } of graphemeSegments(message)) {
if (_catBegin === GraphemeCategory.Extended_Pictographic || _catBegin === GraphemeCategory.Regional_Indicator) {
if (text) {
result.push(text);
text = "";
}
result.push(emojiToSpan(char, key));
result.push(emojiToSpan(segment, key));
key++;
} else {
text += char;
text += segment;
}
}
if (text) {
Expand Down
18 changes: 6 additions & 12 deletions src/editor/parts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ limitations under the License.

import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
import GraphemeSplitter from "graphemer";
import { GraphemeCategory, graphemeSegments } from "unicode-segmenter/grapheme";

import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
import { unicodeToShortcode } from "../HtmlUtils";
Expand Down Expand Up @@ -641,28 +641,22 @@ export class PartCreator {
return new UserPillPart(userId, displayName, member || undefined);
}

private static isRegionalIndicator(c: string): boolean {
const codePoint = c.codePointAt(0) ?? 0;
return codePoint != 0 && c.length == 2 && 0x1f1e6 <= codePoint && codePoint <= 0x1f1ff;
}

public plainWithEmoji(text: string): (PlainPart | EmojiPart)[] {
const parts: (PlainPart | EmojiPart)[] = [];
let plainText = "";

const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(text)) {
if (EMOJIBASE_REGEX.test(char)) {
for (const { segment, _catBegin } of graphemeSegments(text)) {
if (_catBegin === GraphemeCategory.Extended_Pictographic || _catBegin === GraphemeCategory.Regional_Indicator) {
if (plainText) {
parts.push(this.plain(plainText));
plainText = "";
}
parts.push(this.emoji(char));
if (PartCreator.isRegionalIndicator(text)) {
parts.push(this.emoji(segment));
if (_catBegin === GraphemeCategory.Regional_Indicator) {
parts.push(this.plain(REGIONAL_EMOJI_SEPARATOR));
}
} else {
plainText += char;
plainText += segment;
}
}
if (plainText) {
Expand Down
8 changes: 4 additions & 4 deletions src/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ limitations under the License.
* @param text the plaintext to put in the user's clipboard
*/
import { logger } from "matrix-js-sdk/src/logger";
import GraphemeSplitter from "graphemer";
import { graphemeSegments } from "unicode-segmenter";

export async function copyPlaintext(text: string): Promise<boolean> {
try {
Expand Down Expand Up @@ -92,7 +92,7 @@ export function getSelectedText(): string {
* @returns the first grapheme or an empty string if given an empty string
*/
export function getFirstGrapheme(str: string): string {
const splitter = new GraphemeSplitter();
const result = splitter.iterateGraphemes(str).next();
return result.done ? "" : result.value;
const segments = graphemeSegments(str);
const result = segments.next();
return result.done ? "" : result.value.segment;
}
36 changes: 8 additions & 28 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8918,16 +8918,7 @@ string-length@^4.0.1:
char-regex "^1.0.2"
strip-ansi "^6.0.0"

"string-width-cjs@npm:string-width@^4.2.0":
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
dependencies:
emoji-regex "^8.0.0"
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"

string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
Expand Down Expand Up @@ -9033,14 +9024,7 @@ string_decoder@~1.1.1:
dependencies:
safe-buffer "~5.1.0"

"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
dependencies:
ansi-regex "^5.0.1"

strip-ansi@^6.0.0, strip-ansi@^6.0.1:
"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
Expand Down Expand Up @@ -9562,6 +9546,11 @@ unicode-property-aliases-ecmascript@^2.0.0:
resolved "https://registry.yarnpkg.com/unicode-property-aliases-ecmascript/-/unicode-property-aliases-ecmascript-2.1.0.tgz#43d41e3be698bd493ef911077c9b131f827e8ccd"
integrity sha512-6t3foTQI9qne+OZoVQB/8x8rk2k1eVy1gRXhV3oFQ5T6R1dqQ1xtin3XqSlx3+ATBkliTaR/hHyJBm+LVPNM8w==

unicode-segmenter@^0.9.0:
version "0.9.0"
resolved "https://registry.yarnpkg.com/unicode-segmenter/-/unicode-segmenter-0.9.0.tgz#4285467f2629b0b7d4fa0f16df87c76064af1536"
integrity sha512-Y1TfI9jUxhEF6j0rKDLoNSou38jR5dd79xe8H9I12jiMm03Wwjrrk4u70u8NQz9CeamROcAQG8KEQaFhJPwBPA==

universalify@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.2.0.tgz#6451760566fa857534745ab1dde952d1b1761be0"
Expand Down Expand Up @@ -9870,7 +9859,7 @@ which@^2.0.1:
dependencies:
isexe "^2.0.0"

"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
Expand All @@ -9888,15 +9877,6 @@ wrap-ansi@^6.2.0:
string-width "^4.1.0"
strip-ansi "^6.0.0"

wrap-ansi@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
dependencies:
ansi-styles "^4.0.0"
string-width "^4.1.0"
strip-ansi "^6.0.0"

wrap-ansi@^8.1.0:
version "8.1.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"
Expand Down
Loading