Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Switch from graphemer to Intl.Segmenter #12697

Merged
merged 1 commit into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@
"filesize": "10.1.2",
"github-markdown-css": "^5.5.1",
"glob-to-regexp": "^0.4.1",
"graphemer": "^1.4.0",
"highlight.js": "^11.3.1",
"html-entities": "^2.0.0",
"is-ip": "^3.1.0",
Expand Down
11 changes: 5 additions & 6 deletions src/HtmlUtils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/matrix";
import { Optional } from "matrix-events-sdk";
import escapeHtml from "escape-html";
import GraphemeSplitter from "graphemer";
import { getEmojiFromUnicode } from "@matrix-org/emojibase-bindings";

import { IExtendedSanitizeOptions } from "./@types/sanitize-html";
import SettingsStore from "./settings/SettingsStore";
import { stripHTMLReply, stripPlainReply } from "./utils/Reply";
import { PERMITTED_URL_SCHEMES } from "./utils/UrlUtils";
import { sanitizeHtmlParams, transformTags } from "./Linkify";
import { graphemeSegmenter } from "./utils/strings";

export { Linkify, linkifyElement, linkifyAndSanitizeHtml } from "./Linkify";

Expand Down Expand Up @@ -265,17 +265,16 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea
let text = "";
let key = 0;

const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(message)) {
if (EMOJIBASE_REGEX.test(char)) {
for (const data of graphemeSegmenter.segment(message)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (text) {
result.push(text);
text = "";
}
result.push(emojiToSpan(char, key));
result.push(emojiToSpan(data.segment, key));
key++;
} else {
text += char;
text += data.segment;
}
}
if (text) {
Expand Down
12 changes: 5 additions & 7 deletions src/editor/parts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@ limitations under the License.

import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
import GraphemeSplitter from "graphemer";

import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
import { unicodeToShortcode } from "../HtmlUtils";
import * as Avatar from "../Avatar";
import defaultDispatcher from "../dispatcher/dispatcher";
import { Action } from "../dispatcher/actions";
import SettingsStore from "../settings/SettingsStore";
import { getFirstGrapheme } from "../utils/strings";
import { getFirstGrapheme, graphemeSegmenter } from "../utils/strings";

const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b);

Expand Down Expand Up @@ -650,19 +649,18 @@ export class PartCreator {
const parts: (PlainPart | EmojiPart)[] = [];
let plainText = "";

const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(text)) {
if (EMOJIBASE_REGEX.test(char)) {
for (const data of graphemeSegmenter.segment(text)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (plainText) {
parts.push(this.plain(plainText));
plainText = "";
}
parts.push(this.emoji(char));
parts.push(this.emoji(data.segment));
if (PartCreator.isRegionalIndicator(text)) {
parts.push(this.plain(REGIONAL_EMOJI_SEPARATOR));
}
} else {
plainText += char;
plainText += data.segment;
}
}
if (plainText) {
Expand Down
8 changes: 4 additions & 4 deletions src/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ limitations under the License.
* @param text the plaintext to put in the user's clipboard
*/
import { logger } from "matrix-js-sdk/src/logger";
import GraphemeSplitter from "graphemer";

export async function copyPlaintext(text: string): Promise<boolean> {
try {
Expand Down Expand Up @@ -85,14 +84,15 @@ export function getSelectedText(): string {
return window.getSelection()!.toString();
}

/**
* Single exported Intl.Segmenter instance, constructed with no locale or options
* (i.e. with the API's defaults).
* NOTE(review): the name implies grapheme granularity, which is the documented
* default for Intl.Segmenter — confirm if a locale or granularity is ever passed here.
*/
export const graphemeSegmenter = new Intl.Segmenter();

/**
* Returns the first grapheme in the given string,
* especially useful for strings containing emoji, will not break compound emoji up.
*
* NOTE(review): this listing is a rendered diff without +/- markers, so the body
* below shows two implementations back to back. The first three statements appear
* to be the removed graphemer version (the GraphemeSplitter import is dropped from
* this file in the same change); the last two appear to be the Intl.Segmenter
* replacement. Confirm against the merged file before copying this function.
*
* @param str string to parse
* @returns the first grapheme or an empty string if given an empty string
*/
export function getFirstGrapheme(str: string): string {
// graphemer variant: the iterator's `value` is returned as the grapheme itself.
const splitter = new GraphemeSplitter();
const result = splitter.iterateGraphemes(str).next();
return result.done ? "" : result.value;
// Intl.Segmenter variant: the iterator's `value` is a record whose `segment` field is returned.
// In both variants, `done` on the very first next() call yields the "" fallback.
const result = graphemeSegmenter.segment(str)[Symbol.iterator]().next();
return result.done ? "" : result.value.segment;
}
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"outDir": "./lib",
"declaration": true,
"jsx": "react",
"lib": ["es2021", "dom", "dom.iterable"],
"lib": ["es2022", "dom", "dom.iterable"],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unintentional commit?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it is intentional: this change is needed to get the type definitions for Intl.Segmenter.

"strict": true
},
"include": [
Expand Down
Loading