Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply Arabic contextual forms natively #3

Draft
wants to merge 11 commits into
base: complex-text-50
Choose a base branch
from
45 changes: 45 additions & 0 deletions build/generate-unicode-data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import * as fs from 'fs';

// Base URL of the Unicode Character Database (UCD) data files.
// Or https://www.unicode.org/Public/draft/UCD/ucd if the next Unicode version is finalized and awaiting publication.
const ucdBaseUrl = 'https://www.unicode.org/Public/UCD/latest/ucd';

/**
 * Downloads the UCD data file for a property and collects every code point
 * (or code point range) whose record assigns it exactly the given value.
 *
 * @param property - Property name written with underscores, e.g. `Indic_Syllabic_Category`.
 *     The underscores are removed to form the data file name.
 * @param value - Property value to select, e.g. `Invisible_Stacker`. It is matched literally and
 *     in full, so `Vowel` does not also select `Vowel_Independent`.
 * @returns `version` and `date` as parsed from the first two lines of the file (`null` when the
 *     header is not in the expected form), and `characterClass`: the selected code points written
 *     as escaped contents of a regular expression character class (`\uXXXX`, `\u{XXXXX}`, and
 *     `-` between the two ends of a range).
 * @throws Error if the file cannot be fetched or if no record carries the requested value.
 */
async function getPropertyData(property: string, value: string): Promise<{[_: string]: string}> {
    const propertyFileUrl = `${ucdBaseUrl}/${property.replace(/_/g, '')}.txt`;
    const response = await fetch(propertyFileUrl);
    if (!response.ok) {
        throw new Error(`Unable to fetch latest Unicode character database file for ${property}: ${response.status}`);
    }

    const table = await response.text();
    const header = table.match(/^# \w+-(\d+\.\d+\.\d+)\.txt\n# Date: (\d\d\d\d-\d\d-\d\d)/);

    // Match the value literally, and only as a whole token: without the trailing `(?!\w)`,
    // a value that is a prefix of another value would select that other value's records too.
    const escapedValue = value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // A record starts with one hexadecimal code point or a `first..last` range.
    const tableRegExp = new RegExp(`^([0-9A-F]{4,6}(?:\\.\\.[0-9A-F]{4,6})?)(?= *; ${escapedValue}(?!\\w))`, 'gm');

    // String.prototype.match returns null, not an empty array, when a global pattern finds nothing.
    const records = table.match(tableRegExp);
    if (!records) {
        throw new Error(`No code points found with ${property}=${value} in ${propertyFileUrl}`);
    }

    const characterClass = records
        .map(record => record
            .split('..')
            // Code points above U+FFFF need the braced escape form.
            .map(codePoint => (codePoint.length > 4) ? `\\u{${codePoint}}` : `\\u${codePoint}`)
            .join('-'))
        .join('');
    return {
        version: header && header[1],
        date: header && header[2],
        characterClass,
    };
}

// Look up the code points with Indic_Syllabic_Category=Invisible_Stacker, along with the
// version and date of the data file they came from.
const {version, date, characterClass} = await getPropertyData('Indic_Syllabic_Category', 'Invisible_Stacker');

// Full text of the generated module. The doubled backslashes and escaped backticks below come
// out as single backslashes and plain backticks in the written file.
const generatedSource = `// This file is generated. Edit build/generate-unicode-data.ts, then run \`npm run generate-unicode-data\`.

/**
* Returns whether two grapheme clusters detected by \`Intl.Segmenter\` can be combined to prevent an invisible combining mark from appearing unexpectedly.
*/
export function canCombineGraphemes(former: string, latter: string): boolean {
// Zero-width joiner
// Indic_Syllabic_Category=Invisible_Stacker as of Unicode ${version}, published ${date}.
// eslint-disable-next-line no-misleading-character-class
const terminalJoinersRegExp = /[\\u200D${characterClass}]$/u;
return terminalJoinersRegExp.test(former) || /^\\p{gc=Mc}/u.test(latter);
}
`;

// The path is relative to the working directory the script is run from.
fs.writeFileSync('src/data/unicode_properties.ts', generatedSource);
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
},
"scripts": {
"generate-dist-package": "node --no-warnings --loader ts-node/esm build/generate-dist-package.js",
"generate-unicode-data": "node --no-warnings --loader ts-node/esm build/generate-unicode-data.ts",
"generate-shaders": "node --no-warnings --loader ts-node/esm build/generate-shaders.ts",
"generate-struct-arrays": "node --no-warnings --loader ts-node/esm build/generate-struct-arrays.ts",
"generate-style-code": "node --no-warnings --loader ts-node/esm build/generate-style-code.ts",
Expand Down
20 changes: 13 additions & 7 deletions src/data/bucket/symbol_bucket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {ProgramConfigurationSet} from '../program_configuration';
import {TriangleIndexArray, LineIndexArray} from '../index_array_type';
import {transformText} from '../../symbol/transform_text';
import {mergeLines} from '../../symbol/merge_lines';
import {allowsVerticalWritingMode, stringContainsRTLText} from '../../util/script_detection';
import {allowsVerticalWritingMode, splitByGraphemeCluster, stringContainsRTLText} from '../../util/script_detection';
import {WritingMode} from '../../symbol/shaping';
import {loadGeometry} from '../load_geometry';
import {toEvaluationFeature} from '../evaluation_feature';
Expand Down Expand Up @@ -108,8 +108,6 @@ const shaderOpacityAttributes = [
{name: 'a_fade_opacity', components: 1, type: 'Uint8' as ViewType, offset: 0}
];

const segmenter = new Intl.Segmenter();

function addVertex(
array: StructArray,
anchorX: number,
Expand Down Expand Up @@ -426,12 +424,20 @@ export class SymbolBucket implements Bucket {
allowVerticalPlacement: boolean,
doesAllowVerticalWritingMode: boolean) {

for (const {segment} of segmenter.segment(text)) {
stack[segment] = true;
for (const grapheme of splitByGraphemeCluster(text)) {
stack[grapheme] = true;
if (/\p{sc=Arab}/u.test(grapheme)) {
// Depend on all four forms of an Arabic letter, just in case.
const tatweel = '\u0640';
stack[tatweel] = true; // isolated
stack[grapheme + tatweel] = true; // initial
stack[tatweel + grapheme + tatweel] = true; // medial
stack[tatweel + grapheme] = true; // final
}
if ((textAlongLine || allowVerticalPlacement) && doesAllowVerticalWritingMode) {
const verticalChar = verticalizedCharacterMap[segment];
const verticalChar = verticalizedCharacterMap[grapheme];
if (verticalChar) {
stack[segment] = true;
stack[grapheme] = true;
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions src/data/unicode_properties.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This file is generated. Edit build/generate-unicode-data.ts, then run `npm run generate-unicode-data`.

/**
 * Decides whether two adjacent grapheme clusters, as split by `Intl.Segmenter`, may be merged
 * into one so that an invisible combining mark does not appear unexpectedly.
 *
 * @param former - The earlier of the two grapheme clusters.
 * @param latter - The grapheme cluster that follows `former`.
 * @returns `true` if `former` ends with a zero-width joiner or an invisible stacker, or if
 *     `latter` starts with a spacing combining mark (General_Category=Mc); otherwise `false`.
 */
export function canCombineGraphemes(former: string, latter: string): boolean {
    // U+200D is the zero-width joiner. The remaining members are the code points with
    // Indic_Syllabic_Category=Invisible_Stacker as of Unicode 16.0.0, published 2024-04-30.
    // eslint-disable-next-line no-misleading-character-class
    const endsWithJoiner = /[\u200D\u1039\u17D2\u1A60\u1BAB\uAAF6\u{10A3F}\u{11133}\u{113D0}\u{1193E}\u{11A47}\u{11A99}\u{11D45}\u{11D97}\u{11F42}]$/u;
    if (endsWithJoiner.test(former)) {
        return true;
    }

    const startsWithSpacingMark = /^\p{gc=Mc}/u;
    return startsWithSpacingMark.test(latter);
}
23 changes: 20 additions & 3 deletions src/render/glyph_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,17 +176,34 @@ export class GlyphManager {

const char = tinySDF.draw(grapheme);

let leftOffset = 0;
let rightOffset = 0;
const tatweel = '\u0640';
if (grapheme !== tatweel && grapheme.includes(tatweel)) {
let tatweelGlyph = entry.glyphs[tatweel];
if (!tatweelGlyph) {
// Render a kashida/tatweel to get its metrics.
tatweelGlyph = entry.glyphs[tatweel] = this._tinySDF(entry, stack, tatweel);
}
if (grapheme.startsWith(tatweel)) {
rightOffset += tatweelGlyph.metrics.advance;
}
if (grapheme.endsWith(tatweel)) {
leftOffset += tatweelGlyph.metrics.advance;
}
}

const isControl = /^\p{gc=Cf}+$/u.test(grapheme);

return {
grapheme,
bitmap: new AlphaImage({width: char.width || 30 * textureScale, height: char.height || 30 * textureScale}, char.data),
metrics: {
width: isControl ? 0 : (char.glyphWidth / textureScale || 24),
width: isControl ? 0 : ((char.glyphWidth / textureScale || 24) - leftOffset - rightOffset),
height: char.glyphHeight / textureScale || 24,
left: (char.glyphLeft - buffer) / textureScale || 0,
left: ((char.glyphLeft - buffer) / textureScale || 0) - leftOffset,
top: char.glyphTop / textureScale || 0,
advance: isControl ? 0 : (char.glyphAdvance / textureScale || 24),
advance: isControl ? 0 : ((char.glyphAdvance / textureScale || 24) - leftOffset - rightOffset),
isDoubleResolution: true
}
};
Expand Down
6 changes: 1 addition & 5 deletions src/source/rtl_text_plugin_worker.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import {PluginState, RTLPluginStatus} from './rtl_text_plugin_status';

export interface RTLTextPlugin {
applyArabicShaping: (a: string) => string;
processBidirectionalText: ((b: string, a: Array<number>) => Array<string>);
processStyledBidirectionalText: ((c: string, b: Array<number>, a: Array<number>) => Array<[string, Array<number>]>);
}

class RTLWorkerPlugin implements RTLTextPlugin {
applyArabicShaping: (a: string) => string = null;
processBidirectionalText: ((b: string, a: Array<number>) => Array<string>) = null;
processStyledBidirectionalText: ((c: string, b: Array<number>, a: Array<number>) => Array<[string, Array<number>]>) = null;
pluginStatus: RTLPluginStatus = 'unavailable';
Expand All @@ -26,14 +24,12 @@ class RTLWorkerPlugin implements RTLTextPlugin {
}

setMethods(rtlTextPlugin: RTLTextPlugin) {
this.applyArabicShaping = rtlTextPlugin.applyArabicShaping;
this.processBidirectionalText = rtlTextPlugin.processBidirectionalText;
this.processStyledBidirectionalText = rtlTextPlugin.processStyledBidirectionalText;
}

isParsed(): boolean {
return this.applyArabicShaping != null &&
this.processBidirectionalText != null &&
return this.processBidirectionalText != null &&
this.processStyledBidirectionalText != null;
}

Expand Down
4 changes: 0 additions & 4 deletions src/source/worker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ describe('Worker RTLTextPlugin', () => {
worker = new Worker(_self);
global.fetch = null;
rtlWorkerPlugin.setMethods({
applyArabicShaping: null,
processBidirectionalText: null,
processStyledBidirectionalText: null
});
Expand All @@ -49,13 +48,11 @@ describe('Worker RTLTextPlugin', () => {

test('should not throw and set values in plugin', () => {
const rtlTextPlugin = {
applyArabicShaping: 'test',
processBidirectionalText: 'test',
processStyledBidirectionalText: 'test',
};

_self.registerRTLTextPlugin(rtlTextPlugin);
expect(rtlWorkerPlugin.applyArabicShaping).toBe('test');
expect(rtlWorkerPlugin.processBidirectionalText).toBe('test');
expect(rtlWorkerPlugin.processStyledBidirectionalText).toBe('test');
});
Expand All @@ -66,7 +63,6 @@ describe('Worker RTLTextPlugin', () => {
});

const rtlTextPlugin = {
applyArabicShaping: jest.fn(),
processBidirectionalText: jest.fn(),
processStyledBidirectionalText: jest.fn(),
};
Expand Down
Loading