Skip to content

Commit

Permalink
Make Rikaichamp better recognize text on asahi.com and nikkei.com etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
birtles committed May 22, 2021
1 parent 35a4a0c commit aa8209b
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 47 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

- Fixed handling of the first character of a text box
([#605](https://github.com/birtles/rikaichamp/issues/605))
- Made Rikaichamp better able to recognize covered-up text such as is used on
[asahi.com](https://asahi.com) and [nikkei.com](https://nikkei.com)

## 0.5.5 (2021-05-11)

Expand Down
190 changes: 143 additions & 47 deletions src/get-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,7 @@ export function getTextAtPoint(
point: Point,
maxLength?: number
): GetTextAtPointResult | null {
let position: CursorPosition | null;
if (document.caretPositionFromPoint) {
position = document.caretPositionFromPoint(point.x, point.y);
} else {
const range = document.caretRangeFromPoint(point.x, point.y);
position = range
? {
offsetNode: range.startContainer,
offset: range.startOffset,
}
: null;
}
let position = carentPositionFromPoint(point);

// Chrome not only doesn't support caretPositionFromPoint, but also
// caretRangeFromPoint doesn't return text input elements. Instead it returns
Expand Down Expand Up @@ -114,47 +103,65 @@ export function getTextAtPoint(
point
);

let closeEnough = true;
if (distanceResult) {
// If we're more than about three characters away, don't show the
// pop-up.
const { distance, glyphExtent } = distanceResult;
if (distance > glyphExtent * 3) {
previousResult = undefined;
return null;
closeEnough = false;
}
}

const result = getTextFromTextNode(
startNode,
position!.offset,
point,
maxLength
);

if (result) {
console.assert(
!!result.rangeStart,
'The range start should be set when getting text from a text node'
);
if (closeEnough) {
const result = getTextFromTextNode({
startNode,
startOffset: position!.offset,
point,
maxLength,
});

// If we synthesized a text node, substitute the original node back in.
if (startNode !== position!.offsetNode) {
console.assert(
result.rangeStart!.container === startNode,
'When using a synthesized text node the range should start' +
' from that node'
);
if (result) {
console.assert(
result.rangeEnds.length === 1 &&
result.rangeEnds[0].container === startNode,
'When using a synthesized text node there should be a single' +
' range end using the synthesized node'
!!result.rangeStart,
'The range start should be set when getting text from a text node'
);
result.rangeStart!.container = position!.offsetNode;
result.rangeEnds[0].container = position!.offsetNode;

// If we synthesized a text node, substitute the original node back in.
if (startNode !== position!.offsetNode) {
console.assert(
result.rangeStart!.container === startNode,
'When using a synthesized text node the range should start' +
' from that node'
);
console.assert(
result.rangeEnds.length === 1 &&
result.rangeEnds[0].container === startNode,
'When using a synthesized text node there should be a single' +
' range end using the synthesized node'
);
result.rangeStart!.container = position!.offsetNode;
result.rangeEnds[0].container = position!.offsetNode;
}

previousResult = { point, position: position!, result };
return result;
}
}
}

previousResult = { point, position: position!, result };
// See if we are dealing with a covering link
const parentLink = getParentLink(startNode);
if (parentLink) {
const result = getTextFromCoveringLink({
linkElem: parentLink,
originalElem: startNode,
point,
maxLength,
});
if (result) {
// Don't cache `position` since it's not the position we actually used.
previousResult = { point, position: undefined, result };
return result;
}
}
Expand All @@ -171,7 +178,6 @@ export function getTextAtPoint(
rangeEnds: [],
};
previousResult = { point, position: undefined, result };

return result;
}
}
Expand All @@ -193,6 +199,20 @@ export function getTextAtPoint(
return null;
}

/**
 * Looks up the caret position (node + offset) under the given viewport point.
 *
 * Uses `document.caretPositionFromPoint` when the browser provides it and
 * otherwise falls back to `document.caretRangeFromPoint`, converting the
 * start of the returned range into the same `{ offsetNode, offset }` shape.
 *
 * NOTE(review): the name is a misspelling of "caret". It is kept as-is because
 * other functions in this file call it by this name.
 *
 * @param point - The point to look up.
 * @returns The position at the point, or null if the browser found none.
 */
function carentPositionFromPoint(point: Point): CursorPosition | null {
  const { x, y } = point;

  if (!document.caretPositionFromPoint) {
    // Fallback API: it hands back a Range, so translate its start boundary
    // into a cursor position.
    const caretRange = document.caretRangeFromPoint(x, y);
    if (!caretRange) {
      return null;
    }

    return {
      offsetNode: caretRange.startContainer,
      offset: caretRange.startOffset,
    };
  }

  return document.caretPositionFromPoint(x, y);
}

function getOffsetFromTextInputNode({
node,
point,
Expand Down Expand Up @@ -289,15 +309,20 @@ function getDistanceFromTextNode(
return { distance, glyphExtent };
}

function getTextFromTextNode(
startNode: CharacterData,
startOffset: number,
function getTextFromTextNode({
startNode,
startOffset,
point,
maxLength,
}: {
startNode: CharacterData;
startOffset: number;
point: {
x: number;
y: number;
},
maxLength?: number
): GetTextAtPointResult | null {
};
maxLength?: number;
}): GetTextAtPointResult | null {
const isRubyAnnotationElement = (element: Element | null) => {
if (!element) {
return false;
Expand Down Expand Up @@ -529,6 +554,77 @@ function getTextFromTextNode(
return result;
}

/**
 * Finds the nearest `<a>` element that is, or contains, the given node.
 *
 * For an element the search starts at the element itself; for a text node it
 * starts at the text node's parent element. Any other kind of node (or null)
 * yields null.
 *
 * @param node - The node to start from.
 * @returns The closest anchor element, or null if there is none.
 */
function getParentLink(node: Node | null): HTMLAnchorElement | null {
  let searchRoot: Element | null = null;

  if (node && node.nodeType === Node.ELEMENT_NODE) {
    searchRoot = node as Element;
  } else if (isTextNode(node)) {
    // Text nodes have no closest(), so start from the parent (which may be
    // null for a detached text node).
    searchRoot = node.parentElement;
  }

  return searchRoot ? searchRoot.closest('a') : null;
}

// Take care of "covering links". "Covering links" is the name we give to the
// approach used by at least asahi.com and nikkei.com on their homepages where
// they create a big <a> element and a tiny (1px x 1px) span with the link text
// and then render the actual link content in a separate layer.
//
// Roughly it looks something like the following:
//
// <div>
// <a> <-- Link to article with abs-pos left/right/top/bottom: 0
// <span/> <-- Link text as a 1x1 span
// </a>
// <div> <-- Actual link content
// <figure/>
// <h2><a>Link text again</a></h2>
// etc.
// </div>
// </div>
//
// If we fail to find any text but are pointing at a link, we should try digging
// for content underneath the link
/**
 * Tries to read the text that lies underneath a link which is covering it.
 *
 * The link is temporarily made transparent to hit testing
 * (`pointer-events: none`) so that the caret lookup can reach whatever is
 * rendered below it.
 *
 * @param linkElem - The covering link to look through.
 * @param originalElem - The node the initial lookup produced; finding the same
 *   node again counts as a failure.
 * @param point - The point to look up.
 * @param maxLength - Passed through to `getTextFromTextNode`.
 * @returns The text found underneath the link, or null if the lookup did not
 *   land on a different text node.
 */
function getTextFromCoveringLink({
  linkElem,
  originalElem,
  point,
  maxLength,
}: {
  linkElem: HTMLAnchorElement;
  originalElem: Node | null;
  point: {
    x: number;
    y: number;
  };
  maxLength?: number;
}): GetTextAtPointResult | null {
  // Turn off pointer-events for the covering link
  const previousPointEvents = linkElem.style.pointerEvents;
  linkElem.style.pointerEvents = 'none';

  let position: ReturnType<typeof carentPositionFromPoint> = null;
  try {
    position = carentPositionFromPoint(point);
  } finally {
    // Always put the page's own inline style back, even if the lookup throws,
    // so that we never leave the link unclickable.
    linkElem.style.pointerEvents = previousPointEvents;
  }

  // See if we successfully found a different text node
  if (
    !position ||
    position.offsetNode === originalElem ||
    !isTextNode(position.offsetNode)
  ) {
    return null;
  }

  return getTextFromTextNode({
    startNode: position.offsetNode,
    startOffset: position.offset,
    point,
    maxLength,
  });
}

// This is a bit complicated because for a numeric year we don't require the
// 年 but for 元年 we do. i.e. '令和2' is valid but '令和元' is not.
//
// Capture group 1 holds the numeric year (ASCII, full-width, or kanji
// numerals) and is left unset when the 元年 alternative matched.
const yearRegex = /(?:([0-9０-９〇一二三四五六七八九十百]+)\s*年?|(?:元\s*年))/;
Expand Down
17 changes: 17 additions & 0 deletions tests/get-text.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,23 @@ describe('getTextAtPoint', () => {
assertTextResultEqual(result, 'うえお', lastTextNode, 0, lastTextNode, 3);
});

// Regression test for "covering links": an absolutely positioned <a> that
// sits on top of the visible article preview and hides the real text from the
// caret lookup.
it('should dig into the content behind covering links', () => {
// The following is based very heavily on the structure of article previews
// in asahi.com as of 2021-05-22 although nikkei.com is similar
//
// The first <a> is the covering link (abs-pos over the whole container, with
// a 1px x 1px hidden copy of the text); #innerLink holds the visible text.
testDiv.innerHTML =
'<div><a href="/articles/" style="position: absolute; top: 0; bottom: 0; left: 0; right: 0; z-index: 1"><span aria-hidden="true" style="display: block; width: 1px; height: 1px; overflow: hidden">あいうえお</span></a><div><div style="position: relative; width: 100%"><h2 style="z-index: auto"><a href="/articles/" id="innerLink">あいうえお</a></h2></div></div>';

// Target the visible text node, not the hidden copy inside the covering link.
const textNode = testDiv.querySelector('#innerLink')!.firstChild as Text;
// Presumably the bounding box of the character at offset 0 (あ) -- confirm
// against getBboxForOffset.
const bbox = getBboxForOffset(textNode, 0);

// Point at the right-hand edge of that box, vertically centered.
const result = getTextAtPoint({
x: bbox.right,
y: bbox.top + bbox.height / 2,
});

// The text must come from the visible node, starting at offset 1.
assertTextResultEqual(result, 'いうえお', textNode, 1, textNode, 5);
});

it('should ignore non-Japanese characters', () => {
testDiv.append('あいabc');
const textNode = testDiv.firstChild as Text;
Expand Down

0 comments on commit aa8209b

Please sign in to comment.