Skip to content

Commit

Permalink
feat: add support for metric suffixes on currency values
Browse files Browse the repository at this point in the history
Fixes #2000.
  • Loading branch information
birtles committed Oct 17, 2024
1 parent cc44423 commit e0b3b8f
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ app.
- Made tap detection more reliable
thanks to [@maiself](https://github.com/maiself)
([#2014](https://github.com/birchill/10ten-ja-reader/pull/2014)).
- Added handling for metric suffixes on currencies (e.g. 40k円)
([#2000](https://github.com/birchill/10ten-ja-reader/pull/2000)).
- Added handling for 戶 and 內 kyūjitai.
- Added deinflection for additional forms of -sugiru
([#2033](https://github.com/birchill/10ten-ja-reader/pull/2033)).
Expand Down
34 changes: 34 additions & 0 deletions src/content/currency.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,39 @@ describe('extractCurrencyMetadata', () => {
value: 10000000000,
matchLen: 5,
});

// Metric suffixes
expect(extractCurrencyMetadata('200k円')).toEqual({
type: 'currency',
value: 200_000,
matchLen: 5,
});
expect(extractCurrencyMetadata('1k 円')).toEqual({
type: 'currency',
value: 1_000,
matchLen: 4,
});
expect(extractCurrencyMetadata('1M円')).toEqual({
type: 'currency',
value: 1_000_000,
matchLen: 3,
});
expect(extractCurrencyMetadata('¥40k')).toEqual({
type: 'currency',
value: 40_000,
matchLen: 4,
});
// We don't allow white space before the suffix
expect(extractCurrencyMetadata('¥40 k')).toEqual({
type: 'currency',
value: 40,
matchLen: 3,
});
// Should ignore the suffix since it's not followed by a word boundary
expect(extractCurrencyMetadata('¥40billion')).toEqual({
type: 'currency',
value: 40,
matchLen: 3,
});
});
});
32 changes: 27 additions & 5 deletions src/content/currency.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export function lookForCurrency({

const japaneseOrPrice = getCombinedCharRange([
getNegatedCharRange(originalTextDelimiter),
/[¥\s,.]/,
/[¥\s,.kKmMbBtT]/,
]);
const textDelimiter = getNegatedCharRange(japaneseOrPrice);

Expand All @@ -48,7 +48,7 @@ export function lookForCurrency({
}

const currencyRegex =
/([¥]\s*([0-9.,-]+))|(([0-9.,-]+)\s*)/;
/([¥]\s*([0-9.,-]+)([kKmMbBtT]\b)?)|(([0-9.,-]+)([kKmMbBtT])?\s*)/;

export function extractCurrencyMetadata(
text: string
Expand All @@ -58,16 +58,38 @@ export function extractCurrencyMetadata(
return undefined;
}

const valueStr = matches[2] ?? matches[4];
const valueStr = matches[2] ?? matches[5];

if (!valueStr) {
return undefined;
}

const value = parseNumber(valueStr);
if (!value) {
let value = parseNumber(valueStr);
if (value === null) {
return undefined;
}

// Handle metric suffixes---we handle them here instead of in parseNumber
// because we only support them when they are part of a currency.
const metricSuffix = matches[2] ? matches[3] : matches[6];
switch (metricSuffix) {
case 'k':
case 'K':
value *= 1_000;
break;
case 'm':
case 'M':
value *= 1_000_000;
break;
case 'b':
case 'B':
value *= 1_000_000_000;
break;
case 't':
case 'T':
value *= 1_000_000_000_000;
break;
}

return { type: 'currency', value, matchLen: matches[0].length };
}
5 changes: 5 additions & 0 deletions src/content/numbers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ describe('parseNumber', () => {
// Putting the powers of ten in the wrong order
expect(parseNumber('七十二百一')).toStrictEqual(null);

// Don't handle metric suffixes for bare numbers--we only allow them when
// part of a currency.
expect(parseNumber('40k')).toStrictEqual(null);

// Completely invalid inputs
expect(parseNumber('abc')).toStrictEqual(null);
expect(parseNumber('')).toStrictEqual(null);
Expand All @@ -79,6 +83,7 @@ describe('extractNumberMetadata', () => {
expect(extractNumberMetadata('43.2')).toStrictEqual(undefined);
expect(extractNumberMetadata('43.2')).toStrictEqual(undefined);
expect(extractNumberMetadata('43。2')).toStrictEqual(undefined);
expect(extractNumberMetadata('40k')).toStrictEqual(undefined);

// Shouldn't be zero
expect(extractNumberMetadata('〇〇〇')).toStrictEqual(undefined);
Expand Down
20 changes: 20 additions & 0 deletions tests/get-text.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,26 @@ describe('getTextAtPoint', () => {
});
});

it('should recognize Japanese yen values with metric suffixes', () => {
  // A yen amount written with a metric suffix, followed by trailing text that
  // is expected to fall outside the matched range.
  testDiv.append('1k 円 for 240 blank cards...');
  const textNode = testDiv.firstChild as Text;

  // Probe the centre of the bounding box reported for offset 0 of the node.
  const { left, top, width, height } = getBboxForOffset(textNode, 0);
  const result = getTextAtPoint({
    point: { x: left + width / 2, y: top + height / 2 },
  });

  // The returned text range spans offsets 0-5 of the text node...
  assertTextResultEqual(result, '1k 円 ', [textNode, 0, 5]);

  // ...and the attached metadata is a currency of 1,000 with a match length
  // of 4.
  const expectedMeta = { type: 'currency', value: 1_000, matchLen: 4 };
  assert.deepEqual(result?.meta, expectedMeta);
});

it('should recognize 畳 measurements', () => {
testDiv.append('面積:6畳です');
const textNode = testDiv.firstChild as Text;
Expand Down
1 change: 1 addition & 0 deletions tests/playground.html
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ <h3>Currency tests</h3>
<li>100億円</li>
<li>25.1万円</li>
<li><span>39,800</span><span>万円</span></li>
<li>1,000,000円=1,000k円=1M円です</li>
</ul>

<h3>Number tests</h3>
Expand Down

0 comments on commit e0b3b8f

Please sign in to comment.