Skip to content

Commit

Permalink
Merge pull request #39669 from owncloud/fix_bom_preview
Browse files Browse the repository at this point in the history
Fix the preview of text files containing a UTF-8 BOM
  • Loading branch information
jvillafanez authored Feb 15, 2022
2 parents 152ebe3 + 2e19041 commit f42653c
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 1 deletion.
7 changes: 7 additions & 0 deletions changelog/unreleased/39669
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Bugfix: Text previews had faulty content if a BOM was present

The UTF-8 byte order mark (BOM, U+FEFF) was incorrectly detected as an Arabic
character, which caused ownCloud to choose the wrong font for the text and
show unexpected results in the preview.
The BOM is now processed correctly and the preview is shown as expected.

https://github.com/owncloud/core/pull/39669
1 change: 1 addition & 0 deletions lib/private/Preview/TXT.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ private function getFontFile(array $info): string {
'Hangul' => 'NotoSansCJKkr/NotoSansMonoCJKkr-Regular.otf', // korean
'Devanagari' => 'NotoSansDevanagari/NotoSansDevanagari-Regular.ttf', // devanagari
'Arabic' => 'NotoSansArabic/NotoSansArabic-Regular.ttf', // arabic
'Latin' => 'NotoSans/NotoSans-Regular.ttf', // latin
];

$countInfo = $info['count'];
Expand Down
2 changes: 1 addition & 1 deletion lib/private/Utf8Analyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class Utf8Analyzer {
['range' => [0xf900, 0xfaff], 'script' => 'Han'],
['range' => [0xfb1d, 0xfb4f], 'script' => 'Hebrew'], // some unicode chars aren't assigned
['range' => [0xfb50, 0xfdff], 'script' => 'Arabic'],
['range' => [0xfe70, 0xfeff], 'script' => 'Arabic'],
['range' => [0xfe70, 0xfefc], 'script' => 'Arabic'],
];

/**
Expand Down
87 changes: 87 additions & 0 deletions tests/lib/Utf8AnalyzerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,37 @@ public function analyzeStreamProvider() {
]
]
],
// Include BOM marker at the beginning
[
'data://text/plain;base64,77u/bGHMiHJsIMOxbwo=',
['count', 'details', 'lines'],
PHP_INT_MAX,
[
"count" => [
"_unknown" => 2,
"Latin" => 6,
"Common" => 2,
],
"details" => [
["range" => "0-2", "str" => "", "unicode" => 65279, "unicodeHex" => "feff", "script" => "_unknown"],
["range" => "3-3", "str" => "l", "unicode" => 108, "unicodeHex" => "6c", "script" => "Latin"],
["range" => "4-4", "str" => "a", "unicode" => 97, "unicodeHex" => "61", "script" => "Latin"],
["range" => "5-6", "str" => \mb_chr(776), "unicode" => 776, "unicodeHex" => "308", "script" => "_unknown"],
["range" => "7-7", "str" => "r", "unicode" => 114, "unicodeHex" => "72", "script" => "Latin"],
["range" => "8-8", "str" => "l", "unicode" => 108, "unicodeHex" => "6c", "script" => "Latin"],
["range" => "9-9", "str" => " ", "unicode" => 32, "unicodeHex" => "20", "script" => "Common"],
["range" => "10-11", "str" => "ñ", "unicode" => 241, "unicodeHex" => "f1", "script" => "Latin"],
["range" => "12-12", "str" => "o", "unicode" => 111, "unicodeHex" => "6f", "script" => "Latin"],
["range" => "13-13", "str" => "\n", "unicode" => 10, "unicodeHex" => "a", "script" => "Common"],
],
"lines" => [
"linesNumber" => 2,
"lines" => [
["", "l", "a", \mb_chr(776), "r", "l", " ", "ñ", "o"],
]
]
],
],
// with limited chars
[
'data://text/plain;base64,44Gr56e75YuVCuacnQo=',
Expand Down Expand Up @@ -450,6 +481,62 @@ public function analyzeStringProvider() {
]
],
],
// Include BOM marker at the beginning
[
"\xef\xbb\xbflat pos",
['count'],
[
"count" => [
"_unknown" => 1,
"Latin" => 6,
"Common" => 1,
],
],
],
[
"\xef\xbb\xbflat pos",
['count', 'lines'],
[
"count" => [
"_unknown" => 1,
"Latin" => 6,
"Common" => 1,
],
"lines" => [
"linesNumber" => 1,
"lines" => [
["", "l", "a", "t", " ", "p", "o", "s"],
]
]
],
],
[
"\xef\xbb\xbflat pos",
['count', 'details', 'lines'],
[
"count" => [
"_unknown" => 1,
"Latin" => 6,
"Common" => 1,
],
"details" => [
["range" => "0-2", "str" => "", "unicode" => 65279, "unicodeHex" => "feff", "script" => "_unknown"],
["range" => "3-3", "str" => "l", "unicode" => 108, "unicodeHex" => "6c", "script" => "Latin"],
["range" => "4-4", "str" => "a", "unicode" => 97, "unicodeHex" => "61", "script" => "Latin"],
["range" => "5-5", "str" => "t", "unicode" => 116, "unicodeHex" => "74", "script" => "Latin"],
["range" => "6-6", "str" => " ", "unicode" => 32, "unicodeHex" => "20", "script" => "Common"],
["range" => "7-7", "str" => "p", "unicode" => 112, "unicodeHex" => "70", "script" => "Latin"],
["range" => "8-8", "str" => "o", "unicode" => 111, "unicodeHex" => "6f", "script" => "Latin"],
["range" => "9-9", "str" => "s", "unicode" => 115, "unicodeHex" => "73", "script" => "Latin"],
],
"lines" => [
"linesNumber" => 1,
"lines" => [
["", "l", "a", "t", " ", "p", "o", "s"],
]
]
],
],
];
}

Expand Down

0 comments on commit f42653c

Please sign in to comment.