diff --git a/changelog/unreleased/39669 b/changelog/unreleased/39669 new file mode 100644 index 000000000000..39a92a70130f --- /dev/null +++ b/changelog/unreleased/39669 @@ -0,0 +1,7 @@ +Bugfix: Text previews had faulty content if BOM was present + +The BOM was incorrectly detected and was causing ownCloud to choose +the wrong font for the text, showing unexpected results. +The BOM is now processed correctly and the preview is shown as expected + +https://github.com/owncloud/core/pull/39669 diff --git a/lib/private/Preview/TXT.php b/lib/private/Preview/TXT.php index 6e48d029124c..5caa3ac4551b 100644 --- a/lib/private/Preview/TXT.php +++ b/lib/private/Preview/TXT.php @@ -138,6 +138,7 @@ private function getFontFile(array $info): string { 'Hangul' => 'NotoSansCJKkr/NotoSansMonoCJKkr-Regular.otf', // korean 'Devanagari' => 'NotoSansDevanagari/NotoSansDevanagari-Regular.ttf', // devanagari 'Arabic' => 'NotoSansArabic/NotoSansArabic-Regular.ttf', // arabic + 'Latin' => 'NotoSans/NotoSans-Regular.ttf', // latin ]; $countInfo = $info['count']; diff --git a/lib/private/Utf8Analyzer.php b/lib/private/Utf8Analyzer.php index 7ce47d6f2980..bcb16dc77753 100644 --- a/lib/private/Utf8Analyzer.php +++ b/lib/private/Utf8Analyzer.php @@ -92,7 +92,7 @@ class Utf8Analyzer { ['range' => [0xf900, 0xfaff], 'script' => 'Han'], ['range' => [0xfb1d, 0xfb4f], 'script' => 'Hebrew'], // some unicode chars aren't assigned ['range' => [0xfb50, 0xfdff], 'script' => 'Arabic'], - ['range' => [0xfe70, 0xfeff], 'script' => 'Arabic'], + ['range' => [0xfe70, 0xfefc], 'script' => 'Arabic'], ]; /** diff --git a/tests/lib/Utf8AnalyzerTest.php b/tests/lib/Utf8AnalyzerTest.php index ef6a2c4b68e2..accf7c107983 100644 --- a/tests/lib/Utf8AnalyzerTest.php +++ b/tests/lib/Utf8AnalyzerTest.php @@ -230,6 +230,37 @@ public function analyzeStreamProvider() { ] ] ], + // Include BOM marker at the beginning + [ + 'data://text/plain;base64,77u/bGHMiHJsIMOxbwo=', + ['count', 'details', 'lines'], + PHP_INT_MAX, + [ + "count" => [ + "_unknown" => 2, + "Latin" => 6, + "Common" => 2, + ], + "details" => [ + ["range" => "0-2", "str" => "", "unicode" => 65279, "unicodeHex" => "feff", "script" => "_unknown"], + ["range" => "3-3", "str" => "l", "unicode" => 108, "unicodeHex" => "6c", "script" => "Latin"], + ["range" => "4-4", "str" => "a", "unicode" => 97, "unicodeHex" => "61", "script" => "Latin"], + ["range" => "5-6", "str" => \mb_chr(776), "unicode" => 776, "unicodeHex" => "308", "script" => "_unknown"], + ["range" => "7-7", "str" => "r", "unicode" => 114, "unicodeHex" => "72", "script" => "Latin"], + ["range" => "8-8", "str" => "l", "unicode" => 108, "unicodeHex" => "6c", "script" => "Latin"], + ["range" => "9-9", "str" => " ", "unicode" => 32, "unicodeHex" => "20", "script" => "Common"], + ["range" => "10-11", "str" => "ñ", "unicode" => 241, "unicodeHex" => "f1", "script" => "Latin"], + ["range" => "12-12", "str" => "o", "unicode" => 111, "unicodeHex" => "6f", "script" => "Latin"], + ["range" => "13-13", "str" => "\n", "unicode" => 10, "unicodeHex" => "a", "script" => "Common"], + ], + "lines" => [ + "linesNumber" => 2, + "lines" => [ + ["", "l", "a", \mb_chr(776), "r", "l", " ", "ñ", "o"], + ] + ] + ], + ], // with limited chars [ 'data://text/plain;base64,44Gr56e75YuVCuacnQo=', @@ -450,6 +481,62 @@ public function analyzeStringProvider() { ] ], ], + // Include BOM marker at the beginning + [ + "\xef\xbb\xbflat pos", + ['count'], + [ + "count" => [ + "_unknown" => 1, + "Latin" => 6, + "Common" => 1, + ], + ], + ], + [ + "\xef\xbb\xbflat pos", + ['count', 'lines'], + [ + "count" => [ + "_unknown" => 1, + "Latin" => 6, + "Common" => 1, + ], + "lines" => [ + "linesNumber" => 1, + "lines" => [ + ["", "l", "a", "t", " ", "p", "o", "s"], + ] + ] + ], + ], + [ + "\xef\xbb\xbflat pos", + ['count', 'details', 'lines'], + [ + "count" => [ + "_unknown" => 1, + "Latin" => 6, + "Common" => 1, + ], + "details" => [ + ["range" => "0-2", "str" => "", "unicode" => 65279, "unicodeHex" => "feff", "script" => "_unknown"], + ["range" => "3-3", "str" => "l", "unicode" => 108, "unicodeHex" => "6c", "script" => "Latin"], + ["range" => "4-4", "str" => "a", "unicode" => 97, "unicodeHex" => "61", "script" => "Latin"], + ["range" => "5-5", "str" => "t", "unicode" => 116, "unicodeHex" => "74", "script" => "Latin"], + ["range" => "6-6", "str" => " ", "unicode" => 32, "unicodeHex" => "20", "script" => "Common"], + ["range" => "7-7", "str" => "p", "unicode" => 112, "unicodeHex" => "70", "script" => "Latin"], + ["range" => "8-8", "str" => "o", "unicode" => 111, "unicodeHex" => "6f", "script" => "Latin"], + ["range" => "9-9", "str" => "s", "unicode" => 115, "unicodeHex" => "73", "script" => "Latin"], + ], + "lines" => [ + "linesNumber" => 1, + "lines" => [ + ["", "l", "a", "t", " ", "p", "o", "s"], + ] + ] + ], + ], ]; }