Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Php/iconv Should Not Treat FFFE/FFFF as Valid #2910

Merged
merged 2 commits into from
Jul 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -2785,36 +2785,6 @@ parameters:
count: 1
path: src/PhpSpreadsheet/Shared/OLERead.php

-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:formatNumber\\(\\) should return string but returns array\\|string\\.$#"
count: 1
path: src/PhpSpreadsheet/Shared/StringHelper.php

-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:sanitizeUTF8\\(\\) should return string but returns string\\|false\\.$#"
count: 1
path: src/PhpSpreadsheet/Shared/StringHelper.php

-
message: "#^Parameter \\#1 \\$string of function strlen expects string, float given\\.$#"
count: 1
path: src/PhpSpreadsheet/Shared/StringHelper.php

-
message: "#^Parameter \\#3 \\$subject of function str_replace expects array\\|string, float given\\.$#"
count: 1
path: src/PhpSpreadsheet/Shared/StringHelper.php

-
message: "#^Static property PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:\\$decimalSeparator \\(string\\) in isset\\(\\) is not nullable\\.$#"
count: 1
path: src/PhpSpreadsheet/Shared/StringHelper.php

-
message: "#^Static property PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:\\$thousandsSeparator \\(string\\) in isset\\(\\) is not nullable\\.$#"
count: 1
path: src/PhpSpreadsheet/Shared/StringHelper.php

-
message: "#^Static method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\TimeZone\\:\\:validateTimeZone\\(\\) is unused\\.$#"
count: 1
Expand Down
44 changes: 29 additions & 15 deletions src/PhpSpreadsheet/Shared/StringHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
namespace PhpOffice\PhpSpreadsheet\Shared;

use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use UConverter;

class StringHelper
{
/** Constants */
/** Regular Expressions */
// Fraction
const STRING_REGEXP_FRACTION = '(-?)(\d+)\s+(\d+\/\d+)';
const STRING_REGEXP_FRACTION = '~^\s*(-?)((\d*)\s+)?(\d+\/\d+)\s*$~';

/**
* Control characters array.
Expand All @@ -28,14 +29,14 @@ class StringHelper
/**
* Decimal separator.
*
* @var string
* @var ?string
*/
private static $decimalSeparator;

/**
* Thousands separator.
*
* @var string
* @var ?string
*/
private static $thousandsSeparator;

Expand Down Expand Up @@ -328,39 +329,51 @@ public static function controlCharacterPHP2OOXML($textValue)
}

/**
* Try to sanitize UTF8, stripping invalid byte sequences. Not perfect. Does not surrogate characters.
* Try to sanitize UTF8, replacing invalid sequences with Unicode substitution characters.
*/
public static function sanitizeUTF8(string $textValue): string
{
$textValue = str_replace(["\xef\xbf\xbe", "\xef\xbf\xbf"], "\xef\xbf\xbd", $textValue);
if (class_exists(UConverter::class)) {
$returnValue = UConverter::transcode($textValue, 'UTF-8', 'UTF-8');
if ($returnValue !== false) {
return $returnValue;
}
}
// @codeCoverageIgnoreStart
// I don't think any of the code below should ever be executed.
if (self::getIsIconvEnabled()) {
$textValue = @iconv('UTF-8', 'UTF-8', $textValue);

return $textValue;
$returnValue = @iconv('UTF-8', 'UTF-8', $textValue);
if ($returnValue !== false) {
return $returnValue;
}
}

$textValue = mb_convert_encoding($textValue, 'UTF-8', 'UTF-8');
// Phpstan does not think this can return false.
$returnValue = mb_convert_encoding($textValue, 'UTF-8', 'UTF-8');

return $textValue;
return $returnValue;
// @codeCoverageIgnoreEnd
}

/**
* Check if a string contains UTF8 data.
*/
public static function isUTF8(string $textValue): bool
{
return $textValue === '' || preg_match('/^./su', $textValue) === 1;
return $textValue === self::sanitizeUTF8($textValue);
}

/**
* Formats a numeric value as a string for output in various output writers forcing
* point as decimal separator in case locale is other than English.
*
* @param mixed $numericValue
* @param float|int|string $numericValue
*/
public static function formatNumber($numericValue): string
{
if (is_float($numericValue)) {
return str_replace(',', '.', $numericValue);
return str_replace(',', '.', (string) $numericValue);
}

return (string) $numericValue;
Expand Down Expand Up @@ -537,9 +550,10 @@ public static function strCaseReverse(string $textValue): string
*/
public static function convertToNumberIfFraction(string &$operand): bool
{
if (preg_match('/^' . self::STRING_REGEXP_FRACTION . '$/i', $operand, $match)) {
if (preg_match(self::STRING_REGEXP_FRACTION, $operand, $match)) {
$sign = ($match[1] == '-') ? '-' : '+';
$fractionFormula = '=' . $sign . $match[2] . $sign . $match[3];
$wholePart = ($match[3] === '') ? '' : ($sign . $match[3]);
$fractionFormula = '=' . $wholePart . $sign . $match[4];
$operand = Calculation::getInstance()->_calculateFormulaValue($fractionFormula);

return true;
Expand Down Expand Up @@ -686,6 +700,6 @@ public static function testStringAsNumeric($textValue)
}
$v = (float) $textValue;

return (is_numeric(substr($textValue, 0, strlen($v)))) ? $v : $textValue;
return (is_numeric(substr($textValue, 0, strlen((string) $v)))) ? $v : $textValue;
}
}
19 changes: 19 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,25 @@ public function testGuessEncoding(string $filename): void
self::assertEquals('sixième', $sheet->getCell('C2')->getValue());
}

public function testSurrogate(): void
{
// Surrogates should occur only in UTF-16, and should
// be properly converted to UTF8 when read.
// FFFE/FFFF are illegal, and should be converted to
// substitution character when read.
// Excel does not handle any of the cells in row 3 well.
// LibreOffice handles A3 fine, and discards B3/C3,
// which is a reasonable action.
$filename = 'tests/data/Reader/CSV/premiere.utf16le.csv';
$reader = new Csv();
$reader->setInputEncoding(Csv::guessEncoding($filename));
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertEquals('𐐀', $sheet->getCell('A3')->getValue());
self::assertEquals('', $sheet->getCell('B3')->getValue());
self::assertEquals('', $sheet->getCell('C3')->getValue());
}

/**
* @dataProvider providerGuessEncoding
*/
Expand Down
44 changes: 44 additions & 0 deletions tests/PhpSpreadsheetTests/Shared/StringHelperInvalidCharTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Shared;

use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PHPUnit\Framework\TestCase;

class StringHelperInvalidCharTest extends TestCase
{
public function testInvalidChar(): void
{
$spreadsheet = new Spreadsheet();
$sheet = $spreadsheet->getActiveSheet();
$substitution = '';
$array = [
['Normal string', 'Hello', 'Hello'],
['integer', 2, 2],
['float', 2.1, 2.1],
['boolean true', true, true],
['illegal FFFE/FFFF', "H\xef\xbf\xbe\xef\xbf\xbfello", "H{$substitution}{$substitution}ello"],
['illegal character', "H\xef\x00\x00ello", "H{$substitution}\x00\x00ello"],
['overlong character', "H\xc0\xa0ello", "H{$substitution}{$substitution}ello"],
['Osmanya as single character', "H\xf0\x90\x90\x80ello", 'H𐐀ello'],
['Osmanya as surrogate pair (x)', "\xed\xa0\x81\xed\xb0\x80", "{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}"],
['Osmanya as surrogate pair (u)', "\u{d801}\u{dc00}", "{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}"],
['Half surrogate pair (u)', "\u{d801}", "{$substitution}{$substitution}{$substitution}"],
['Control character', "\u{7}", "\u{7}"],
];

$sheet->fromArray($array);
$row = 0;
foreach ($array as $value) {
self::assertSame($value[1] === $value[2], StringHelper::isUTF8((string) $value[1]));
++$row;
$expected = $value[2];
self::assertSame(
$expected,
$sheet->getCell("B$row")->getValue(),
$sheet->getCell("A$row")->getValue()
);
}
}
}
29 changes: 29 additions & 0 deletions tests/PhpSpreadsheetTests/Shared/StringHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,33 @@ public function testSYLKtoUTF8(): void

self::assertEquals($expectedResult, $result);
}

/**
* @dataProvider providerFractions
*/
public function testFraction(string $expected, string $value): void
{
$originalValue = $value;
$result = StringHelper::convertToNumberIfFraction($value);
if ($result === false) {
self::assertSame($expected, $originalValue);
self::assertSame($expected, $value);
} else {
self::assertSame($expected, (string) $value);
self::assertNotEquals($value, $originalValue);
}
}

public function providerFractions(): array
{
return [
'non-fraction' => ['1', '1'],
'common fraction' => ['1.5', '1 1/2'],
'fraction between -1 and 0' => ['-0.5', '-1/2'],
'fraction between -1 and 0 with space' => ['-0.5', ' - 1/2'],
'fraction between 0 and 1' => ['0.75', '3/4 '],
'fraction between 0 and 1 with space' => ['0.75', ' 3/4'],
'improper fraction' => ['1.75', '7/4'],
];
}
}
Binary file modified tests/data/Reader/CSV/premiere.utf16le.csv
Binary file not shown.