diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md index 6b51208e53..9e4376e95e 100644 --- a/docs/topics/reading-and-writing-to-file.md +++ b/docs/topics/reading-and-writing-to-file.md @@ -480,6 +480,41 @@ $reader->setSheetIndex(0); $spreadsheet = $reader->load('sample.csv'); ``` +You can also set the reader to guess the encoding +rather than calling guessEncoding directly. In this case, +the user-settable fallback encoding is used if nothing else works. + +```php +$reader = new \PhpOffice\PhpSpreadsheet\Reader\Csv(); +$reader->setInputEncoding(\PhpOffice\PhpSpreadsheet\Reader\Csv::GUESS_ENCODING); +$reader->setFallbackEncoding('ISO-8859-2'); // default CP1252 without this statement +$reader->setDelimiter(';'); +$reader->setEnclosure(''); +$reader->setSheetIndex(0); + +$spreadsheet = $reader->load('sample.csv'); +``` + +Finally, you can set a callback to be invoked when the constructor is executed, +either through `new Csv()` or `IOFactory::load`, +and have that callback set the customizable attributes to whatever +defaults are appropriate for your environment. + +```php +function constructorCallback(\PhpOffice\PhpSpreadsheet\Reader\Csv $reader): void +{ + $reader->setInputEncoding(\PhpOffice\PhpSpreadsheet\Reader\Csv::GUESS_ENCODING); + $reader->setFallbackEncoding('ISO-8859-2'); + $reader->setDelimiter(','); + $reader->setEnclosure('"'); + // Following represents how Excel behaves better than the default escape character + $reader->setEscapeCharacter((version_compare(PHP_VERSION, '7.4') < 0) ? "\x0" : ''); +} + +\PhpOffice\PhpSpreadsheet\Reader\Csv::setConstructorCallback('constructorCallback'); +$spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load('sample.csv'); +``` + #### Read a specific worksheet CSV files can only contain one worksheet. 
Therefore, you can specify diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 7fb5a0f68b..db5faf277c 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -2395,101 +2395,6 @@ parameters: count: 1 path: src/PhpSpreadsheet/Reader/BaseReader.php - - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\:\\:\\$delimiter \\(string\\) does not accept string\\|null\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Parameter \\#1 \\$var of function count expects array\\|Countable, array\\|null given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\:\\:openFileOrMemory\\(\\) has parameter \\$pFilename with no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Parameter \\#1 \\$value of static method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:convertEncoding\\(\\) expects string, string\\|false given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Parameter \\#1 \\$fp of function fwrite expects resource, resource\\|false given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Argument of an invalid type array\\|null supplied for foreach, only iterables are supported\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Parameter \\#2 \\$newvalue of function ini_set expects string, string\\|false given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Call to function is_array\\(\\) with string will always evaluate to false\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv.php - - - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$fileHandle has no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Property 
PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$escapeCharacter has no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$enclosure has no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$counts has no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$numberLines has no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$delimiter has no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:__construct\\(\\) has parameter \\$enclosure with no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:__construct\\(\\) has parameter \\$escapeCharacter with no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:__construct\\(\\) has parameter \\$fileHandle with no typehint specified\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Parameter \\#2 \\$subject of function preg_match expects string, string\\|null given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:getNextLine\\(\\) should return string\\|false but returns 
string\\|null\\.$#" - count: 1 - path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php - - message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Html\\:\\:\\$rowspan has no typehint specified\\.$#" count: 1 @@ -7535,41 +7440,6 @@ parameters: count: 5 path: tests/PhpSpreadsheetTests/NamedRangeTest.php - - - message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:setFilterType\\(\\) has parameter \\$type with no typehint specified\\.$#" - count: 1 - path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter1\\(\\) has no return typehint specified\\.$#" - count: 1 - path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter1\\(\\) has parameter \\$row with no typehint specified\\.$#" - count: 1 - path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter0\\(\\) has no return typehint specified\\.$#" - count: 1 - path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter0\\(\\) has parameter \\$row with no typehint specified\\.$#" - count: 1 - path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php - - - - message: "#^Cannot call method getCell\\(\\) on PhpOffice\\\\PhpSpreadsheet\\\\Worksheet\\\\Worksheet\\|null\\.$#" - count: 3 - path: tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php - - - - message: "#^Call to static method PHPUnit\\\\Framework\\\\Assert\\:\\:assertNull\\(\\) with string will always evaluate to false\\.$#" - count: 1 - path: tests/PhpSpreadsheetTests/Reader/CsvTest.php - - message: "#^Unreachable statement \\- code above always terminates\\.$#" count: 1 diff --git 
a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index dc746735d2..b7bc0d49b4 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -10,6 +10,8 @@ class Csv extends BaseReader { + const DEFAULT_FALLBACK_ENCODING = 'CP1252'; + const GUESS_ENCODING = 'guess'; const UTF8_BOM = "\xEF\xBB\xBF"; const UTF8_BOM_LEN = 3; const UTF16BE_BOM = "\xfe\xff"; @@ -33,10 +35,17 @@ class Csv extends BaseReader private $inputEncoding = 'UTF-8'; /** - * Delimiter. + * Fallback encoding if 'guess' strikes out. * * @var string */ + private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING; + + /** + * Delimiter. + * + * @var ?string + */ private $delimiter; /** @@ -67,38 +76,65 @@ class Csv extends BaseReader */ private $escapeCharacter = '\\'; + /** + * Callback for setting defaults in construction. + * + * @var ?callable + */ + private static $constructorCallback; + /** * Create a new CSV Reader instance. */ public function __construct() { parent::__construct(); + $callback = self::$constructorCallback; + if ($callback !== null) { + $callback($this); + } } /** - * Set input encoding. - * - * @param string $pValue Input encoding, eg: 'UTF-8' + * Set a callback to change the defaults. * - * @return $this + * The callback must accept the Csv Reader object as the first parameter, + * and it should return void. */ - public function setInputEncoding($pValue) + public static function setConstructorCallback(?callable $callback): void + { + self::$constructorCallback = $callback; + } + + public static function getConstructorCallback(): ?callable + { + return self::$constructorCallback; + } + + public function setInputEncoding(string $pValue): self { $this->inputEncoding = $pValue; return $this; } - /** - * Get input encoding. 
- * - * @return string - */ - public function getInputEncoding() + public function getInputEncoding(): string { return $this->inputEncoding; } + public function setFallbackEncoding(string $pValue): self + { + $this->fallbackEncoding = $pValue; + + return $this; + } + + public function getFallbackEncoding(): string + { + return $this->fallbackEncoding; + } + /** * Move filepointer past any BOM marker. */ @@ -161,12 +197,8 @@ protected function inferSeparator(): void /** * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). - * - * @param string $pFilename - * - * @return array */ - public function listWorksheetInfo($pFilename) + public function listWorksheetInfo(string $pFilename): array { // Open file $this->openFileOrMemory($pFilename); @@ -185,9 +217,11 @@ public function listWorksheetInfo($pFilename) $worksheetInfo[0]['totalColumns'] = 0; // Loop through each line of the file in turn - while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { + $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter); + while (is_array($rowData)) { ++$worksheetInfo[0]['totalRows']; $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); + $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter); } $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); @@ -215,34 +249,35 @@ public function load($pFilename) return $this->loadIntoExisting($pFilename, $spreadsheet); } - private function openFileOrMemory($pFilename): void + private function openFileOrMemory(string $pFilename): void { // Open file $fhandle = $this->canRead($pFilename); if (!$fhandle) { throw new Exception($pFilename . 
' is an Invalid Spreadsheet file.'); } + if ($this->inputEncoding === self::GUESS_ENCODING) { + $this->inputEncoding = self::guessEncoding($pFilename, $this->fallbackEncoding); + } $this->openFile($pFilename); if ($this->inputEncoding !== 'UTF-8') { fclose($this->fileHandle); $entireFile = file_get_contents($pFilename); $this->fileHandle = fopen('php://memory', 'r+b'); - $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding); - fwrite($this->fileHandle, $data); - $this->skipBOM(); + if ($this->fileHandle !== false && $entireFile !== false) { + $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding); + fwrite($this->fileHandle, $data); + $this->skipBOM(); + } } } /** * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. - * - * @param string $pFilename - * - * @return Spreadsheet */ - public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) + public function loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet): Spreadsheet { - $lineEnding = ini_get('auto_detect_line_endings'); + $lineEnding = ini_get('auto_detect_line_endings') ?: '0'; ini_set('auto_detect_line_endings', '1'); // Open file @@ -265,7 +300,8 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) $outRow = 0; // Loop through each line of the file in turn - while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { + $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter); + while (is_array($rowData)) { $noOutputYet = true; $columnLetter = 'A'; foreach ($rowData as $rowDatum) { @@ -283,6 +319,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) } ++$columnLetter; } + $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? 
'', $this->enclosure, $this->escapeCharacter); ++$currentRow; } @@ -295,48 +332,24 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) return $spreadsheet; } - /** - * Get delimiter. - * - * @return string - */ - public function getDelimiter() + public function getDelimiter(): ?string { return $this->delimiter; } - /** - * Set delimiter. - * - * @param string $delimiter Delimiter, eg: ',' - * - * @return $this - */ - public function setDelimiter($delimiter) + public function setDelimiter(string $delimiter): self { $this->delimiter = $delimiter; return $this; } - /** - * Get enclosure. - * - * @return string - */ - public function getEnclosure() + public function getEnclosure(): string { return $this->enclosure; } - /** - * Set enclosure. - * - * @param string $enclosure Enclosure, defaults to " - * - * @return $this - */ - public function setEnclosure($enclosure) + public function setEnclosure(string $enclosure): self { if ($enclosure == '') { $enclosure = '"'; @@ -346,76 +359,53 @@ public function setEnclosure($enclosure) return $this; } - /** - * Get sheet index. - * - * @return int - */ - public function getSheetIndex() + public function getSheetIndex(): int { return $this->sheetIndex; } - /** - * Set sheet index. - * - * @param int $pValue Sheet index - * - * @return $this - */ - public function setSheetIndex($pValue) + public function setSheetIndex(int $pValue): self { $this->sheetIndex = $pValue; return $this; } - /** - * Set Contiguous. - * - * @param bool $contiguous - * - * @return $this - */ - public function setContiguous($contiguous) + public function setContiguous(bool $contiguous): self { $this->contiguous = (bool) $contiguous; return $this; } - /** - * Get Contiguous. - * - * @return bool - */ - public function getContiguous() + public function getContiguous(): bool { return $this->contiguous; } - /** - * Set escape backslashes. 
- * - * @param string $escapeCharacter - * - * @return $this - */ - public function setEscapeCharacter($escapeCharacter) + public function setEscapeCharacter(string $escapeCharacter): self { $this->escapeCharacter = $escapeCharacter; return $this; } + public function getEscapeCharacter(): string + { + return $this->escapeCharacter; + } + /** - * Get escape backslashes. + * Scrutinizer believes, incorrectly, that the specific pathinfo + * call in canRead can return something other than an array. + * Phpstan knows better. + * This function satisfies both. * - * @return string + * @param mixed $extension */ - public function getEscapeCharacter() + private static function extractStringLower($extension): string { - return $this->escapeCharacter; + return is_string($extension) ? strtolower($extension) : ''; } /** @@ -437,8 +427,7 @@ public function canRead($pFilename) fclose($this->fileHandle); // Trust file extension if any - $extension = pathinfo($pFilename, PATHINFO_EXTENSION); - $extension = is_array($extension) ? 
'' : strtolower($extension); + $extension = self::extractStringLower(pathinfo($pFilename, PATHINFO_EXTENSION)); if (in_array($extension, ['csv', 'tsv'])) { return true; } @@ -504,7 +493,7 @@ private static function guessEncodingBom(string $filename): string return $encoding; } - public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string + public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string { $encoding = self::guessEncodingBom($filename); if ($encoding === '') { diff --git a/src/PhpSpreadsheet/Reader/Csv/Delimiter.php b/src/PhpSpreadsheet/Reader/Csv/Delimiter.php index eb62c9ac04..fc298957b8 100644 --- a/src/PhpSpreadsheet/Reader/Csv/Delimiter.php +++ b/src/PhpSpreadsheet/Reader/Csv/Delimiter.php @@ -6,19 +6,28 @@ class Delimiter { protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~']; + /** @var resource */ protected $fileHandle; + /** @var string */ protected $escapeCharacter; + /** @var string */ protected $enclosure; + /** @var array */ protected $counts = []; + /** @var int */ protected $numberLines = 0; + /** @var ?string */ protected $delimiter; - public function __construct($fileHandle, $escapeCharacter, $enclosure) + /** + * @param resource $fileHandle + */ + public function __construct($fileHandle, string $escapeCharacter, string $enclosure) { $this->fileHandle = $fileHandle; $this->escapeCharacter = $escapeCharacter; @@ -52,15 +61,13 @@ protected function countPotentialDelimiters(): void protected function countDelimiterValues(string $line, array $delimiterKeys): void { $splitString = str_split($line, 1); - if (!is_array($splitString)) { - return; - } + if (is_array($splitString)) { + $distribution = array_count_values($splitString); + $countLine = array_intersect_key($distribution, $delimiterKeys); - $distribution = array_count_values($splitString); - $countLine = array_intersect_key($distribution, $delimiterKeys); - - foreach 
(self::POTENTIAL_DELIMETERS as $delimiter) { - $this->counts[$delimiter][] = $countLine[$delimiter] ?? 0; + foreach (self::POTENTIAL_DELIMETERS as $delimiter) { + $this->counts[$delimiter][] = $countLine[$delimiter] ?? 0; + } } } @@ -137,8 +144,8 @@ public function getNextLine() // See if we have any enclosures left in the line // if we still have an enclosure then we need to read the next line as well - } while (preg_match('/(' . $enclosure . ')/', $line) > 0); + } while (preg_match('/(' . $enclosure . ')/', $line ?? '') > 0); - return $line; + return $line ?? false; } } diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/CsvCallbackTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvCallbackTest.php new file mode 100644 index 0000000000..c27d3b71ff --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvCallbackTest.php @@ -0,0 +1,93 @@ +setInputEncoding(Csv::GUESS_ENCODING); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('Å', $sheet->getCell('A1')->getValue()); + } + + public function callbackSetFallbackEncoding(Csv $reader): void + { + $reader->setFallbackEncoding('ISO-8859-2'); + $reader->setInputEncoding(Csv::GUESS_ENCODING); + $reader->setEscapeCharacter((version_compare(PHP_VERSION, '7.4') < 0) ? 
"\x0" : ''); + } + + public function testFallbackEncodingDefltIso2(): void + { + Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']); + $filename = 'tests/data/Reader/CSV/premiere.win1252.csv'; + $reader = new Csv(); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('premičre', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixičme', $sheet->getCell('C2')->getValue()); + } + + public function testIOFactory(): void + { + Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']); + $filename = 'tests/data/Reader/CSV/premiere.win1252.csv'; + $spreadsheet = IOFactory::load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('premičre', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixičme', $sheet->getCell('C2')->getValue()); + } + + public function testNonFallbackEncoding(): void + { + Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']); + $filename = 'tests/data/Reader/CSV/premiere.utf16be.csv'; + $reader = new Csv(); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('première', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixième', $sheet->getCell('C2')->getValue()); + } + + public function testDefaultEscape(): void + { + self::assertNull(Csv::getConstructorCallback()); + $filename = 'tests/data/Reader/CSV/escape.csv'; + $spreadsheet = IOFactory::load($filename); + $sheet = $spreadsheet->getActiveSheet(); + // this is not how Excel views the file + self::assertEquals('a\"hello', $sheet->getCell('A1')->getValue()); + } + + public function testBetterEscape(): void + { + Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']); + $filename = 'tests/data/Reader/CSV/escape.csv'; + $spreadsheet = IOFactory::load($filename); + $sheet = $spreadsheet->getActiveSheet(); + // this is how Excel views the file + 
self::assertEquals('a\"hello;hello;hello;\"', $sheet->getCell('A1')->getValue()); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvContiguousFilter.php similarity index 86% rename from tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php rename to tests/PhpSpreadsheetTests/Reader/Csv/CsvContiguousFilter.php index 1abe9940a4..346a6558eb 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvContiguousFilter.php @@ -1,6 +1,6 @@ endRow = $startRow + $chunkSize; } - public function setFilterType($type): void + public function setFilterType(int $type): void { $this->filterType = $type; } - public function filter1($row) + public function filter1(int $row): bool { // Include rows 1-10, followed by 100-110, etc. return $row % 100 <= 10; } - public function filter0($row) + public function filter0(int $row): bool { // Only read the heading row, and the rows that are configured in $this->_startRow and $this->_endRow if (($row == 1) || ($row >= $this->startRow && $row < $this->endRow)) { diff --git a/tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvContiguousTest.php similarity index 81% rename from tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php rename to tests/PhpSpreadsheetTests/Reader/Csv/CsvContiguousTest.php index 82f960e408..ff095dba2b 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvContiguousTest.php @@ -1,6 +1,6 @@ getActiveSheet()->setTitle('Country Data #' . 
(++$sheet)); } - $sheet = $spreadsheet->getSheetByName('Country Data #1'); - self::assertEquals('Kabul', $sheet->getCell('A2')->getValue()); - $sheet = $spreadsheet->getSheetByName('Country Data #2'); - self::assertEquals('Lesotho', $sheet->getCell('B4')->getValue()); - $sheet = $spreadsheet->getSheetByName('Country Data #3'); - self::assertEquals(-20.1, $sheet->getCell('C6')->getValue()); + self::assertSame('Kabul', self::getCellValue($spreadsheet, 'Country Data #1', 'A2')); + self::assertSame('Lesotho', self::getCellValue($spreadsheet, 'Country Data #2', 'B4')); + self::assertSame('-20.1', self::getCellValue($spreadsheet, 'Country Data #3', 'C6')); + } + + private static function getCellValue(Spreadsheet $spreadsheet, string $sheetName, string $cellAddress): string + { + $sheet = $spreadsheet->getSheetByName($sheetName); + if ($sheet === null) { + return ''; + } + + return (string) $sheet->getCell($cellAddress)->getValue(); } public function testContiguous2(): void diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php new file mode 100644 index 0000000000..448d3d1e70 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php @@ -0,0 +1,122 @@ +setInputEncoding($encoding); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('Å', $sheet->getCell('A1')->getValue()); + } + + /** + * @dataProvider providerEncodings + * + * @param string $filename + * @param string $encoding + */ + public function testWorkSheetInfo($filename, $encoding): void + { + $reader = new Csv(); + $reader->setInputEncoding($encoding); + $info = $reader->listWorksheetInfo($filename); + self::assertEquals('Worksheet', $info[0]['worksheetName']); + self::assertEquals('B', $info[0]['lastColumnLetter']); + self::assertEquals(1, $info[0]['lastColumnIndex']); + self::assertEquals(2, $info[0]['totalRows']); + self::assertEquals(2, $info[0]['totalColumns']); 
+ } + + public function providerEncodings(): array + { + return [ + ['tests/data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'], + ['tests/data/Reader/CSV/encoding.utf8.csv', 'UTF-8'], + ['tests/data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'], + ['tests/data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'], + ['tests/data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'], + ['tests/data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'], + ['tests/data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'], + ]; + } + + /** + * @dataProvider providerGuessEncoding + */ + public function testGuessEncoding(string $filename): void + { + $reader = new Csv(); + $reader->setInputEncoding(Csv::guessEncoding($filename)); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('première', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixième', $sheet->getCell('C2')->getValue()); + } + + /** + * @dataProvider providerGuessEncoding + */ + public function testFallbackEncoding(string $filename): void + { + $reader = new Csv(); + $reader->setInputEncoding(Csv::GUESS_ENCODING); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('première', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixième', $sheet->getCell('C2')->getValue()); + } + + public function providerGuessEncoding(): array + { + return [ + ['tests/data/Reader/CSV/premiere.utf8.csv'], + ['tests/data/Reader/CSV/premiere.utf8bom.csv'], + ['tests/data/Reader/CSV/premiere.utf16be.csv'], + ['tests/data/Reader/CSV/premiere.utf16bebom.csv'], + ['tests/data/Reader/CSV/premiere.utf16le.csv'], + ['tests/data/Reader/CSV/premiere.utf16lebom.csv'], + ['tests/data/Reader/CSV/premiere.utf32be.csv'], + ['tests/data/Reader/CSV/premiere.utf32bebom.csv'], + ['tests/data/Reader/CSV/premiere.utf32le.csv'], + ['tests/data/Reader/CSV/premiere.utf32lebom.csv'], + ['tests/data/Reader/CSV/premiere.win1252.csv'], + ]; + } + + public 
function testGuessEncodingDefltIso2(): void + { + $filename = 'tests/data/Reader/CSV/premiere.win1252.csv'; + $reader = new Csv(); + $reader->setInputEncoding(Csv::guessEncoding($filename, 'ISO-8859-2')); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('premičre', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixičme', $sheet->getCell('C2')->getValue()); + } + + public function testFallbackEncodingDefltIso2(): void + { + $filename = 'tests/data/Reader/CSV/premiere.win1252.csv'; + $reader = new Csv(); + self::assertSame('CP1252', $reader->getFallbackEncoding()); + $reader->setInputEncoding(Csv::GUESS_ENCODING); + $reader->setFallbackEncoding('ISO-8859-2'); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('premičre', $sheet->getCell('A1')->getValue()); + self::assertEquals('sixičme', $sheet->getCell('C2')->getValue()); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php similarity index 69% rename from tests/PhpSpreadsheetTests/Reader/CsvTest.php rename to tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php index 73c281ec0a..b29655fbdb 100644 --- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php @@ -1,6 +1,6 @@ getDelimiter()); + $delim1 = $reader->getDelimiter(); + self::assertNull($delim1); $spreadsheet = $reader->load($filename); @@ -132,21 +133,6 @@ public function testEscapeCharacters(): void self::assertSame($expected, $worksheet->toArray()); } - /** - * @dataProvider providerEncodings - * - * @param string $filename - * @param string $encoding - */ - public function testEncodings($filename, $encoding): void - { - $reader = new Csv(); - $reader->setInputEncoding($encoding); - $spreadsheet = $reader->load($filename); - $sheet = $spreadsheet->getActiveSheet(); - self::assertEquals('Å', $sheet->getCell('A1')->getValue()); - } 
- public function testInvalidWorkSheetInfo(): void { $this->expectException(ReaderException::class); @@ -154,37 +140,6 @@ public function testInvalidWorkSheetInfo(): void $reader->listWorksheetInfo(''); } - /** - * @dataProvider providerEncodings - * - * @param string $filename - * @param string $encoding - */ - public function testWorkSheetInfo($filename, $encoding): void - { - $reader = new Csv(); - $reader->setInputEncoding($encoding); - $info = $reader->listWorksheetInfo($filename); - self::assertEquals('Worksheet', $info[0]['worksheetName']); - self::assertEquals('B', $info[0]['lastColumnLetter']); - self::assertEquals(1, $info[0]['lastColumnIndex']); - self::assertEquals(2, $info[0]['totalRows']); - self::assertEquals(2, $info[0]['totalColumns']); - } - - public function providerEncodings(): array - { - return [ - ['tests/data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'], - ['tests/data/Reader/CSV/encoding.utf8.csv', 'UTF-8'], - ['tests/data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'], - ['tests/data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'], - ['tests/data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'], - ['tests/data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'], - ['tests/data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'], - ]; - } - public function testUtf16LineBreak(): void { $reader = new Csv(); @@ -296,45 +251,4 @@ public function providerEscapes(): array [(version_compare(PHP_VERSION, '7.4') < 0) ? 
"\x0" : '', ','], ]; } - - /** - * @dataProvider providerGuessEncoding - */ - public function testGuessEncoding(string $filename): void - { - $reader = new Csv(); - $reader->setInputEncoding(Csv::guessEncoding($filename)); - $spreadsheet = $reader->load($filename); - $sheet = $spreadsheet->getActiveSheet(); - self::assertEquals('première', $sheet->getCell('A1')->getValue()); - self::assertEquals('sixième', $sheet->getCell('C2')->getValue()); - } - - public function providerGuessEncoding(): array - { - return [ - ['tests/data/Reader/CSV/premiere.utf8.csv'], - ['tests/data/Reader/CSV/premiere.utf8bom.csv'], - ['tests/data/Reader/CSV/premiere.utf16be.csv'], - ['tests/data/Reader/CSV/premiere.utf16bebom.csv'], - ['tests/data/Reader/CSV/premiere.utf16le.csv'], - ['tests/data/Reader/CSV/premiere.utf16lebom.csv'], - ['tests/data/Reader/CSV/premiere.utf32be.csv'], - ['tests/data/Reader/CSV/premiere.utf32bebom.csv'], - ['tests/data/Reader/CSV/premiere.utf32le.csv'], - ['tests/data/Reader/CSV/premiere.utf32lebom.csv'], - ['tests/data/Reader/CSV/premiere.win1252.csv'], - ]; - } - - public function testGuessEncodingDefltIso2(): void - { - $filename = 'tests/data/Reader/CSV/premiere.win1252.csv'; - $reader = new Csv(); - $reader->setInputEncoding(Csv::guessEncoding($filename, 'ISO-8859-2')); - $spreadsheet = $reader->load($filename); - $sheet = $spreadsheet->getActiveSheet(); - self::assertEquals('premičre', $sheet->getCell('A1')->getValue()); - self::assertEquals('sixičme', $sheet->getCell('C2')->getValue()); - } }