diff --git a/CHANGELOG.md b/CHANGELOG.md index e568b8a7b3..bdbeed04b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Added -- Nothing +- Xlsx Reader Optionally Ignore Rows With No Cells. [Issue #3982](https://github.com/PHPOffice/PhpSpreadsheet/issues/3982) [PR #4035](https://github.com/PHPOffice/PhpSpreadsheet/pull/4035) ### Changed @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - POWER Null/Bool Args. [PR #4031](https://github.com/PHPOffice/PhpSpreadsheet/pull/4031) - Do Not Output Alignment and Protection for Conditional Format. [Issue #4025](https://github.com/PHPOffice/PhpSpreadsheet/issues/4025) [PR #4027](https://github.com/PHPOffice/PhpSpreadsheet/pull/4027) - Xls Conditional Format Improvements. [PR #4030](https://github.com/PHPOffice/PhpSpreadsheet/pull/4030) [PR #4033](https://github.com/PHPOffice/PhpSpreadsheet/pull/4033) +- Csv Reader allow use of html mimetype. [Issue #4036](https://github.com/PHPOffice/PhpSpreadsheet/issues/4036) [PR #4040](https://github.com/PHPOffice/PhpSpreadsheet/pull/4040) ## 2024-05-11 - 2.1.0 diff --git a/samples/Financial2/DISC.php b/samples/Financial2/DISC.php index f5666af4ff..ec2ce4417c 100644 --- a/samples/Financial2/DISC.php +++ b/samples/Financial2/DISC.php @@ -5,7 +5,7 @@ require __DIR__ . '/../Header.php'; -$helper->log('Returns the the Discount Rate for a security.'); +$helper->log('Returns the Discount Rate for a security.'); // Create new PhpSpreadsheet object $spreadsheet = new Spreadsheet(); diff --git a/src/PhpSpreadsheet/Reader/BaseReader.php b/src/PhpSpreadsheet/Reader/BaseReader.php index 23fdf9917c..1408f7bf0d 100644 --- a/src/PhpSpreadsheet/Reader/BaseReader.php +++ b/src/PhpSpreadsheet/Reader/BaseReader.php @@ -39,6 +39,13 @@ abstract class BaseReader implements IReader */ protected ?array $loadSheetsOnly = null; + /** + * Ignore rows with no cells? 
+ * Identifies whether the Reader should ignore rows with no cells. + * Currently implemented only for Xlsx. + */ + protected bool $ignoreRowsWithNoCells = false; + /** * IReadFilter instance. */ @@ -78,6 +85,18 @@ public function setReadEmptyCells(bool $readEmptyCells): self return $this; } + public function getIgnoreRowsWithNoCells(): bool + { + return $this->ignoreRowsWithNoCells; + } + + public function setIgnoreRowsWithNoCells(bool $ignoreRowsWithNoCells): self + { + $this->ignoreRowsWithNoCells = $ignoreRowsWithNoCells; + + return $this; + } + public function getIncludeCharts(): bool { return $this->includeCharts; @@ -150,6 +169,9 @@ protected function processFlags(int $flags): void if (((bool) ($flags & self::SKIP_EMPTY_CELLS) || (bool) ($flags & self::IGNORE_EMPTY_CELLS)) === true) { $this->setReadEmptyCells(false); } + if (((bool) ($flags & self::IGNORE_ROWS_WITH_NO_CELLS)) === true) { + $this->setIgnoreRowsWithNoCells(true); + } } protected function loadSpreadsheetFromFile(string $filename): Spreadsheet diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index 9d9c26ede2..ac4fa688ab 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -566,6 +566,7 @@ public function canRead(string $filename): bool 'text/csv', 'text/plain', 'inode/x-empty', + 'text/html', ]; return in_array($type, $supportedTypes, true); diff --git a/src/PhpSpreadsheet/Reader/IReader.php b/src/PhpSpreadsheet/Reader/IReader.php index 79be3d8c7f..62c2103ab8 100644 --- a/src/PhpSpreadsheet/Reader/IReader.php +++ b/src/PhpSpreadsheet/Reader/IReader.php @@ -13,6 +13,8 @@ interface IReader public const SKIP_EMPTY_CELLS = 4; public const IGNORE_EMPTY_CELLS = 4; + public const IGNORE_ROWS_WITH_NO_CELLS = 8; + public function __construct(); /** diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index 5def2eb178..9bfb354342 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ 
b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -796,10 +796,10 @@ protected function loadSpreadsheetFromFile(string $filename): Spreadsheet } $sheetViewOptions = new SheetViewOptions($docSheet, $xmlSheetNS); - $sheetViewOptions->load($this->getReadDataOnly(), $this->styleReader); + $sheetViewOptions->load($this->readDataOnly, $this->styleReader); (new ColumnAndRowAttributes($docSheet, $xmlSheetNS)) - ->load($this->getReadFilter(), $this->getReadDataOnly()); + ->load($this->getReadFilter(), $this->readDataOnly, $this->ignoreRowsWithNoCells); } $holdSelectedCells = $docSheet->getSelectedCells(); diff --git a/src/PhpSpreadsheet/Reader/Xlsx/ColumnAndRowAttributes.php b/src/PhpSpreadsheet/Reader/Xlsx/ColumnAndRowAttributes.php index 41eab03c5f..cf9046ce57 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx/ColumnAndRowAttributes.php +++ b/src/PhpSpreadsheet/Reader/Xlsx/ColumnAndRowAttributes.php @@ -72,7 +72,7 @@ private function setRowAttributes(int $rowNumber, array $rowAttributes): void } } - public function load(?IReadFilter $readFilter = null, bool $readDataOnly = false): void + public function load(?IReadFilter $readFilter = null, bool $readDataOnly = false, bool $ignoreRowsWithNoCells = false): void { if ($this->worksheetXml === null) { return; @@ -85,7 +85,7 @@ public function load(?IReadFilter $readFilter = null, bool $readDataOnly = false } if ($this->worksheetXml->sheetData && $this->worksheetXml->sheetData->row) { - $rowsAttributes = $this->readRowAttributes($this->worksheetXml->sheetData->row, $readDataOnly); + $rowsAttributes = $this->readRowAttributes($this->worksheetXml->sheetData->row, $readDataOnly, $ignoreRowsWithNoCells); } if ($readFilter !== null && $readFilter::class === DefaultReadFilter::class) { @@ -189,13 +189,13 @@ private function isFilteredRow(IReadFilter $readFilter, int $rowCoordinate, arra return false; } - private function readRowAttributes(SimpleXMLElement $worksheetRow, bool $readDataOnly): array + private function readRowAttributes(SimpleXMLElement 
$worksheetRow, bool $readDataOnly, bool $ignoreRowsWithNoCells): array { $rowAttributes = []; foreach ($worksheetRow as $rowx) { $row = $rowx->attributes(); - if ($row !== null) { + if ($row !== null && (!$ignoreRowsWithNoCells || isset($rowx->c))) { if (isset($row['ht']) && !$readDataOnly) { $rowAttributes[(int) $row['r']]['rowHeight'] = (float) $row['ht']; } diff --git a/src/PhpSpreadsheet/Writer/ZipStream3.php b/src/PhpSpreadsheet/Writer/ZipStream3.php index d9c8d0b166..96e13ecf0e 100644 --- a/src/PhpSpreadsheet/Writer/ZipStream3.php +++ b/src/PhpSpreadsheet/Writer/ZipStream3.php @@ -2,7 +2,6 @@ namespace PhpOffice\PhpSpreadsheet\Writer; -use ZipStream\Option\Archive; use ZipStream\ZipStream; class ZipStream3 diff --git a/tests/PhpSpreadsheetTests/IOFactoryTest.php b/tests/PhpSpreadsheetTests/IOFactoryTest.php index 5309c6d694..90628fbeeb 100644 --- a/tests/PhpSpreadsheetTests/IOFactoryTest.php +++ b/tests/PhpSpreadsheetTests/IOFactoryTest.php @@ -103,6 +103,8 @@ public static function providerIdentify(): array //['samples/templates/Excel2003XMLTest.xml', 'Xml', Reader\Xml::class], ['samples/templates/46readHtml.html', 'Html', Reader\Html::class], ['tests/data/Reader/CSV/encoding.utf8bom.csv', 'Csv', Reader\Csv::class], + ['tests/data/Reader/HTML/charset.UTF-16.lebom.html', 'Html', Reader\Html::class], + ['tests/data/Reader/HTML/charset.UTF-8.bom.html', 'Html', Reader\Html::class], ]; } diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/NotHtmlTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/NotHtmlTest.php new file mode 100644 index 0000000000..1ee907d6e1 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Csv/NotHtmlTest.php @@ -0,0 +1,83 @@ +tempFile !== '') { + unlink($this->tempFile); + $this->tempFile = ''; + } + } + + public function testHtmlCantRead(): void + { + // This test has a file which IOFactory will identify as Csv. + // So file can be read using either Csv Reader or IOFactory. 
+ $this->tempFile = $filename = File::temporaryFilename(); + $cells = [ + ['1', 'example', '3'], + ['4', '5', '6'], + ]; + $handle = fopen($filename, 'wb'); + self::assertNotFalse($handle); + foreach ($cells as $row) { + fwrite($handle, "{$row[0]},{$row[1]},{$row[2]}\n"); + } + fclose($handle); + // Php8.3- identify file as text/html. + // Php8.4+ identify file as text/csv, and this type of change + // has been known to be retrofitted to prior versions. + $mime = mime_content_type($filename); + if ($mime !== 'text/csv') { + self::assertSame('text/html', $mime); + } + self::assertSame('Csv', IOFactory::identify($filename)); + $reader = new CsvReader(); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertSame($cells, $sheet->toArray()); + $spreadsheet->disconnectWorksheets(); + } + + public function testHtmlCanRead(): void + { + // This test has a file which IOFactory will identify as Html. + // So file has to be read using Csv Reader, not IOFactory. + $this->tempFile = $filename = File::temporaryFilename(); + $cells = [ + ['example', '
hello', '3'], + ['4', '5', '
'], + ]; + $handle = fopen($filename, 'wb'); + self::assertNotFalse($handle); + foreach ($cells as $row) { + fwrite($handle, "{$row[0]},{$row[1]},{$row[2]}\n"); + } + fclose($handle); + // Php8.3- identify file as text/html. + // Php8.4+ identify file as text/csv, and this type of change + // has been known to be retrofitted to prior versions. + $mime = mime_content_type($filename); + if ($mime !== 'text/csv') { + self::assertSame('text/html', $mime); + } + self::assertSame('Html', IOFactory::identify($filename)); + $reader = new CsvReader(); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertSame($cells, $sheet->toArray()); + $spreadsheet->disconnectWorksheets(); + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/Xlsx/Issue3982Test.php b/tests/PhpSpreadsheetTests/Reader/Xlsx/Issue3982Test.php new file mode 100644 index 0000000000..855b4a84de --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Xlsx/Issue3982Test.php @@ -0,0 +1,41 @@ +getActiveSheet(); + $data = $sheet->toArray(null, true, false, true); + self::assertCount(1048576, $data); + $spreadsheet->disconnectWorksheets(); + } + + public function testIgnoreCellsWithNoRows(): void + { + $spreadsheet = IOFactory::load(self::$testbook, IReader::IGNORE_ROWS_WITH_NO_CELLS); + $sheet = $spreadsheet->getActiveSheet(); + $data = $sheet->toArray(null, true, false, true); + self::assertSame([1, 2, 3, 4, 5, 6], array_keys($data)); + $spreadsheet->disconnectWorksheets(); + } + + public function testDefaultSetting(): void + { + $reader = new XlsxReader(); + self::assertFalse($reader->getIgnoreRowsWithNoCells()); + self::assertFalse($reader->getReadDataOnly()); + self::assertFalse($reader->getIncludeCharts()); + self::assertTrue($reader->getReadEmptyCells()); + } +} diff --git a/tests/data/Reader/XLSX/issue.3982.xlsx b/tests/data/Reader/XLSX/issue.3982.xlsx new file mode 100644 index 0000000000..d822da2bda Binary files /dev/null and b/tests/data/Reader/XLSX/issue.3982.xlsx 
differ