diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e77664216..54f7530cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com) and this project adheres to [Semantic Versioning](https://semver.org). -## TBD - 3.5.0 +## 2024-11-22 - 3.5.0 ### Added @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Changed -- Nothing yet. +- Settings::libXmlLoaderOptions is ignored. [PR #4233](https://github.com/PHPOffice/PhpSpreadsheet/pull/4233) ### Moved @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Deprecated +- Settings::setLibXmlLoaderOptions() and Settings::getLibXmlLoaderOptions() are no longer needed - no replacement. - Worksheet::getHashCode is no longer needed. ### Fixed diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md index e1d5229cd0..19fe8a8ed9 100644 --- a/docs/topics/reading-and-writing-to-file.md +++ b/docs/topics/reading-and-writing-to-file.md @@ -298,7 +298,6 @@ versions of Microsoft Excel. **Excel 2003 XML limitations** Please note that Excel 2003 XML format has some limits regarding to styling cells and handling large spreadsheets via PHP. -Also, only files using charset UTF-8 or ISO-8859-* are supported. ### \PhpOffice\PhpSpreadsheet\Reader\Xml diff --git a/src/PhpSpreadsheet/Reader/Gnumeric.php b/src/PhpSpreadsheet/Reader/Gnumeric.php index d80a87ecc5..ed81efb224 100644 --- a/src/PhpSpreadsheet/Reader/Gnumeric.php +++ b/src/PhpSpreadsheet/Reader/Gnumeric.php @@ -11,7 +11,6 @@ use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\RichText\RichText; -use PhpOffice\PhpSpreadsheet\Settings; use PhpOffice\PhpSpreadsheet\Shared\File; use PhpOffice\PhpSpreadsheet\Spreadsheet; use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; @@ -104,7 +103,7 @@ public function listWorksheetNames(string $filename): array $xml = new XMLReader(); $contents = $this->gzfileGetContents($filename); - $xml->xml($contents, null, Settings::getLibXmlLoaderOptions()); + $xml->xml($contents); $xml->setParserProperty(2, true); $worksheetNames = []; @@ -133,7 +132,7 @@ public function listWorksheetInfo(string $filename): array $xml = new XMLReader(); $contents = $this->gzfileGetContents($filename); - $xml->xml($contents, null, Settings::getLibXmlLoaderOptions()); + $xml->xml($contents); $xml->setParserProperty(2, true); $worksheetInfo = []; @@ -248,7 +247,7 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp /** @var XmlScanner */ $securityScanner = $this->securityScanner; - $xml2 = simplexml_load_string($securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); + $xml2 = simplexml_load_string($securityScanner->scan($gFileData)); $xml = self::testSimpleXml($xml2); $gnmXML = $xml->children(self::NAMESPACE_GNM); diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index 4e00f7dd3a..4ca781a87d 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -34,7 +34,7 @@ class Html extends BaseReader private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/'; - private const DECLARES_CHARSET = '/ charset=/i'; + private const DECLARES_CHARSET = '/\\bcharset=/i'; /** * Input encoding. diff --git a/src/PhpSpreadsheet/Reader/Ods.php b/src/PhpSpreadsheet/Reader/Ods.php index 85fa08ecf0..57111a9c2b 100644 --- a/src/PhpSpreadsheet/Reader/Ods.php +++ b/src/PhpSpreadsheet/Reader/Ods.php @@ -17,7 +17,6 @@ use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\RichText\RichText; -use PhpOffice\PhpSpreadsheet\Settings; use PhpOffice\PhpSpreadsheet\Shared\Date; use PhpOffice\PhpSpreadsheet\Shared\File; use PhpOffice\PhpSpreadsheet\Spreadsheet; @@ -58,9 +57,12 @@ public function canRead(string $filename): bool $mimeType = $zip->getFromName($stat['name']); } elseif ($zip->statName('META-INF/manifest.xml')) { $xml = simplexml_load_string( - $this->getSecurityScannerOrThrow()->scan($zip->getFromName('META-INF/manifest.xml')), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan( + $zip->getFromName( + 'META-INF/manifest.xml' + ) + ) ); if ($xml !== false) { $namespacesContent = $xml->getNamespaces(true); @@ -98,9 +100,10 @@ public function listWorksheetNames(string $filename): array $xml = new XMLReader(); $xml->xml( - $this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE), - null, - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scanFile( + 'zip://' . realpath($filename) . '#' . self::INITIAL_FILE + ) ); $xml->setParserProperty(2, true); @@ -145,9 +148,10 @@ public function listWorksheetInfo(string $filename): array $xml = new XMLReader(); $xml->xml( - $this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE), - null, - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scanFile( + 'zip://' . realpath($filename) . '#' . self::INITIAL_FILE + ) ); $xml->setParserProperty(2, true); @@ -254,9 +258,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp // Meta $xml = @simplexml_load_string( - $this->getSecurityScannerOrThrow()->scan($zip->getFromName('meta.xml')), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan($zip->getFromName('meta.xml')) ); if ($xml === false) { throw new Exception('Unable to read data from {$pFilename}'); @@ -270,8 +273,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp $dom = new DOMDocument('1.01', 'UTF-8'); $dom->loadXML( - $this->getSecurityScannerOrThrow()->scan($zip->getFromName('styles.xml')), - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan($zip->getFromName('styles.xml')) ); $pageSettings = new PageSettings($dom); @@ -280,8 +283,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp $dom = new DOMDocument('1.01', 'UTF-8'); $dom->loadXML( - $this->getSecurityScannerOrThrow()->scan($zip->getFromName(self::INITIAL_FILE)), - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan($zip->getFromName(self::INITIAL_FILE)) ); $officeNs = (string) $dom->lookupNamespaceUri('office'); @@ -690,8 +693,8 @@ private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): voi { $dom = new DOMDocument('1.01', 'UTF-8'); $dom->loadXML( - $this->getSecurityScannerOrThrow()->scan($zip->getFromName('settings.xml')), - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan($zip->getFromName('settings.xml')) ); //$xlinkNs = $dom->lookupNamespaceUri('xlink'); $configNs = (string) $dom->lookupNamespaceUri('config'); diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index 88d59634c4..d22f76ff4f 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -27,7 +27,6 @@ use PhpOffice\PhpSpreadsheet\Reader\Xlsx\WorkbookView; use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\RichText\RichText; -use PhpOffice\PhpSpreadsheet\Settings; use PhpOffice\PhpSpreadsheet\Shared\Date; use PhpOffice\PhpSpreadsheet\Shared\Drawing; use PhpOffice\PhpSpreadsheet\Shared\File; @@ -123,7 +122,7 @@ private function loadZip(string $filename, string $ns = '', bool $replaceUnclose $rels = @simplexml_load_string( $this->getSecurityScannerOrThrow()->scan($contents), 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions(), + 0, $ns ); @@ -138,7 +137,7 @@ private function loadZipNonamespace(string $filename, string $ns): SimpleXMLElem $rels = simplexml_load_string( $this->getSecurityScannerOrThrow()->scan($contents), 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions(), + 0, ($ns === '' ? $ns : '') ); @@ -245,11 +244,13 @@ public function listWorksheetInfo(string $filename): array $xml = new XMLReader(); $xml->xml( - $this->getSecurityScannerOrThrow()->scan( - $this->getFromZipArchive($this->zip, $fileWorksheetPath) - ), - null, - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan( + $this->getFromZipArchive( + $this->zip, + $fileWorksheetPath + ) + ) ); $xml->setParserProperty(2, true); @@ -2001,9 +2002,8 @@ private function readRibbon(Spreadsheet $excel, string $customUITarget, ZipArchi if ($dataRels) { // exists and not empty if the ribbon have some pictures (other than internal MSO) $UIRels = simplexml_load_string( - $this->getSecurityScannerOrThrow()->scan($dataRels), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan($dataRels) ); if (false !== $UIRels) { // we need to save id and target to avoid parsing customUI.xml and "guess" if it's a pseudo callback who load the image diff --git a/src/PhpSpreadsheet/Reader/Xlsx/Properties.php b/src/PhpSpreadsheet/Reader/Xlsx/Properties.php index fb501e84ec..1a0517b19f 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx/Properties.php +++ b/src/PhpSpreadsheet/Reader/Xlsx/Properties.php @@ -4,7 +4,6 @@ use PhpOffice\PhpSpreadsheet\Document\Properties as DocumentProperties; use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; -use PhpOffice\PhpSpreadsheet\Settings; use SimpleXMLElement; class Properties @@ -23,9 +22,7 @@ private function extractPropertyData(string $propertyData): ?SimpleXMLElement { // okay to omit namespace because everything will be processed by xpath $obj = simplexml_load_string( - $this->securityScanner->scan($propertyData), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() + $this->securityScanner->scan($propertyData) ); return $obj === false ? null : $obj; diff --git a/src/PhpSpreadsheet/Reader/Xml.php b/src/PhpSpreadsheet/Reader/Xml.php index b712321a80..5e9830f43f 100644 --- a/src/PhpSpreadsheet/Reader/Xml.php +++ b/src/PhpSpreadsheet/Reader/Xml.php @@ -15,7 +15,6 @@ use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties; use PhpOffice\PhpSpreadsheet\Reader\Xml\Style; use PhpOffice\PhpSpreadsheet\RichText\RichText; -use PhpOffice\PhpSpreadsheet\Settings; use PhpOffice\PhpSpreadsheet\Shared\Date; use PhpOffice\PhpSpreadsheet\Shared\File; use PhpOffice\PhpSpreadsheet\Spreadsheet; @@ -132,9 +131,8 @@ private function trySimpleXMLLoadStringPrivate(string $filename, string $fileOrS } if ($continue) { $xml = @simplexml_load_string( - $this->getSecurityScannerOrThrow()->scan($data), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() + $this->getSecurityScannerOrThrow() + ->scan($data) ); } } catch (Throwable $e) { diff --git a/src/PhpSpreadsheet/Settings.php b/src/PhpSpreadsheet/Settings.php index 7f727a8cc6..d32ef7c448 100644 --- a/src/PhpSpreadsheet/Settings.php +++ b/src/PhpSpreadsheet/Settings.php @@ -94,6 +94,8 @@ public static function htmlEntityFlags(): int * Set default options for libxml loader. * * @param ?int $options Default options for libxml loader + * + * @deprecated 3.5.0 no longer needed */ public static function setLibXmlLoaderOptions(?int $options): int { @@ -110,14 +112,12 @@ public static function setLibXmlLoaderOptions(?int $options): int * Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly. * * @return int Default options for libxml loader + * + * @deprecated 3.5.0 no longer needed */ public static function getLibXmlLoaderOptions(): int { - if (self::$libXmlLoaderOptions === null) { - return self::setLibXmlLoaderOptions(null); - } - - return self::$libXmlLoaderOptions; + return self::$libXmlLoaderOptions ?? (defined('LIBXML_DTDLOAD') ? (LIBXML_DTDLOAD | LIBXML_DTDATTR) : 0); } /** diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php index 2e1ef19dfd..1daacaf62b 100644 --- a/tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php @@ -40,6 +40,7 @@ public static function providerCharset(): array ['charset.UTF-16.lebom.html', 'À1'], ['charset.gb18030.html', '电视机'], ['charset.unknown.html', 'exception'], + ['xhtml4.entity.xhtml', 'exception'], ]; } } diff --git a/tests/data/Reader/HTML/charset.ISO-8859-1.html4.html b/tests/data/Reader/HTML/charset.ISO-8859-1.html4.html index 63e45ec081..6b81fc9e80 100644 --- a/tests/data/Reader/HTML/charset.ISO-8859-1.html4.html +++ b/tests/data/Reader/HTML/charset.ISO-8859-1.html4.html @@ -1,7 +1,7 @@
- +&test; |