Skip to content

Commit

Permalink
WIP: Xxe (#780)
Browse files Browse the repository at this point in the history
Changes to the xml security scanner to use libxml_disable_entity_loader() when cleanly supported and thread-safe, and to handle UTF-7 charset which otherwise permits an XXE exploit
  • Loading branch information
Mark Baker authored Nov 20, 2018
1 parent 3bea6f5 commit 0f8f071
Show file tree
Hide file tree
Showing 11 changed files with 229 additions and 147 deletions.
2 changes: 1 addition & 1 deletion composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 0 additions & 33 deletions src/PhpSpreadsheet/Reader/BaseReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -221,37 +221,4 @@ protected function openFile($pFilename)
throw new Exception('Could not open file ' . $pFilename . ' for reading.');
}
}

/**
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param string $xml
*
* @throws Exception
*
* @return string
*/
public function securityScan($xml)
{
$pattern = '/\\0?' . implode('\\0?', str_split('<!DOCTYPE')) . '\\0?/';
if (preg_match($pattern, $xml)) {
throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}

return $xml;
}

/**
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param string $filestream
*
* @throws Exception
*
* @return string
*/
public function securityScanFile($filestream)
{
return $this->securityScan(file_get_contents($filestream));
}
}
13 changes: 10 additions & 3 deletions src/PhpSpreadsheet/Reader/Gnumeric.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\NamedRange;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
Expand All @@ -30,13 +31,19 @@ class Gnumeric extends BaseReader

private $referenceHelper;

/**
* @var XmlScanner
*/
private $securityScanner;

/**
* Create a new Gnumeric.
*/
public function __construct()
{
$this->readFilter = new DefaultReadFilter();
$this->referenceHelper = ReferenceHelper::getInstance();
$this->securityScanner = new XmlScanner();
}

/**
Expand Down Expand Up @@ -77,7 +84,7 @@ public function listWorksheetNames($pFilename)
File::assertFile($pFilename);

$xml = new XMLReader();
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->setParserProperty(2, true);

$worksheetNames = [];
Expand Down Expand Up @@ -106,7 +113,7 @@ public function listWorksheetInfo($pFilename)
File::assertFile($pFilename);

$xml = new XMLReader();
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
$xml->setParserProperty(2, true);

$worksheetInfo = [];
Expand Down Expand Up @@ -196,7 +203,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)

$gFileData = $this->gzfileGetContents($pFilename);

$xml = simplexml_load_string($this->securityScan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
$xml = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
$namespacesMeta = $xml->getNamespaces(true);

$gnmXML = $xml->children($namespacesMeta['gnm']);
Expand Down
26 changes: 8 additions & 18 deletions src/PhpSpreadsheet/Reader/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use DOMNode;
use DOMText;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\Border;
use PhpOffice\PhpSpreadsheet\Style\Color;
Expand All @@ -16,6 +17,11 @@
/** PhpSpreadsheet root directory */
class Html extends BaseReader
{
/**
* @var XmlScanner
*/
private $securityScanner;

/**
* Sample size to read to determine if it's HTML or not.
*/
Expand Down Expand Up @@ -105,6 +111,7 @@ class Html extends BaseReader
public function __construct()
{
$this->readFilter = new DefaultReadFilter();
$this->securityScanner = new XmlScanner('<!ENTITY');
}

/**
Expand Down Expand Up @@ -543,7 +550,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
// Create a new DOM object
$dom = new DOMDocument();
// Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) {
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
}
Expand Down Expand Up @@ -585,23 +592,6 @@ public function setSheetIndex($pValue)
return $this;
}

/**
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param string $xml
*
* @return string
*/
public function securityScan($xml)
{
$pattern = '/\\0?' . implode('\\0?', str_split('<!ENTITY')) . '\\0?/';
if (preg_match($pattern, $xml)) {
throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}

return $xml;
}

/**
* Apply inline css inline style.
*
Expand Down
17 changes: 12 additions & 5 deletions src/PhpSpreadsheet/Reader/Ods.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Document\Properties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
Expand All @@ -19,12 +20,18 @@

class Ods extends BaseReader
{
/**
* @var XmlScanner
*/
private $securityScanner;

/**
* Create a new Ods Reader instance.
*/
public function __construct()
{
$this->readFilter = new DefaultReadFilter();
$this->securityScanner = new XmlScanner();
}

/**
Expand Down Expand Up @@ -52,7 +59,7 @@ public function canRead($pFilename)
$mimeType = $zip->getFromName($stat['name']);
} elseif ($stat = $zip->statName('META-INF/manifest.xml')) {
$xml = simplexml_load_string(
$this->securityScan($zip->getFromName('META-INF/manifest.xml')),
$this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
Expand Down Expand Up @@ -100,7 +107,7 @@ public function listWorksheetNames($pFilename)

$xml = new XMLReader();
$xml->xml(
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
$this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
null,
Settings::getLibXmlLoaderOptions()
);
Expand Down Expand Up @@ -154,7 +161,7 @@ public function listWorksheetInfo($pFilename)

$xml = new XMLReader();
$xml->xml(
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
$this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
null,
Settings::getLibXmlLoaderOptions()
);
Expand Down Expand Up @@ -267,7 +274,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
// Meta

$xml = simplexml_load_string(
$this->securityScan($zip->getFromName('meta.xml')),
$this->securityScanner->scan($zip->getFromName('meta.xml')),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
Expand Down Expand Up @@ -367,7 +374,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)

$dom = new \DOMDocument('1.01', 'UTF-8');
$dom->loadXML(
$this->securityScan($zip->getFromName('content.xml')),
$this->securityScanner->scan($zip->getFromName('content.xml')),
Settings::getLibXmlLoaderOptions()
);

Expand Down
87 changes: 87 additions & 0 deletions src/PhpSpreadsheet/Reader/Security/XmlScanner.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
<?php

namespace PhpOffice\PhpSpreadsheet\Reader\Security;

use PhpOffice\PhpSpreadsheet\Reader\Exception;

class XmlScanner
{
/**
* Identifies whether the thread-safe libxmlDisableEntityLoader() function is available.
*
* @var bool
*/
private $libxmlDisableEntityLoader = false;

private $pattern;

public function __construct($pattern = '<!DOCTYPE')
{
$this->pattern = $pattern;
$this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability();

if ($this->libxmlDisableEntityLoader) {
libxml_disable_entity_loader(true);
}
}

private function identifyLibxmlDisableEntityLoaderAvailability()
{
if (PHP_MAJOR_VERSION == 7) {
switch (PHP_MINOR_VERSION) {
case 2:
return PHP_RELEASE_VERSION >= 1;
case 1:
return PHP_RELEASE_VERSION >= 13;
case 0:
return PHP_RELEASE_VERSION >= 27;
}

return true;
}

return false;
}

/**
* Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param mixed $xml
*
* @throws Exception
*
* @return string
*/
public function scan($xml)
{
$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = $result ? $matches[1] : 'UTF-8';

if ($charset !== 'UTF-8') {
$xml = mb_convert_encoding($xml, 'UTF-8', $charset);
}

// Don't rely purely on libxml_disable_entity_loader()
$pattern = '/\\0?' . implode('\\0?', str_split($this->pattern)) . '\\0?/';
if (preg_match($pattern, $xml)) {
throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}

return $xml;
}

/**
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
*
* @param string $filestream
*
* @throws Exception
*
* @return string
*/
public function scanFile($filestream)
{
return $this->scan(file_get_contents($filestream));
}
}
Loading

0 comments on commit 0f8f071

Please sign in to comment.