Skip to content

Commit

Permalink
In Xml reader throw exception in case of invalid XML (#222)
Browse files Browse the repository at this point in the history
When the file is not well-formed XML, `simplexml_load_string` returns false, which causes an error on `$xml->getNamespaces(true);`. Instead of surfacing that error, we throw an exception.
yasar-luo authored and PowerKiKi committed Sep 20, 2017

Unverified

This commit is not signed, but one or more authors requires that any commit attributed to them is signed.
1 parent febbe87 commit 0477e6f
Showing 4 changed files with 64 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Changed

- Merge data-validations to reduce written worksheet size - @billblume [#131](https://github.com/PHPOffice/PhpSpreadSheet/issues/131)
- Throw an exception if an XML file is invalid - @GreatHumorist [#222](https://github.com/PHPOffice/PhpSpreadsheet/pull/222)

### Fixed

45 changes: 30 additions & 15 deletions src/PhpSpreadsheet/Reader/Xml.php
Original file line number Diff line number Diff line change
@@ -117,6 +117,30 @@ public function canRead($pFilename)
return $valid;
}

/**
 * Load the given file as a SimpleXMLElement, throwing when it cannot be parsed.
 *
 * The file contents are passed through securityScan() and then parsed with
 * simplexml_load_string() using the loader options from Settings.
 *
 * @param string $pFilename Path of the XML file to load
 *
 * @throws Exception If the file cannot be parsed as XML
 *
 * @return \SimpleXMLElement
 */
public function trySimpleXMLLoadString($pFilename)
{
    try {
        $xml = simplexml_load_string(
            $this->securityScan(file_get_contents($pFilename)),
            'SimpleXMLElement',
            Settings::getLibXmlLoaderOptions()
        );
    } catch (\Exception $e) {
        // Reached when something inside the try raises an exception, e.g. when
        // an error handler converts the parser warnings into exceptions.
        throw new Exception('Cannot load invalid XML file: ' . $pFilename, 0, $e);
    }

    // simplexml_load_string() reports malformed XML by returning false rather
    // than throwing, so without this check callers would go on to call
    // methods on a boolean.
    if ($xml === false) {
        throw new Exception('Cannot load invalid XML file: ' . $pFilename);
    }

    return $xml;
}

/**
* Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
*
@@ -133,11 +157,8 @@ public function listWorksheetNames($pFilename)

$worksheetNames = [];

$xml = simplexml_load_string(
$this->securityScan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
$xml = $this->trySimpleXMLLoadString($pFilename);

$namespaces = $xml->getNamespaces(true);

$xml_ss = $xml->children($namespaces['ss']);
@@ -162,11 +183,8 @@ public function listWorksheetInfo($pFilename)

$worksheetInfo = [];

$xml = simplexml_load_string(
$this->securityScan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
$xml = $this->trySimpleXMLLoadString($pFilename);

$namespaces = $xml->getNamespaces(true);

$worksheetID = 1;
@@ -339,11 +357,8 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
}

$xml = simplexml_load_string(
$this->securityScan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
$xml = $this->trySimpleXMLLoadString($pFilename);

$namespaces = $xml->getNamespaces(true);

$docProps = $spreadsheet->getProperties();
26 changes: 25 additions & 1 deletion tests/PhpSpreadsheetTests/Reader/XEEValidatorTest.php
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
namespace PhpOffice\PhpSpreadsheetTests\Reader;

use PhpOffice\PhpSpreadsheet\Reader\BaseReader;
use PhpOffice\PhpSpreadsheet\Reader\Exception;
use PhpOffice\PhpSpreadsheet\Reader\Xml;
use PHPUnit_Framework_TestCase;

class XEEValidatorTest extends PHPUnit_Framework_TestCase
@@ -24,7 +26,29 @@ public function testInvalidXML($filename)
public function providerInvalidXML()
{
$tests = [];
foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalid*.xml') as $file) {
foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalidUTF*.xml') as $file) {
$tests[basename($file)] = [realpath($file)];
}

return $tests;
}

/**
 * Loading a malformed XML fixture through the Xml reader must raise a reader exception.
 *
 * @dataProvider providerInvalidSimpleXML
 * @expectedException \PhpOffice\PhpSpreadsheet\Reader\Exception
 *
 * @param $filename
 */
public function testInvalidSimpleXML($filename)
{
    (new Xml())->trySimpleXMLLoadString($filename);
}

public function providerInvalidSimpleXML()
{
$tests = [];
foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalidSimpleXML*.xml') as $file) {
$tests[basename($file)] = [realpath($file)];
}

8 changes: 8 additions & 0 deletions tests/data/Reader/XEE/XEETestInvalidSimpleXML.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<root>
<data>R&d</data>
<data>R<d</data>
<data>R>d</data>
<data>R'd</data>
<data>R"d</data>
</root>

0 comments on commit 0477e6f

Please sign in to comment.