Skip to content

Commit

Permalink
Backport security fix from PHPOffice#4119 to v1
Browse files Browse the repository at this point in the history
  • Loading branch information
acrobat committed Aug 30, 2024
1 parent 15a42d0 commit 090e16f
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
23 changes: 18 additions & 5 deletions src/PhpSpreadsheet/Reader/Security/XmlScanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,12 @@ private static function forceString($arg): string
*/
private function toUtf8($xml)
{
$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');
$charset = $this->findCharSet($xml);

if ($charset !== 'UTF-8') {
$xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset));

$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');
$charset = $this->findCharSet($xml);
if ($charset !== 'UTF-8') {
throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
Expand Down Expand Up @@ -169,4 +166,20 @@ public function scanFile($filestream)
{
return $this->scan(file_get_contents($filestream));
}

/**
 * Find the character set named by the first `encoding` pseudo-attribute in the XML.
 *
 * A single pattern covers both quote styles, so the declaration that appears
 * first in the document is the one reported. Trying one quote style over the
 * whole document before the other would let a later `encoding="UTF-8"` (for
 * example inside a comment) hide an earlier `encoding='UTF-7'` declaration.
 *
 * @param string $xml raw XML text to inspect
 *
 * @return string the declared character set in upper case, or 'UTF-8' when no
 *                encoding declaration is found
 */
private function findCharSet(string $xml): string
{
    // XML allows optional whitespace around '=' (Eq ::= S? '=' S?) and either
    // quote character; the back-reference \1 requires the closing quote to
    // match the opening one. The /s flag keeps the value match newline-tolerant.
    $pattern = '/encoding\s*=\s*(["\'])(.*?)\1/s';

    if (preg_match($pattern, $xml, $matches) === 1) {
        return strtoupper($matches[2]);
    }

    // No declaration (or a regex failure): fall back to the XML default.
    return 'UTF-8';
}
}
2 changes: 2 additions & 0 deletions tests/data/Reader/Xml/XEETestInvalidUTF-7-single-quote.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding='UTF-7' standalone="yes"?>
+ADw-+ACE-DOCTYPE+ACA-foo+ACA-+AFs-+ADw-+ACE-ENTITY+ACA-toreplace+ACA-+ACI-xxe+AF8-test+ACI-+AD4-+ACA-+AF0-+AD4-+AAo-+ADw-sst+ACA-xmlns+AD0-+ACI-http://schemas.openxmlformats.org/spreadsheetml/2006/main+ACI-+ACA-count+AD0-+ACI-2+ACI-+ACA-uniqueCount+AD0-+ACI-1+ACI-+AD4-+ADw-si+AD4-+ADw-t+AD4-+ACY-toreplace+ADs-+ADw-/t+AD4-+ADw-/si+AD4-+ADw-/sst+AD4-
4 changes: 4 additions & 0 deletions tests/data/Reader/Xml/XEETestValidUTF-8-single-quote.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<root>
test: Valid
</root>

0 comments on commit 090e16f

Please sign in to comment.