Skip to content

Commit

Permalink
Helper\Html support UTF-8 HTML input
Browse files Browse the repository at this point in the history
Assume UTF-8 encoding. Not assuming UTF-8 would mangle text such as "русский"

Fixes #444
  • Loading branch information
PowerKiKi committed May 20, 2018
1 parent 3863826 commit 1489093
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Fixed

- Subtotal 9 in a group that has other subtotal 9 formulas now excludes the totals of those other subtotals in the range - [#332](https://github.com/PHPOffice/PhpSpreadsheet/issues/332)
- `Helper\Html` now supports UTF-8 HTML input - [#444](https://github.com/PHPOffice/PhpSpreadsheet/issues/444)

## [1.2.1] - 2018-04-10

Expand Down
11 changes: 9 additions & 2 deletions src/PhpSpreadsheet/Helper/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,13 @@ protected function initialise()
$this->stringData = '';
}

/**
* Parse HTML formatting and return the resulting RichText.
*
* @param string $html
*
* @return RichText
*/
public function toRichTextObject($html)
{
$this->initialise();
Expand All @@ -611,8 +618,8 @@ public function toRichTextObject($html)
$dom = new DOMDocument();
// Load the HTML file into the DOM object
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
@$dom->loadHTML($html);

$prefix = '<?xml encoding="UTF-8">';
@$dom->loadHTML($prefix . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
// Discard excess white space
$dom->preserveWhiteSpace = false;

Expand Down
33 changes: 33 additions & 0 deletions tests/PhpSpreadsheetTests/Helper/HtmlTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Helper;

use PhpOffice\PhpSpreadsheet\Helper\Html;
use PHPUnit\Framework\TestCase;

/**
 * Tests for the Helper\Html HTML-to-RichText conversion.
 */
class HtmlTest extends TestCase
{
    /**
     * The plain text extracted from the converted rich text must match the
     * expected string exactly, including non-ASCII (UTF-8) characters.
     *
     * @dataProvider providerUtf8EncodingSupport
     *
     * @param string $expected plain text expected from the resulting rich text
     * @param string $input HTML fragment (or bare text) handed to the converter
     */
    public function testUtf8EncodingSupport($expected, $input)
    {
        $html = new Html();
        $actual = $html->toRichTextObject($input);

        self::assertSame($expected, $actual->getPlainText());
    }

    /**
     * Data sets for testUtf8EncodingSupport(), as [expected plain text, input HTML].
     *
     * Declared static because PHPUnit 10+ deprecates (and later rejects)
     * non-static data providers; older PHPUnit versions accept static ones too.
     * Each set is named so a failure report identifies the offending case.
     *
     * @return array
     */
    public static function providerUtf8EncodingSupport()
    {
        return [
            'plain ASCII' => ['foo', 'foo'],
            'Latin with umlaut' => ['können', 'können'],
            'Cyrillic' => ['русский', 'русский'],
            'paragraphs become lines' => ["foo\nbar", '<p>foo</p><p>bar</p>'],
        ];
    }
}

0 comments on commit 1489093

Please sign in to comment.