Skip to content

Commit

Permalink
DEP Use masterminds/html5 for HTMLValue
Browse files Browse the repository at this point in the history
  • Loading branch information
emteknetnz committed Jan 16, 2023
1 parent 6d45425 commit a42e31e
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 140 deletions.
3 changes: 0 additions & 3 deletions _config/html.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
Name: corehtml
---
SilverStripe\Core\Injector\Injector:
SilverStripe\View\Parsers\HTMLValue:
class: SilverStripe\View\Parsers\HTML4Value
# Shorthand
HTMLValue: '%$SilverStripe\View\Parsers\HTMLValue'
SilverStripe\Forms\HTMLEditor\HTMLEditorConfig:
class: SilverStripe\Forms\HTMLEditor\TinyMCEConfig
SilverStripe\Forms\HTMLEditor\TinyMCEScriptGenerator: '%$SilverStripe\Forms\HTMLEditor\TinyMCECombinedGenerator'
Expand Down
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"embed/embed": "^4.4.7",
"league/csv": "^9.8.0",
"m1/env": "^2.2.0",
"masterminds/html5": "^2.7",
"monolog/monolog": "^3.2.0",
"nikic/php-parser": "^4.15.0",
"psr/container": "^2.0",
Expand Down
31 changes: 0 additions & 31 deletions src/View/Parsers/HTML4Value.php

This file was deleted.

32 changes: 26 additions & 6 deletions src/View/Parsers/HTMLValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use SilverStripe\Core\Convert;
use SilverStripe\View\ViewableData;
use Masterminds\HTML5;
use DOMNodeList;
use DOMXPath;
use DOMDocument;
Expand All @@ -12,14 +13,10 @@
* This class handles the converting of HTML fragments between a string and a DOMDocument based
* representation.
*
* It's designed to allow dependency injection to replace the standard HTML4 version with one that
* handles XHTML or HTML5 instead
*
* @mixin DOMDocument
*/
abstract class HTMLValue extends ViewableData
class HTMLValue extends ViewableData
{

public function __construct($fragment = null)
{
if ($fragment) {
Expand All @@ -28,7 +25,25 @@ public function __construct($fragment = null)
parent::__construct();
}

abstract public function setContent($fragment);
/**
 * Parse an HTML fragment and store the resulting document on this object.
 *
 * Any <html>, <head> and <body> wrapper tags in the fragment are stripped first. The remainder is
 * then wrapped in a minimal document that declares a UTF-8 charset and parsed with the
 * Masterminds HTML5 parser.
 *
 * @param string $content HTML fragment to parse
 * @return bool True if a document was produced and stored, false otherwise
 */
public function setContent($content)
{
    // Strip only the document wrapper tags. The lookahead requires the tag name to end
    // directly after html/head/body, so elements that merely start with one of those
    // names (e.g. <header>) are left intact.
    $content = preg_replace('#</?(?:html|head|body)(?=[\s/>])[^>]*>#si', '', $content ?? '');
    $html5 = new HTML5();
    // The meta tag tells the parser to treat the fragment as UTF-8
    $document = $html5->loadHTML(
        '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' .
        "<body>$content</body></html>"
    );
    if ($document) {
        $this->setDocument($document);
        return true;
    }
    $this->valid = false;
    return false;
}

/**
* @return string
Expand Down Expand Up @@ -77,6 +92,11 @@ public function getContent()
// Possible alternative solution: http://stackoverflow.com/questions/2142120/php-encoding-with-domdocument
$from = mb_convert_encoding('&nbsp;', 'utf-8', 'html-entities');
$res = str_replace($from ?? '', '&nbsp;', $res ?? '');

// remove stray closing tags which are not required
foreach (['meta', 'br', 'hr', 'img'] as $tag) {
$res = str_replace("</$tag>", '', $res);
}

return $res;
}
Expand Down
4 changes: 2 additions & 2 deletions src/View/Parsers/ShortcodeParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -658,8 +658,8 @@ public function parse($content)
// use a proper DOM
list($content, $tags) = $this->replaceElementTagsWithMarkers($content);

/** @var HTMLValue $htmlvalue */
$htmlvalue = Injector::inst()->create('HTMLValue', $content);
/** @var HTMLValue $htmlvalue */
$htmlvalue = Injector::inst()->create(HTMLValue::class, $content);

// Now parse the result into a DOM
if (!$htmlvalue->isValid()) {
Expand Down
98 changes: 0 additions & 98 deletions tests/php/View/Parsers/HTML4ValueTest.php

This file was deleted.

150 changes: 150 additions & 0 deletions tests/php/View/Parsers/HTMLValueTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
<?php

namespace SilverStripe\View\Tests\Parsers;

use SilverStripe\Dev\SapphireTest;
use SilverStripe\View\Parsers\HTMLValue;
use SilverStripe\ORM\FieldType\DBHTMLText;
use SilverStripe\View\Parsers\ShortcodeParser;
use SilverStripe\Core\Convert;

/**
 * Tests for {@see HTMLValue}: parsing HTML fragments and serialising them back to strings,
 * both directly and via shortcode-enabled DBHTMLText fields.
 */
class HTMLValueTest extends SapphireTest
{
    /**
     * Malformed fragments are repaired when parsed and serialised again.
     */
    public function testInvalidHTMLParsing()
    {
        $value = new HTMLValue();

        // Malformed input => expected repaired output
        $invalid = [
            '<p>Enclosed Value</p><p>a' => '<p>Enclosed Value</p><p>a</p>',
            '<meta content="text/html"></meta>' => '<meta content="text/html">',
            '<p><div class="example"></div><p>' => '<p></p><div class="example"></div><p></p>'
        ];

        foreach ($invalid as $input => $expected) {
            $value->setContent($input);
            // Include the fixture in the message so a failure identifies the offending case
            $this->assertEquals($expected, $value->getContent(), "Invalid HTML can be parsed: $input");
        }
    }

    /**
     * Multi-byte UTF-8 characters survive a parse/serialise round trip unchanged.
     */
    public function testUtf8Saving()
    {
        $value = new HTMLValue();

        $value->setContent('<p>ö ß ā い 家</p>');
        $this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
    }

    /**
     * Whitespace between sibling elements is preserved.
     */
    public function testWhitespaceHandling()
    {
        $value = new HTMLValue();

        $value->setContent('<p></p> <p></p>');
        $this->assertEquals('<p></p> <p></p>', $value->getContent());
    }

    /**
     * Attribute data can still be read from fragments with mismatched or invalid tags.
     */
    public function testInvalidHTMLTagNames()
    {
        $value = new HTMLValue();

        $invalid = [
            '<p><div><a href="test-link"></p></div>',
            '<html><div><a href="test-link"></a></a></html_>'
        ];

        foreach ($invalid as $input) {
            $value->setContent($input);

            $this->assertEquals(
                'test-link',
                $value->getElementsByTagName('a')->item(0)->getAttribute('href'),
                "Link data can be extracted from malformed HTML: $input"
            );
        }
    }

    /**
     * Windows-style "\r\n" line endings are normalised to "\n".
     */
    public function testMixedNewlines()
    {
        $value = new HTMLValue();

        $value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
        $this->assertEquals(
            "<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
            $value->getContent(),
            'Newlines get converted'
        );
    }

    /**
     * Shortcodes are expanded inside HTML content, and void elements are serialised
     * without a trailing slash or closing tag.
     */
    public function testShortcodeValue()
    {
        ShortcodeParser::get('default')->register(
            'test_shortcode',
            function () {
                return 'bit of test shortcode output';
            }
        );
        $content = DBHTMLText::create('Test', ['shortcodes' => true])
            ->setValue('<p>Some content with a [test_shortcode] and a <br /> followed by an <hr> in it.</p>')
            ->forTemplate();
        $this->assertStringContainsString(
            // hr is flow content, not phrasing content, so must be corrected to be outside the p tag.
            '<p>Some content with a bit of test shortcode output and a <br> followed by an </p><hr> in it.',
            $content
        );
    }

    /**
     * Bare ampersands in attribute values and text are encoded as &amp;, while an
     * existing &amp; entity is not double-encoded.
     */
    public function testEntities()
    {
        $content = '<a href="http://domain.test/path?two&vars">ampersand &amp; test & link</a>';
        $output = new HTMLValue($content);
        $output = $output->getContent();
        $this->assertEquals(
            '<a href="http://domain.test/path?two&amp;vars">ampersand &amp; test &amp; link</a>',
            $output
        );
    }

    /**
     * Ampersands in URLs are entity-encoded exactly once when content goes through the
     * shortcode parser, whether or not a shortcode produced the link.
     */
    public function testShortcodeEntities()
    {
        ShortcodeParser::get('default')->register(
            'sitetree_link_test',
            // A mildly stubbed copy from SilverStripe\CMS\Model\SiteTree::link_shortcode_handler
            function ($arguments, $content = null, $parser = null) {
                $link = Convert::raw2att('https://google.com/search?q=unit&test');
                if ($content) {
                    $link = sprintf('<a href="%s">%s</a>', $link, $parser->parse($content));
                }
                return $link;
            }
        );
        // Input => expected rendered output
        $content = [
            '[sitetree_link_test,id=2]' => 'https://google.com/search?q=unit&amp;test',
            // the random [ triggers the shortcode parser, which seems to be where problems arise.
            '<a href="https://google.com/search?q=unit&test"> [ non shortcode link</a>' =>
                '<a href="https://google.com/search?q=unit&amp;test"> [ non shortcode link</a>',
            '[sitetree_link_test,id=1]test link[/sitetree_link_test]' =>
                '<a href="https://google.com/search?q=unit&amp;test">test link</a>'
        ];
        foreach ($content as $input => $expected) {
            $output = DBHTMLText::create('Test', ['shortcodes' => true])
                ->setValue($input)
                ->forTemplate();
            // Include the fixture in the message so a failure identifies the offending case
            $this->assertEquals($expected, $output, "Unexpected shortcode output for: $input");
        }
    }

    /**
     * Markup nested inside <noscript> is returned unchanged.
     */
    public function testValidHTMLInNoscriptTags()
    {
        $value = new HTMLValue();

        $noscripts = [
            '<noscript><p>Enclosed Value</p></noscript>',
            '<noscript><span class="test">Enclosed Value</span></noscript>',
            '<noscript><img src="/test.jpg" alt="test"></noscript>',
        ];

        foreach ($noscripts as $noscript) {
            $value->setContent($noscript);
            $this->assertEquals(
                $noscript,
                $value->getContent(),
                "Child tags are left untouched in noscript tags: $noscript"
            );
        }
    }
}

0 comments on commit a42e31e

Please sign in to comment.