Skip to content

Commit

Permalink
Merge pull request #201 from Kdecherf/referrerpolicy
Browse files Browse the repository at this point in the history
Add support of referrerpolicy for img tags
  • Loading branch information
j0k3r authored May 11, 2019
2 parents 55275bd + b92506b commit c7fcea0
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/Graby.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class Graby
private $configBuilder;
private $punycode;

private $imgNoReferrer = false;

/**
* @param array $config
* @param Client|null $client Guzzle client
Expand Down Expand Up @@ -171,6 +173,13 @@ public function fetchContent($url)
return $infos;
}

/**
 * Turn the "no-referrer" handling of img tags on or off.
 *
 * When enabled, cleanupHtml() sets referrerpolicy="no-referrer" on every img tag
 * of the content block. Values that are not real booleans are ignored and the
 * current setting is left untouched.
 *
 * @param bool $toggle true to enable, false to disable
 */
public function toggleImgNoReferrer($toggle)
{
    // Guard: anything other than a strict boolean leaves the flag as it is
    if (!\is_bool($toggle)) {
        return;
    }

    $this->imgNoReferrer = $toggle;
}

/**
* Cleanup HTML from a DOMElement or a string.
*
Expand Down Expand Up @@ -228,6 +237,14 @@ public function cleanupHtml($contentBlock, $url)
$contentBlock = $contentBlock->firstChild;
}

// set or replace referrerpolicy to no-referrer in img tags
if ($this->imgNoReferrer) {
$imgTags = $contentBlock->getElementsByTagName('img');
foreach ($imgTags as $img) {
$img->setAttribute('referrerpolicy', 'no-referrer');
}
}

// convert content block to HTML string
// Need to preserve things like body: //img[@id='feature']
if (\in_array(strtolower($contentBlock->tagName), ['div', 'article', 'section', 'header', 'footer', 'li', 'td'], true)) {
Expand Down
50 changes: 50 additions & 0 deletions tests/GrabyTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1707,6 +1707,56 @@ public function testIfPageContainsWithNextPageLink()
$this->assertSame(200, $res['status']);
}

/**
 * Check that toggleImgNoReferrer() drives the referrerpolicy attribute of img tags.
 *
 * The same mocked page is fetched twice: once with the toggle enabled (the first
 * img must carry referrerpolicy="no-referrer") and once with it disabled (the
 * attribute must be absent).
 */
public function testImgNoReferrer()
{
    // Page served by the mocked HTTP layer: one image followed by enough text to be picked up as content
    $pageBody = '<html><body><h1>Hello world</h1><article><p><img src="http://example.com/hello.jpg"> ' . str_repeat('This is an awesome text with some links, here there are the awesome', 7) . '</p></article></body></html>';

    $response = $this->getMockBuilder('GuzzleHttp\Message\Response')
        ->disableOriginalConstructor()
        ->getMock();
    $response->expects($this->any())->method('getEffectiveUrl')->willReturn('http://example.com');
    $response->expects($this->any())->method('getHeaders')->willReturn(['Content-Type' => 'text/html']);
    $response->expects($this->any())->method('getStatusCode')->willReturn(200);
    $response->expects($this->any())->method('getBody')->willReturn($pageBody);

    $client = $this->getMockBuilder('GuzzleHttp\Client')
        ->disableOriginalConstructor()
        ->getMock();
    $client->expects($this->any())->method('get')->willReturn($response);

    $graby = new Graby([], $client);

    // Toggle enabled: the attribute must be present with the expected value
    $graby->toggleImgNoReferrer(true);
    $result = $graby->fetchContent('example.com');

    $dom = new \DOMDocument();
    $dom->loadXML($result['html']);
    $firstImg = $dom->getElementsByTagName('img')->item(0);

    $this->assertTrue($firstImg->hasAttribute('referrerpolicy'));
    $this->assertSame('no-referrer', $firstImg->getAttribute('referrerpolicy'));

    // Toggle disabled: fetching again must not add the attribute
    $graby->toggleImgNoReferrer(false);
    $result = $graby->fetchContent('example.com');

    $dom = new \DOMDocument();
    $dom->loadXML($result['html']);
    $firstImg = $dom->getElementsByTagName('img')->item(0);

    $this->assertFalse($firstImg->hasAttribute('referrerpolicy'));
}

/**
* Return an instance of graby with a mocked Guzzle client returning data from a predefined file.
*/
Expand Down

0 comments on commit c7fcea0

Please sign in to comment.