Skip to content

Commit

Permalink
Merge pull request #218 from Simounet/feat/jsonld-multiple-authors
Browse files Browse the repository at this point in the history
JSON LD multiple authors
  • Loading branch information
j0k3r authored Nov 12, 2019
2 parents 590b71d + 16a82d8 commit c27bcc8
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 24 deletions.
31 changes: 22 additions & 9 deletions src/Extractor/ContentExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ class ContentExtractor
private $configBuilder;

/**
* @param array $config
* @param LoggerInterface|null $logger
* @param ConfigBuilder|null $configBuilder
* @param array $config
*/
public function __construct($config = [], LoggerInterface $logger = null, ConfigBuilder $configBuilder = null)
{
Expand Down Expand Up @@ -730,8 +728,6 @@ protected function addAuthor($authorDirty)
/**
* Check if given node list exists and has length more than 0.
*
* @param \DOMNodeList $elems
*
* @return bool
*/
private function hasElements(\DOMNodeList $elems)
Expand All @@ -742,8 +738,7 @@ private function hasElements(\DOMNodeList $elems)
/**
* Remove elements.
*
* @param \DOMNodeList $elems
* @param string $logMessage
* @param string $logMessage
*/
private function removeElements(\DOMNodeList $elems, $logMessage = null)
{
Expand Down Expand Up @@ -1250,6 +1245,24 @@ private function extractOpenGraph(\DOMXPath $xpath)
}
}

/**
 * Clean extract of JSON-LD authors.
 *
 * Accepts either a single JSON-LD author object (an array carrying a "name" key)
 * or a list of such objects.
 *
 * @param array $authors Decoded JSON-LD "author" value
 *
 * @return string|array The "name" value as-is for a single author object,
 *                      otherwise the list of string names found (possibly empty)
 */
private function extractAuthorsFromJsonLdArray(array $authors)
{
    // Single author object, e.g. {"@type": "Person", "name": "..."}
    if (isset($authors['name'])) {
        return $authors['name'];
    }

    $names = [];
    foreach ($authors as $author) {
        // Skip entries without a usable string name rather than emitting a `false`
        // placeholder, so the result only ever contains author names.
        if (\is_array($author) && isset($author['name']) && \is_string($author['name'])) {
            $names[] = $author['name'];
        }
    }

    return $names;
}

/**
* Extract data from JSON-LD information.
*
Expand Down Expand Up @@ -1308,8 +1321,8 @@ private function extractJsonLdInformation(\DOMXPath $xpath)
$candidateNames[] = $data['name'];
}

if (!empty($data['author']['name'])) {
$authors = $data['author']['name'];
if (!empty($data['author'])) {
$authors = \is_array($data['author']) ? $this->extractAuthorsFromJsonLdArray($data['author']) : $data['author'];

if (false === \is_array($authors)) {
$authors = [$authors];
Expand Down
7 changes: 2 additions & 5 deletions src/Extractor/HttpClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@ class HttpClient
private $responseHistory;

/**
* @param Client $client Http client
* @param array $config
* @param LoggerInterface|null $logger
* @param Client $client Http client
* @param array $config
*/
public function __construct(Client $client, $config = [], LoggerInterface $logger = null)
{
Expand Down Expand Up @@ -567,8 +566,6 @@ private function getUglyURL($url, $html)
* Format all headers to avoid unnecessary array level.
* Also lower the header name.
*
* @param ResponseInterface $response
*
* @return array
*/
private function formatHeaders(ResponseInterface $response)
Expand Down
7 changes: 2 additions & 5 deletions src/Graby.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ class Graby
private $imgNoReferrer = false;

/**
* @param array $config
* @param Client|null $client Http client
* @param ConfigBuilder|null $configBuilder
* @param array $config
* @param Client|null $client Http client
*/
public function __construct($config = [], Client $client = null, ConfigBuilder $configBuilder = null)
{
Expand Down Expand Up @@ -128,8 +127,6 @@ public function __construct($config = [], Client $client = null, ConfigBuilder $

/**
* Redefine all loggers.
*
* @param LoggerInterface $logger
*/
public function setLogger(LoggerInterface $logger)
{
Expand Down
5 changes: 1 addition & 4 deletions src/SiteConfig/ConfigBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ class ConfigBuilder
private $cache = [];

/**
* @param array $config
* @param LoggerInterface|null $logger
* @param array $config
*/
public function __construct($config = [], LoggerInterface $logger = null)
{
Expand Down Expand Up @@ -336,8 +335,6 @@ public function mergeConfig(SiteConfig $currentConfig, SiteConfig $newConfig)
/**
* Parse line from the config file to build the config.
*
* @param array $lines
*
* @return SiteConfig
*/
public function parseLines(array $lines)
Expand Down
17 changes: 17 additions & 0 deletions tests/Extractor/ContentExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,23 @@ public function testJsonLd()
$this->assertContains('<p>hihi</p>', $content_block->ownerDocument->saveXML($content_block));
}

/**
 * A JSON-LD "author" given as a list of Person objects must yield every author name, in order.
 */
public function testJsonLdWithMultipleAuthors()
{
    $contentExtractor = new ContentExtractor(self::$contentExtractorConfig);

    // Only the extracted authors are asserted, so the result of process() is not captured.
    $contentExtractor->process(
        '<script type="application/ld+json">{"@context":"https://schema.org","@type":"NewsArticle","author":[{"@type":"Person","name":"Elisa Thevenet"},{"@type":"Person","name":"Humphrey Bogart"}]}</script>',
        'https://nativead.io/jsonld'
    );

    $this->assertSame([
        'Elisa Thevenet',
        'Humphrey Bogart',
    ], $contentExtractor->getAuthors());
}

public function testNoDefinedHtml()
{
$contentExtractor = new ContentExtractor(self::$contentExtractorConfig);
Expand Down
2 changes: 1 addition & 1 deletion tests/GrabyTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,7 @@ public function dataAuthors()
[
'https://www.liberation.fr/planete/2017/04/05/donald-trump-et-xi-jinping-tentative-de-flirt-en-floride_1560768',
'liberation-authors.html',
['Raphaël Balenieri, correspondant à Pékin', 'Frédéric Autran, correspondant à New York'],
['Raphaël Balenieri', 'Frédéric Autran'],
],
];
}
Expand Down

0 comments on commit c27bcc8

Please sign in to comment.