diff --git a/src/Extractor/ContentExtractor.php b/src/Extractor/ContentExtractor.php index 8b7c514f..8c750e4d 100644 --- a/src/Extractor/ContentExtractor.php +++ b/src/Extractor/ContentExtractor.php @@ -37,9 +37,7 @@ class ContentExtractor private $configBuilder; /** - * @param array $config - * @param LoggerInterface|null $logger - * @param ConfigBuilder|null $configBuilder + * @param array $config */ public function __construct($config = [], LoggerInterface $logger = null, ConfigBuilder $configBuilder = null) { @@ -730,8 +728,6 @@ protected function addAuthor($authorDirty) /** * Check if given node list exists and has length more than 0. * - * @param \DOMNodeList $elems - * * @return bool */ private function hasElements(\DOMNodeList $elems) @@ -742,8 +738,7 @@ private function hasElements(\DOMNodeList $elems) /** * Remove elements. * - * @param \DOMNodeList $elems - * @param string $logMessage + * @param string $logMessage */ private function removeElements(\DOMNodeList $elems, $logMessage = null) { @@ -1250,6 +1245,24 @@ private function extractOpenGraph(\DOMXPath $xpath) } } + /** + * Clean extract of JSON-LD authors. + */ + private function extractAuthorsFromJsonLdArray(array $authors) + { + if (isset($authors['name'])) { + return $authors['name']; + } + + return array_map(function ($author) { + if (isset($author['name']) && \is_string($author['name'])) { + return $author['name']; + } + + return false; + }, $authors); + } + /** * Extract data from JSON-LD information. * @@ -1308,8 +1321,8 @@ private function extractJsonLdInformation(\DOMXPath $xpath) $candidateNames[] = $data['name']; } - if (!empty($data['author']['name'])) { - $authors = $data['author']['name']; + if (!empty($data['author'])) { + $authors = \is_array($data['author']) ? $this->extractAuthorsFromJsonLdArray($data['author']) : $data['author']; if (false === \is_array($authors)) { $authors = [$authors]; diff --git a/src/Extractor/HttpClient.php b/src/Extractor/HttpClient.php index 18cd5052..9c67ca99 100644 --- a/src/Extractor/HttpClient.php +++ b/src/Extractor/HttpClient.php @@ -43,9 +43,8 @@ class HttpClient private $responseHistory; /** - * @param Client $client Http client - * @param array $config - * @param LoggerInterface|null $logger + * @param Client $client Http client + * @param array $config */ public function __construct(Client $client, $config = [], LoggerInterface $logger = null) { @@ -567,8 +566,6 @@ private function getUglyURL($url, $html) * Format all headers to avoid unecessary array level. * Also lower the header name. * - * @param ResponseInterface $response - * * @return array */ private function formatHeaders(ResponseInterface $response) diff --git a/src/Graby.php b/src/Graby.php index dd65b71e..a4ec171f 100644 --- a/src/Graby.php +++ b/src/Graby.php @@ -44,9 +44,8 @@ class Graby private $imgNoReferrer = false; /** - * @param array $config - * @param Client|null $client Http client - * @param ConfigBuilder|null $configBuilder + * @param array $config + * @param Client|null $client Http client */ public function __construct($config = [], Client $client = null, ConfigBuilder $configBuilder = null) { @@ -128,8 +127,6 @@ public function __construct($config = [], Client $client = null, ConfigBuilder $ /** * Redefine all loggers. - * - * @param LoggerInterface $logger */ public function setLogger(LoggerInterface $logger) { diff --git a/src/SiteConfig/ConfigBuilder.php b/src/SiteConfig/ConfigBuilder.php index d2f1d5d2..469202e8 100644 --- a/src/SiteConfig/ConfigBuilder.php +++ b/src/SiteConfig/ConfigBuilder.php @@ -16,8 +16,7 @@ class ConfigBuilder private $cache = []; /** - * @param array $config - * @param LoggerInterface|null $logger + * @param array $config */ public function __construct($config = [], LoggerInterface $logger = null) { @@ -336,8 +335,6 @@ public function mergeConfig(SiteConfig $currentConfig, SiteConfig $newConfig) /** * Parse line from the config file to build the config. * - * @param array $lines - * * @return SiteConfig */ public function parseLines(array $lines) diff --git a/tests/Extractor/ContentExtractorTest.php b/tests/Extractor/ContentExtractorTest.php index a45d1d6b..daf8a40c 100644 --- a/tests/Extractor/ContentExtractorTest.php +++ b/tests/Extractor/ContentExtractorTest.php @@ -946,6 +946,23 @@ public function testJsonLd() $this->assertContains('

hihi

', $content_block->ownerDocument->saveXML($content_block)); } + public function testJsonLdWithMultipleAuthors() + { + $contentExtractor = new ContentExtractor(self::$contentExtractorConfig); + + $res = $contentExtractor->process( + '', + 'https://nativead.io/jsonld' + ); + + $content_block = $contentExtractor->getContent(); + + $this->assertSame([ + 'Elisa Thevenet', + 'Humphrey Bogart', + ], $contentExtractor->getAuthors()); + } + public function testNoDefinedHtml() { $contentExtractor = new ContentExtractor(self::$contentExtractorConfig); diff --git a/tests/GrabyTest.php b/tests/GrabyTest.php index e7a29bcb..37d620bb 100644 --- a/tests/GrabyTest.php +++ b/tests/GrabyTest.php @@ -1192,7 +1192,7 @@ public function dataAuthors() [ 'https://www.liberation.fr/planete/2017/04/05/donald-trump-et-xi-jinping-tentative-de-flirt-en-floride_1560768', 'liberation-authors.html', - ['Raphaël Balenieri, correspondant à Pékin', 'Frédéric Autran, correspondant à New York'], + ['Raphaël Balenieri', 'Frédéric Autran'], ], ]; }