From 23aed1e7ca19d2eae8cb38c4c71703a2249b7265 Mon Sep 17 00:00:00 2001 From: girishkrishaweb Date: Fri, 7 Jan 2022 14:24:21 +0530 Subject: [PATCH] :bug: Fix 502 bad gateway error j0k3r#278 Fix PHP Error/Warning j0k3r/graby#278 Move http_header definition before condition j0k3r/graby#278 Move http_header array_merge before condition j0k3r/graby#278 Fix cs fixer & PHPStan error j0k3r#278 --- src/Extractor/ContentExtractor.php | 8 ++++---- src/SiteConfig/ConfigBuilder.php | 13 +++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Extractor/ContentExtractor.php b/src/Extractor/ContentExtractor.php index 6fb80a02..d98d9096 100644 --- a/src/Extractor/ContentExtractor.php +++ b/src/Extractor/ContentExtractor.php @@ -201,8 +201,8 @@ public function process(string $html, string $url, SiteConfig $siteConfig = null $this->readability = $this->getReadability($html, $url, $parser, $this->siteConfig->tidy() && $smartTidy); $tidied = $this->readability->tidied; - $this->logger->info('Body size after Readability: {length}', ['length' => \strlen((string) $this->readability->dom->saveXML())]); - $this->logger->debug('Body after Readability', ['dom_saveXML' => $this->readability->dom->saveXML()]); + $this->logger->info('Body size after Readability: {length}', ['length' => \strlen((string) $this->readability->dom->saveXML($this->readability->dom->documentElement))]); + $this->logger->debug('Body after Readability', ['dom_saveXML' => $this->readability->dom->saveXML($this->readability->dom->documentElement)]); // we use xpath to find elements in the given HTML document $this->xpath = new \DOMXPath($this->readability->dom); @@ -373,11 +373,11 @@ public function process(string $html, string $url, SiteConfig $siteConfig = null $this->removeElements($elems, 'Stripping {length} empty a elements'); - $this->logger->debug('DOM after site config stripping', ['dom_saveXML' => $this->readability->dom->saveXML()]); + $this->logger->debug('DOM after site config 
stripping', ['dom_saveXML' => $this->readability->dom->saveXML($this->readability->dom->documentElement)]); // try to get body foreach ($this->siteConfig->body as $pattern) { - $this->logger->info('Trying {pattern} for body (content length: {content_length})', ['pattern' => $pattern, 'content_length' => \strlen((string) $this->readability->dom->saveXML())]); + $this->logger->info('Trying {pattern} for body (content length: {content_length})', ['pattern' => $pattern, 'content_length' => \strlen((string) $this->readability->dom->saveXML($this->readability->dom->documentElement))]); $res = $this->extractBody( true, diff --git a/src/SiteConfig/ConfigBuilder.php b/src/SiteConfig/ConfigBuilder.php index 73e37311..43b1c11d 100644 --- a/src/SiteConfig/ConfigBuilder.php +++ b/src/SiteConfig/ConfigBuilder.php @@ -327,6 +327,15 @@ public function mergeConfig(SiteConfig $currentConfig, SiteConfig $newConfig) // find_string: http_header = array_merge($newConfig->http_header, $currentConfig->http_header); + + if (\count($currentConfig->find_string) !== \count($currentConfig->replace_string)) { + return $currentConfig; + } + $findReplaceCurrentConfig = array_combine($currentConfig->find_string, $currentConfig->replace_string); $findReplaceNewConfig = array_combine($newConfig->find_string, $newConfig->replace_string); $findReplaceMerged = array_merge((array) $findReplaceCurrentConfig, (array) $findReplaceNewConfig); @@ -340,10 +349,6 @@ public function mergeConfig(SiteConfig $currentConfig, SiteConfig $newConfig) $currentConfig->replace_string[] = $replaceString; } - // merge http_header array from currentConfig into newConfig - // because final values override former values in case of named keys - $currentConfig->http_header = array_merge($newConfig->http_header, $currentConfig->http_header); - return $currentConfig; }