Skip to content

Commit

Permalink
🐛 Fix 502 bad gateway error #278
Browse files Browse the repository at this point in the history
Fix PHP Error/Warning #278

Move http_header definition before condition #278

Move http_header array_merge before condition #278

Fix cs fixer & PHPStan error #278
  • Loading branch information
girishpanchal30 committed Jan 7, 2022
1 parent 026abfe commit 23aed1e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
8 changes: 4 additions & 4 deletions src/Extractor/ContentExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ public function process(string $html, string $url, SiteConfig $siteConfig = null
$this->readability = $this->getReadability($html, $url, $parser, $this->siteConfig->tidy() && $smartTidy);
$tidied = $this->readability->tidied;

$this->logger->info('Body size after Readability: {length}', ['length' => \strlen((string) $this->readability->dom->saveXML())]);
$this->logger->debug('Body after Readability', ['dom_saveXML' => $this->readability->dom->saveXML()]);
$this->logger->info('Body size after Readability: {length}', ['length' => \strlen((string) $this->readability->dom->saveXML($this->readability->dom->documentElement))]);
$this->logger->debug('Body after Readability', ['dom_saveXML' => $this->readability->dom->saveXML($this->readability->dom->documentElement)]);

// we use xpath to find elements in the given HTML document
$this->xpath = new \DOMXPath($this->readability->dom);
Expand Down Expand Up @@ -373,11 +373,11 @@ public function process(string $html, string $url, SiteConfig $siteConfig = null

$this->removeElements($elems, 'Stripping {length} empty a elements');

$this->logger->debug('DOM after site config stripping', ['dom_saveXML' => $this->readability->dom->saveXML()]);
$this->logger->debug('DOM after site config stripping', ['dom_saveXML' => $this->readability->dom->saveXML($this->readability->dom->documentElement)]);

// try to get body
foreach ($this->siteConfig->body as $pattern) {
$this->logger->info('Trying {pattern} for body (content length: {content_length})', ['pattern' => $pattern, 'content_length' => \strlen((string) $this->readability->dom->saveXML())]);
$this->logger->info('Trying {pattern} for body (content length: {content_length})', ['pattern' => $pattern, 'content_length' => \strlen((string) $this->readability->dom->saveXML($this->readability->dom->documentElement))]);

$res = $this->extractBody(
true,
Expand Down
13 changes: 9 additions & 4 deletions src/SiteConfig/ConfigBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,15 @@ public function mergeConfig(SiteConfig $currentConfig, SiteConfig $newConfig)
// find_string: <other-img
// replace_string: <img
// To fix that issue, we combine find & replace as key & value in one array, we merge them and then rebuild find & replace string in the current config

// merge http_header array from currentConfig into newConfig
// because final values override former values in case of named keys
$currentConfig->http_header = array_merge($newConfig->http_header, $currentConfig->http_header);

if (\count($currentConfig->find_string) !== \count($currentConfig->replace_string)) {
return $currentConfig;
}

$findReplaceCurrentConfig = array_combine($currentConfig->find_string, $currentConfig->replace_string);
$findReplaceNewConfig = array_combine($newConfig->find_string, $newConfig->replace_string);
$findReplaceMerged = array_merge((array) $findReplaceCurrentConfig, (array) $findReplaceNewConfig);
Expand All @@ -340,10 +349,6 @@ public function mergeConfig(SiteConfig $currentConfig, SiteConfig $newConfig)
$currentConfig->replace_string[] = $replaceString;
}

// merge http_header array from currentConfig into newConfig
// because final values override former values in case of named keys
$currentConfig->http_header = array_merge($newConfig->http_header, $currentConfig->http_header);

return $currentConfig;
}

Expand Down

0 comments on commit 23aed1e

Please sign in to comment.