diff --git a/.github/workflows/coding-standards.yml b/.github/workflows/coding-standards.yml
index 5d8714c4..589bb9cc 100644
--- a/.github/workflows/coding-standards.yml
+++ b/.github/workflows/coding-standards.yml
@@ -3,10 +3,12 @@ name: "CS"
on:
pull_request:
branches:
- - master
+ - "master"
+ - "2.x"
push:
branches:
- - master
+ - "master"
+ - "2.x"
jobs:
coding-standards:
@@ -16,11 +18,11 @@ jobs:
strategy:
matrix:
php:
- - "7.3"
+ - "7.4"
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
- name: "Install PHP"
uses: "shivammathur/setup-php@v2"
@@ -33,7 +35,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Run PHP CS Fixer"
run: "php vendor/bin/php-cs-fixer fix --verbose --dry-run --format=checkstyle | cs2pr"
diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml
index a59ecd7c..7d08d8d3 100644
--- a/.github/workflows/continuous-integration.yml
+++ b/.github/workflows/continuous-integration.yml
@@ -4,9 +4,11 @@ on:
pull_request:
branches:
- "master"
+ - "2.x"
push:
branches:
- "master"
+ - "2.x"
env:
fail-fast: true
@@ -24,10 +26,13 @@ jobs:
- "7.3"
- "7.4"
- "8.0"
+ - "8.1"
+ - "8.2"
+ - "8.3"
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -43,7 +48,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Setup logs"
run: "mkdir -p build/logs"
@@ -62,7 +67,7 @@ jobs:
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -78,7 +83,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Setup logs"
run: "mkdir -p build/logs"
@@ -108,7 +113,7 @@ jobs:
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -124,7 +129,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
with:
dependency-versions: "lowest"
@@ -145,7 +150,7 @@ jobs:
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -161,7 +166,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Setup adapter: Guzzle 5"
run: |
@@ -185,7 +190,7 @@ jobs:
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -201,7 +206,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Setup adapter: Guzzle 7"
run: |
@@ -225,7 +230,7 @@ jobs:
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -241,13 +246,12 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Setup adapter: cURL"
run: |
composer remove php-http/guzzle6-adapter --dev -n
composer require php-http/curl-client --dev -n
- composer require zendframework/zend-diactoros --dev -n
- name: "Setup logs"
run: "mkdir -p build/logs"
@@ -266,7 +270,7 @@ jobs:
steps:
- name: "Checkout"
- uses: "actions/checkout@v2"
+ uses: "actions/checkout@v4"
with:
fetch-depth: 2
@@ -282,7 +286,7 @@ jobs:
COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Install dependencies with Composer"
- uses: "ramsey/composer-install@v1"
+ uses: "ramsey/composer-install@v2"
- name: "Setup logs"
run: "mkdir -p build/logs"
diff --git a/composer.json b/composer.json
index bfa5f0ad..7c61e743 100644
--- a/composer.json
+++ b/composer.json
@@ -19,18 +19,18 @@
"ext-curl": "*",
"ext-tidy": "*",
"fossar/htmlawed": "^1.2.7",
- "http-interop/http-factory-guzzle": "^1.0",
- "j0k3r/graby-site-config": "^1.0.110",
+ "http-interop/http-factory-guzzle": "^1.1",
+ "j0k3r/graby-site-config": "^1.0.181",
"j0k3r/httplug-ssrf-plugin": "^2.0",
- "j0k3r/php-readability": "^1.2.3",
+ "j0k3r/php-readability": "^1.2.10",
"monolog/monolog": "^1.18.0|^2.0",
- "php-http/client-common": "^2.3",
- "php-http/discovery": "^1.12",
- "php-http/httplug": "^2.2",
- "php-http/message": "^1.9",
- "simplepie/simplepie": "^1.5",
- "smalot/pdfparser": "^1.0",
- "symfony/options-resolver": "^3.4|^4.4|^5.3|^6.0",
+ "php-http/client-common": "^2.7",
+ "php-http/discovery": "^1.19",
+ "php-http/httplug": "^2.4",
+ "php-http/message": "^1.14",
+ "simplepie/simplepie": "^1.7",
+ "smalot/pdfparser": "^1.1",
+ "symfony/options-resolver": "^3.4|^4.4|^5.3|^6.0|^7.0",
"true/punycode": "^2.1",
"guzzlehttp/psr7": "^1.5.0|^2.0"
},
@@ -43,7 +43,7 @@
"phpstan/phpstan": "^0.12",
"phpstan/phpstan-deprecation-rules": "^0.12",
"phpstan/phpstan-phpunit": "^0.12",
- "symfony/phpunit-bridge": "^5.3"
+ "symfony/phpunit-bridge": "^6.4.1"
},
"extra": {
"branch-alias": {
@@ -61,6 +61,10 @@
}
},
"config": {
- "sort-packages": true
+ "sort-packages": true,
+ "allow-plugins": {
+ "php-http/discovery": true,
+ "phpstan/extension-installer": true
+ }
}
}
diff --git a/phpunit.xml b/phpunit.xml
index f3063b5a..7b33c79f 100644
--- a/phpunit.xml
+++ b/phpunit.xml
@@ -38,6 +38,10 @@
+
+
+
+
diff --git a/src/Extractor/ContentExtractor.php b/src/Extractor/ContentExtractor.php
index 5f09582e..7875d38f 100644
--- a/src/Extractor/ContentExtractor.php
+++ b/src/Extractor/ContentExtractor.php
@@ -21,14 +21,14 @@ class ContentExtractor
private $xpath;
private $html;
private $config;
- private $siteConfig = null;
- private $title = null;
- private $language = null;
+ private $siteConfig;
+ private $title;
+ private $language;
private $authors = [];
- private $body = null;
- private $image = null;
+ private $body;
+ private $image;
private $nativeAd = false;
- private $date = null;
+ private $date;
private $success = false;
private $nextPageUrl;
/** @var LoggerInterface */
diff --git a/src/Extractor/HttpClient.php b/src/Extractor/HttpClient.php
index e1f774a6..3f509d91 100644
--- a/src/Extractor/HttpClient.php
+++ b/src/Extractor/HttpClient.php
@@ -13,8 +13,8 @@
use Http\Client\Common\Plugin\RedirectPlugin;
use Http\Client\Common\PluginClient;
use Http\Client\Exception\TransferException;
-use Http\Client\HttpClient as Client;
use Http\Discovery\Psr17FactoryDiscovery;
+use Psr\Http\Client\ClientInterface;
use Psr\Http\Message\ResponseInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
@@ -42,11 +42,7 @@ class HttpClient
*/
private $responseHistory;
- /**
- * @param Client $client Http client
- * @param array $config
- */
- public function __construct(Client $client, $config = [], LoggerInterface $logger = null)
+ public function __construct(ClientInterface $client, $config = [], LoggerInterface $logger = null)
{
$resolver = new OptionsResolver();
$resolver->setDefaults([
@@ -254,7 +250,7 @@ public function fetch($url, $skipTypeVerification = false, $httpHeader = [])
// check for
// for AJAX sites, e.g. Blogger with its dynamic views templates.
// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
- if (false === strpos($effectiveUrl, '_escaped_fragment_')) {
+ if (!str_contains($effectiveUrl, '_escaped_fragment_')) {
$redirectURL = $this->getMetaRefreshURL($effectiveUrl, $body) ?: $this->getUglyURL($effectiveUrl, $body);
if (false !== $redirectURL) {
@@ -296,7 +292,7 @@ private function cleanupUrl($url)
{
// rewrite part of urls to something more readable
foreach ($this->config['rewrite_url'] as $find => $action) {
- if (false !== strpos($url, $find) && \is_array($action)) {
+ if (str_contains($url, $find) && \is_array($action)) {
$url = strtr($url, $action);
}
}
diff --git a/src/Graby.php b/src/Graby.php
index 4b0e9200..13bd64f2 100644
--- a/src/Graby.php
+++ b/src/Graby.php
@@ -9,11 +9,11 @@
use GuzzleHttp\Psr7\Uri;
use GuzzleHttp\Psr7\UriResolver;
use Http\Client\Common\PluginClient;
-use Http\Client\HttpClient as Client;
-use Http\Discovery\HttpClientDiscovery;
+use Http\Discovery\Psr18ClientDiscovery;
use Http\Message\CookieJar;
use Monolog\Handler\StreamHandler;
use Monolog\Logger;
+use Psr\Http\Client\ClientInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use Readability\Readability;
@@ -34,21 +34,17 @@ class Graby
private $config = [];
- private $httpClient = null;
- private $extractor = null;
+ private $httpClient;
+ private $extractor;
/** @var ConfigBuilder */
private $configBuilder;
private $punycode;
private $imgNoReferrer = false;
- private $prefetchedContent = null;
+ private $prefetchedContent;
- /**
- * @param array $config
- * @param Client|null $client Http client
- */
- public function __construct($config = [], Client $client = null, ConfigBuilder $configBuilder = null)
+ public function __construct($config = [], ClientInterface $client = null, ConfigBuilder $configBuilder = null)
{
$resolver = new OptionsResolver();
$resolver->setDefaults([
@@ -118,7 +114,7 @@ public function __construct($config = [], Client $client = null, ConfigBuilder $
);
$this->httpClient = new HttpClient(
- $client ?: new PluginClient(HttpClientDiscovery::find(), [new CookiePlugin(new CookieJar())]),
+ $client ?: new PluginClient(Psr18ClientDiscovery::find(), [new CookiePlugin(new CookieJar())]),
$this->config['http_client'],
$this->logger
);
@@ -150,8 +146,6 @@ public function reloadConfigFiles()
* Return a config.
*
* @param string $key
- *
- * @return mixed
*/
public function getConfig($key)
{
@@ -226,7 +220,7 @@ public function cleanupHtml($contentBlock, $url)
}
// footnotes
- if ('footnotes' === $this->config['content_links'] && false === strpos($url, 'wikipedia.org')) {
+ if ('footnotes' === $this->config['content_links'] && !str_contains($url, 'wikipedia.org')) {
$this->extractor->readability->addFootnotes($contentBlock);
}
@@ -343,13 +337,6 @@ private function doFetchContent($url)
$this->logger->debug('HTML after regex empty nodes stripping', ['html' => $html]);
- // some non utf8 enconding might be breaking after converting to utf8
- // when it happen the string (usually) starts with this character
- // in that case, we'll take the default response instead of the utf8 forced one
- if (0 === strpos(utf8_encode($response['body']), 'ÿþ')) {
- $html = $response['body'];
- }
-
// check site config for single page URL - fetch it if found
$isSinglePage = false;
if ($this->config['singlepage'] && null === $this->prefetchedContent && ($singlePageResponse = $this->getSinglePage($html, $effectiveUrl))) {
diff --git a/src/HttpClient/Plugin/CookiePlugin.php b/src/HttpClient/Plugin/CookiePlugin.php
index 7da4be90..a92aa9e7 100644
--- a/src/HttpClient/Plugin/CookiePlugin.php
+++ b/src/HttpClient/Plugin/CookiePlugin.php
@@ -31,9 +31,6 @@ public function __construct(CookieJar $cookieJar)
$this->cookieJar = $cookieJar;
}
- /**
- * {@inheritdoc}
- */
public function handleRequest(RequestInterface $request, callable $next, callable $first): Promise
{
$cookies = [];
diff --git a/src/SiteConfig/ConfigBuilder.php b/src/SiteConfig/ConfigBuilder.php
index 577103f4..ae527730 100644
--- a/src/SiteConfig/ConfigBuilder.php
+++ b/src/SiteConfig/ConfigBuilder.php
@@ -206,6 +206,7 @@ public function buildForHost($host, $addToCache = true)
* @return false|SiteConfig
*
* @deprecated Use either buildForHost() / buildFromUrl() for the merged config or loadSiteConfig() to get the config for a site
+ *
* @codeCoverageIgnore
*/
public function build($host, $exactHostMatch = false)
diff --git a/src/SiteConfig/SiteConfig.php b/src/SiteConfig/SiteConfig.php
index 65f63c23..4ec84dfe 100644
--- a/src/SiteConfig/SiteConfig.php
+++ b/src/SiteConfig/SiteConfig.php
@@ -54,7 +54,7 @@ class SiteConfig
*
* @var ?string
*/
- public $src_lazy_load_attr = null;
+ public $src_lazy_load_attr;
/**
* Strip elements which contain these strings (0 or more) in the id or class attribute.
@@ -89,7 +89,7 @@ class SiteConfig
*
* @var ?bool
*/
- public $tidy = null;
+ public $tidy;
/**
* Autodetect title/body if xpath expressions fail to produce results.
@@ -103,14 +103,14 @@ class SiteConfig
*
* @var ?bool
*/
- public $autodetect_on_failure = null;
+ public $autodetect_on_failure;
/**
* Clean up content block - attempt to remove elements that appear to be superfluous.
*
* @var ?bool
*/
- public $prune = null;
+ public $prune;
/**
* Test URL - if present, can be used to test the config above.
@@ -149,7 +149,7 @@ class SiteConfig
*
* @var ?string
*/
- public $parser = null;
+ public $parser;
/**
* Strings to search for in HTML before processing begins (used with $replace_string).
@@ -170,7 +170,7 @@ class SiteConfig
*
* @var ?string
*/
- public $cache_key = null;
+ public $cache_key;
/**
* If fetching the site's content requires to authentify.
diff --git a/tests/Extractor/ContentExtractorTest.php b/tests/Extractor/ContentExtractorTest.php
index fd4d8906..e64c885e 100644
--- a/tests/Extractor/ContentExtractorTest.php
+++ b/tests/Extractor/ContentExtractorTest.php
@@ -847,7 +847,7 @@ public function testLogMessage(): void
$this->assertSame('Trying {pattern} for language', $records[4]['message']);
$this->assertSame('Trying {pattern} for language', $records[5]['message']);
$this->assertSame('Using Readability', $records[6]['message']);
- $this->assertSame('Date is bad (strtotime failed): {date}', $records[7]['message']);
+ $this->assertSame('Date is bad (wrong year): {date}', $records[7]['message']);
$this->assertSame('Attempting to parse HTML with {parser}', $records[9]['message']);
}
diff --git a/tests/GrabyFunctionalTest.php b/tests/GrabyFunctionalTest.php
index 74357cef..b49e2edb 100644
--- a/tests/GrabyFunctionalTest.php
+++ b/tests/GrabyFunctionalTest.php
@@ -163,7 +163,6 @@ public function dataWithAccent(): array
return [
// ['http://pérotin.com/post/2015/08/31/Le-cadran-solaire-amoureux'],
['https://en.wikipedia.org/wiki/Café'],
- ['http://www.atterres.org/article/budget-2016-les-10-méprises-libérales-du-gouvernement'],
];
}
diff --git a/tests/GrabyTest.php b/tests/GrabyTest.php
index 1e3c6dfd..e8acbc94 100644
--- a/tests/GrabyTest.php
+++ b/tests/GrabyTest.php
@@ -413,6 +413,7 @@ public function dataForSinglePage(): array
/**
* @group dns-sensitive
+ *
* @dataProvider dataForSinglePage
*/
public function testSinglePage(string $url, string $expectedUrl, string $singlePageUrl): void
diff --git a/tests/SiteConfig/ConfigBuilderTest.php b/tests/SiteConfig/ConfigBuilderTest.php
index e34e9ef2..10c43557 100644
--- a/tests/SiteConfig/ConfigBuilderTest.php
+++ b/tests/SiteConfig/ConfigBuilderTest.php
@@ -181,7 +181,7 @@ public function dataForBuild(): array
/**
* @dataProvider dataForBuild
*/
- public function testBuildSiteConfig(string $host, bool $expectedRes, ?string $matchedHost = null): void
+ public function testBuildSiteConfig(string $host, bool $expectedRes, string $matchedHost = null): void
{
$configBuilder = new ConfigBuilder([
'site_config' => [__DIR__ . '/../fixtures/site_config'],
diff --git a/tests/fixtures/sites/blogger.test b/tests/fixtures/sites/blogger.test
index 141c0ff8..0c938931 100644
--- a/tests/fixtures/sites/blogger.test
+++ b/tests/fixtures/sites/blogger.test
@@ -3285,23 +3285,14 @@ flickering/performance issues. Note: put this last, else text glitches. -->