From 83ee1366910449c3913ac362d7ffac367fb301dc Mon Sep 17 00:00:00 2001 From: kkamara Date: Sat, 16 Mar 2024 21:33:19 +0000 Subject: [PATCH] Allow for multiple instances of web scraping sessions --- .env.example | 2 + app/Console/Commands/BrowserScrape.php | 11 ++--- app/Console/Commands/TestCrawler.php | 67 -------------------------- config/app.php | 2 + readme.md | 22 ++++----- 5 files changed, 19 insertions(+), 85 deletions(-) delete mode 100644 app/Console/Commands/TestCrawler.php diff --git a/.env.example b/.env.example index 40ae92e..42496d3 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,8 @@ APP_DEBUG=true APP_TIMEZONE=UTC APP_URL=http://localhost +SELENIUM_GRID_PORT=4444 + APP_LOCALE=en APP_FALLBACK_LOCALE=en APP_FAKER_LOCALE=en_US diff --git a/app/Console/Commands/BrowserScrape.php b/app/Console/Commands/BrowserScrape.php index 55649b7..140983e 100644 --- a/app/Console/Commands/BrowserScrape.php +++ b/app/Console/Commands/BrowserScrape.php @@ -24,17 +24,14 @@ class BrowserScrape extends Command /** * @var Client */ - protected Client $client; + private Client $client; public function __construct() { parent::__construct(); - $this->client = Client::createChromeClient(null, [ - '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36', - '--window-size=1200,1100', - // '--headless', - // '--disable-gpu', - ]); + $this->client = Client::createSeleniumClient( + 'http://localhost:'.config('app.selenium_grid_port').'/wd/hub' + ); } /** diff --git a/app/Console/Commands/TestCrawler.php b/app/Console/Commands/TestCrawler.php deleted file mode 100644 index 526c5ce..0000000 --- a/app/Console/Commands/TestCrawler.php +++ /dev/null @@ -1,67 +0,0 @@ -client = Client::createChromeClient(null, [ - '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36', - '--window-size=1200,1100', - // '--headless', - // '--disable-gpu', - ]); - } - - /** - * @return void - */ - private function getInput() { - $words = $this->ask('>>'); - $this->info($words); - } - - /** - * Execute the console command. - * - * @return int - */ - public function handle() - { - $this->client - ->get('https://www.imdb.com/search/name/?birth_monthday=12-10'); - $crawler = $this->client->getCrawler(); - $preferences = $crawler->filterXPath('//button[@data-testid="accept-button"]'); - $preferences->click(); - $element = $crawler->filterXPath('//h3[text()="1. Kenneth Branagh"]'); - $element->click(); - $this->client->takeScreenshot($saveAs = 'screenshot.jpg'); - - - return 0; - } -} diff --git a/config/app.php b/config/app.php index f467267..2b2243c 100644 --- a/config/app.php +++ b/config/app.php @@ -15,6 +15,8 @@ 'name' => env('APP_NAME', 'Laravel'), + 'selenium_grid_port' => env('SELENIUM_GRID_PORT', 4444), + /* |-------------------------------------------------------------------------- | Application Environment diff --git a/readme.md b/readme.md index fb2fc04..7a19e25 100644 --- a/readme.md +++ b/readme.md @@ -45,6 +45,11 @@ cp .env.example .env composer install # install chromedriver for Panther client. vendor/bin/bdi detect drivers +``` + +#### Add ./drivers/ to your environment Path. + +```bash # Run composer install again. composer install php artisan key:generate @@ -55,19 +60,14 @@ npm install npm run build ``` -#### The following installation step may or may not be required. +#### Download Selenium Server jar file + +[Download Selenium Server jar file](https://www.selenium.dev/documentation/grid/getting_started/). -[Installing web drivers](https://symfony.com/doc/current/testing/end_to_end.html#installing-web-drivers). +Run the following in a new terminal. ```bash -# chromedriver_mac64 -# chromedriver_win32 -# See https://chromedriver.storage.googleapis.com -# for drivers list. -wget https://chromedriver.storage.googleapis.com/2.37/chromedriver_linux64.zip -unzip chromedriver_linux64.zip -sudo mv chromedriver /usr/bin/chromedriver -chromedriver --version +java -jar selenium-server-4.18.1.jar standalone ``` ## Usage @@ -91,7 +91,7 @@ export PANTHER_DEVTOOLS='' # enabled ## Adding a new command ```bash -php artisan make:crawler crawler_test +php artisan make:crawler TestCrawler ``` ## Misc