From 7e2d88f88ed14e574d93874912de6a55fbc74416 Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Sun, 29 Oct 2023 19:48:01 +0000 Subject: [PATCH 1/2] fix: Home Assistant custom component fix for Selenium based councils --- poetry.lock | 105 +++++++++++++++++- pyproject.toml | 1 + uk_bin_collection/uk_bin_collection/common.py | 26 ++++- .../councils/BroxtoweBoroughCouncil.py | 14 +-- .../councils/BuckinghamshireCouncil.py | 16 +-- .../DerbyshireDalesDistrictCouncil.py | 14 +-- .../councils/EastLindseyDistrictCouncil.py | 11 +- .../councils/EastSuffolkCouncil.py | 18 +-- .../councils/GatesheadCouncil.py | 11 +- .../councils/HighPeakCouncil.py | 14 +-- .../councils/NeathPortTalbotCouncil.py | 11 +- .../councils/NorthNorfolkDistrictCouncil.py | 14 +-- .../councils/NorthumberlandCouncil.py | 18 +-- .../councils/PrestonCityCouncil.py | 19 +--- .../ReigateAndBansteadBoroughCouncil.py | 21 ++-- .../councils/RushcliffeBoroughCouncil.py | 14 +-- .../StaffordshireMoorlandsDistrictCouncil.py | 11 +- .../councils/WakefieldCityCouncil.py | 12 +- .../councils/WestLothianCouncil.py | 11 +- 19 files changed, 167 insertions(+), 194 deletions(-) diff --git a/poetry.lock b/poetry.lock index a00537fe58..05f7462081 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "allure-pytest-bdd" version = "2.13.2" description = "Allure pytest-bdd integration" +category = "dev" optional = false python-versions = "*" files = [ @@ -20,6 +21,7 @@ pytest-bdd = ">=3.0.0" name = "allure-python-commons" version = "2.13.2" description = "Common module for integrate allure with python-based frameworks" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -35,6 +37,7 @@ pluggy = ">=0.4.0" name = "astroid" version = "3.0.0" description = "An abstract syntax tree for Python with inference support." +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -49,6 +52,7 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -67,6 +71,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -85,6 +90,7 @@ lxml = ["lxml"] name = "black" version = "23.9.1" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -131,6 +137,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "bs4" version = "0.0.1" description = "Dummy package for Beautiful Soup" +category = "main" optional = false python-versions = "*" files = [ @@ -144,6 +151,7 @@ beautifulsoup4 = "*" name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -155,6 +163,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -219,6 +228,7 @@ pycparser = "*" name = "charset-normalizer" version = "3.3.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -318,6 +328,7 @@ files = [ name = "click" version = "8.1.7" description = "Composable command line interface toolkit" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -332,6 +343,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -343,6 +355,7 @@ files = [ name = "coverage" version = "7.3.2" description = "Code coverage measurement for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -407,6 +420,7 @@ toml = ["tomli"] name = "dill" version = "0.3.7" description = "serialize all of Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -421,6 +435,7 @@ graph = ["objgraph (>=1.7.2)"] name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -435,6 +450,7 @@ test = ["pytest (>=6)"] name = "execnet" version = "2.0.2" description = "execnet: rapid multi-Python deployment" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -449,6 +465,7 @@ testing = ["hatch", "pre-commit", "pytest", "tox"] name = "flake8" version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -465,6 +482,7 @@ pyflakes = ">=3.1.0,<3.2.0" name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -476,6 +494,7 @@ files = [ name = "holidays" version = "0.34" description = "Generate and work with holidays in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -490,6 +509,7 @@ python-dateutil = "*" name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -501,6 +521,7 @@ files = [ name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -512,6 +533,7 @@ files = [ name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -529,6 +551,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "jsonschema" version = "4.19.1" description = "An implementation of JSON Schema validation for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -550,6 +573,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2023.7.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -564,6 +588,7 @@ referencing = ">=0.28.0" name = "lxml" version = "4.9.3" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -671,6 +696,7 @@ source = ["Cython (>=0.29.35)"] name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -690,6 +716,7 @@ testing = ["pytest"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -759,6 +786,7 @@ files = [ name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -770,6 +798,7 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -781,6 +810,7 @@ files = [ name = "numpy" version = "1.25.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -815,6 +845,7 @@ files = [ name = "numpy" version = "1.26.1" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = "<3.13,>=3.9" files = [ @@ -856,6 +887,7 @@ files = [ name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -870,6 +902,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.2" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -881,6 +914,7 @@ files = [ name = "pandas" version = "2.1.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -942,6 +976,7 @@ xml = ["lxml (>=4.8.0)"] name = "parse" version = "1.19.1" description = "parse() is the opposite of format()" +category = "dev" optional = false python-versions = "*" files = [ @@ -953,6 +988,7 @@ files = [ name = "parse-type" version = "0.6.2" description = "Simplifies to build parse types based on the parse module" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*" files = [ @@ -973,6 +1009,7 @@ testing = ["pytest (<5.0)", "pytest (>=5.0)", "pytest-html (>=1.19.0)"] name = "pathspec" version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -984,6 +1021,7 @@ files = [ name = "platformdirs" version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -999,6 +1037,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "pluggy" version = "1.3.0" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1014,6 +1053,7 @@ testing = ["pytest", "pytest-benchmark"] name = "psutil" version = "5.9.6" description = "Cross-platform lib for process and system monitoring in Python." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -1042,6 +1082,7 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "pycodestyle" version = "2.11.1" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1053,6 +1094,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1064,6 +1106,7 @@ files = [ name = "pyflakes" version = "3.1.0" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1075,6 +1118,7 @@ files = [ name = "pylint" version = "3.0.1" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -1087,8 +1131,8 @@ astroid = ">=3.0.0,<=3.1.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, + {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, - {version = ">=0.3.6", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, ] isort = ">=4.2.5,<6" mccabe = ">=0.6,<0.8" @@ -1104,6 +1148,7 @@ testutils = ["gitpython (>3)"] name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1116,6 +1161,7 @@ files = [ name = "pytest" version = "7.4.2" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1138,6 +1184,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-bdd" version = "7.0.0" description = "BDD for pytest" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1156,6 +1203,7 @@ typing-extensions = "*" name = "pytest-xdist" version = "3.3.1" description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1177,6 +1225,7 @@ testing = ["filelock"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -1187,10 +1236,26 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, + {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -1202,6 +1267,7 @@ files = [ name = "referencing" version = "0.30.2" description = "JSON Referencing + Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1217,6 +1283,7 @@ rpds-py = ">=0.7.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1238,6 +1305,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rpds-py" version = "0.10.6" description = "Python bindings to Rust's persistent data structures (rpds)" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1346,6 +1414,7 @@ files = [ name = "selenium" version = "4.14.0" description = "" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1363,6 +1432,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "setuptools" version = "68.2.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1379,6 +1449,7 @@ testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jar name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1390,6 +1461,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1401,6 +1473,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -1412,6 +1485,7 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1423,6 +1497,7 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1434,6 +1509,7 @@ files = [ name = "tomlkit" version = "0.12.1" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1445,6 +1521,7 @@ files = [ name = "trio" version = "0.22.2" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1465,6 +1542,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.11.1" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1481,6 +1559,7 @@ wsproto = ">=0.14" name = "typing-extensions" version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1492,6 +1571,7 @@ files = [ name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -1503,6 +1583,7 @@ files = [ name = "urllib3" version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -1518,10 +1599,28 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "webdriver-manager" +version = "4.0.1" +description = "Library provides the way to automatically manage drivers for different browsers" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "webdriver_manager-4.0.1-py2.py3-none-any.whl", hash = "sha256:d7970052295bb9cda2c1a24cf0b872dd2c41ababcc78f7b6b8dc37a41e979a7e"}, + {file = "webdriver_manager-4.0.1.tar.gz", hash = "sha256:25ec177c6a2ce9c02fb8046f1b2732701a9418d6a977967bb065d840a3175d87"}, +] + +[package.dependencies] +packaging = "*" +python-dotenv = "*" +requests = "*" + [[package]] name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -1535,4 +1634,4 @@ h11 = ">=0.9.0,<1" [metadata] lock-version = "2.0" python-versions = ">=3.10" -content-hash = "1de4553dfbae845040b325af39836df984ba634280e32d6a20bc66b42fa59b4a" +content-hash = "f96b514762b5ab7e2f676f4533e7cd508320fb532dd94dc15b21a7367c483c15" diff --git a/pyproject.toml b/pyproject.toml index 86793a47bc..2f0de24e3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ requests = "*" selenium = "*" lxml = "*" urllib3 = "*" +webdriver-manager = "^4.0.1" [tool.commitizen] major_version_zero = true diff --git a/uk_bin_collection/uk_bin_collection/common.py b/uk_bin_collection/uk_bin_collection/common.py index 338da375d0..035bf46e40 100644 --- a/uk_bin_collection/uk_bin_collection/common.py +++ b/uk_bin_collection/uk_bin_collection/common.py @@ -1,13 +1,15 @@ import calendar +import holidays import json import os +import pandas as pd import re +import requests from datetime import datetime from enum import Enum - -import holidays -import pandas as pd -import requests +from selenium import webdriver +from selenium.webdriver.chrome.service import Service as ChromeService +from webdriver_manager.chrome import ChromeDriverManager date_format = "%d/%m/%Y" days_of_week = { @@ -217,3 +219,19 @@ def write_output_json(council: str, content: str): def validate_dates(bin_dates: dict) -> dict: raise NotImplementedError() # If a date is in December and the next is in January, increase the year + + +def create_webdriver() -> webdriver.Chrome: + """ + Create and return a headless Selenium webdriver + :rtype: webdriver.Chrome + """ + # Set up Selenium to run 'headless' + options = webdriver.ChromeOptions() + # options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-gpu") + options.add_argument("--disable-dev-shm-usage") + options.add_experimental_option("excludeSwitches", ["enable-logging"]) + # Return a Selenium webdriver + return webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) diff --git a/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py index f3e774ca2c..1614a100b0 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py @@ -1,9 +1,9 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # Populate postcode field diff --git a/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py index 1144480525..aa034adec4 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py @@ -1,11 +1,9 @@ -import time - import pandas as pd -from selenium import webdriver -from selenium.webdriver.chrome.options import Options +import time from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.ui import Select + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -40,16 +38,8 @@ def parse_data(self, page: str, **kwargs) -> dict: user_postcode = kwargs.get("postcode") user_paon = kwargs.get("paon") - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # Enter postcode in text box and wait diff --git a/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py index f43f2fcb34..4fe73a18e6 100644 --- a/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py @@ -1,9 +1,9 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # Populate postcode field diff --git a/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py index 34fc229d42..24e0cae5f7 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py @@ -1,5 +1,4 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait @@ -24,16 +23,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_paon(user_paon) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get("https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days") # Wait for the postcode field to appear then populate it diff --git a/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py index c3497ab7e7..cd7f61f1f2 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py @@ -1,8 +1,8 @@ from bs4 import BeautifulSoup -from selenium import webdriver -from selenium.webdriver.support.ui import WebDriverWait, Select from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait, Select + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -17,21 +17,13 @@ class CouncilClass(AbstractGetBinDataClass): """ def parse_data(self, page: str, **kwargs) -> dict: - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get("https://my.eastsuffolk.gov.uk/service/Bin_collection_dates_finder") # Wait for iframe to load and switch to it @@ -42,7 +34,7 @@ def parse_data(self, page: str, **kwargs) -> dict: EC.presence_of_element_located((By.ID, "alt_postcode_search")) ) # Enter postcode - postcode.send_keys(user_postcode) + postcode.send_keys(user_postcode.replace(" ", "")) # Wait for address selection dropdown to appear address = Select( @@ -74,8 +66,6 @@ def parse_data(self, page: str, **kwargs) -> dict: ) ) - - # Make a BS4 object soup = BeautifulSoup(data_table.get_attribute("innerHTML"), features="html.parser") diff --git a/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py index 167a0026f1..471274120c 100644 --- a/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py @@ -1,5 +1,4 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait @@ -24,16 +23,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_paon(user_paon) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get("https://www.gateshead.gov.uk/article/3150/Bin-collection-day-checker") # Wait for the postcode field to appear then populate it diff --git a/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py b/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py index c783af1ecb..9ff3c9907a 100644 --- a/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py @@ -1,12 +1,10 @@ -import time - from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -62,16 +60,8 @@ def parse_data(self, page: str, **kwargs) -> dict: user_postcode = kwargs.get("postcode") user_paon = kwargs.get("paon") - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # Hide Cookies diff --git a/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py index 97725eb040..394cd69441 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py @@ -1,6 +1,5 @@ import time from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select @@ -26,16 +25,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get("https://www.npt.gov.uk/2195") # Accept cookies banner diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py index 563428ac9d..a6dc199140 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py @@ -1,9 +1,9 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_paon(user_paon) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # Populate postcode field diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py index 7635acd6a1..69e36407f1 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py @@ -1,16 +1,12 @@ import time - -import requests from bs4 import BeautifulSoup -from selenium import webdriver -from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By -from selenium.webdriver.common.keys import Keys -from selenium.webdriver.support.ui import Select + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass + # import the wonderful Beautiful Soup and the URL grabber @@ -39,16 +35,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_paon(user_paon) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) time.sleep(1) diff --git a/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py index 17480bdbdb..d1aae0f698 100644 --- a/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py @@ -1,13 +1,10 @@ -from datetime import datetime - from bs4 import BeautifulSoup -from selenium import webdriver -from selenium.webdriver.chrome.options import Options +from datetime import datetime from selenium.webdriver.common.by import By -from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -33,16 +30,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_paon(user_paon) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # If you bang in the house number (or property name) and postcode in the box it should find your property diff --git a/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py index 43a2756875..0aa3f70b02 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py @@ -1,9 +1,8 @@ -import time from bs4 import BeautifulSoup -from selenium import webdriver -from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -18,28 +17,22 @@ class CouncilClass(AbstractGetBinDataClass): """ def parse_data(self, page: str, **kwargs) -> dict: - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - user_uprn = kwargs.get("uprn") check_uprn(user_uprn) # Pad UPRN with 0's at the start for any that aren't 12 chars user_uprn = user_uprn.zfill(12) # Create Selenium webdriver - driver = webdriver.Chrome(options=options) - driver.get(f"https://my.reigate-banstead.gov.uk/en/service/Bins_and_recycling___collections_calendar?uprn={user_uprn}") + driver = create_webdriver() + driver.get( + f"https://my.reigate-banstead.gov.uk/en/service/Bins_and_recycling___collections_calendar?uprn={user_uprn}") # Wait for iframe to load and switch to it WebDriverWait(driver, 30).until(EC.frame_to_be_available_and_switch_to_it((By.ID, 'fillform-frame-1'))) # Wait for form - WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'span[data-name="html2"] > div'))) + WebDriverWait(driver, 30).until( + EC.presence_of_element_located((By.CSS_SELECTOR, 'span[data-name="html2"] > div'))) # Make a BS4 object soup = BeautifulSoup(driver.page_source, features="html.parser") diff --git a/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py index cf2d0e0786..3144d13918 100644 --- a/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py @@ -1,9 +1,9 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(page) # Populate postcode field diff --git a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py index b507d70046..190bbc5988 100644 --- a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py @@ -1,5 +1,4 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select @@ -25,16 +24,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_uprn(user_uprn) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday") # Close cookies banner diff --git a/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py index ee7df123bd..a020799137 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py @@ -1,5 +1,5 @@ from bs4 import BeautifulSoup -from selenium import webdriver + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -14,16 +14,8 @@ class CouncilClass(AbstractGetBinDataClass): """ def parse_data(self, page: str, **kwargs) -> dict: - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get(kwargs.get("url")) # Make a BS4 object diff --git a/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py index 2860a15d5f..3d0eaf6822 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py @@ -1,5 +1,4 @@ from bs4 import BeautifulSoup -from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait @@ -24,16 +23,8 @@ def parse_data(self, page: str, **kwargs) -> dict: check_paon(user_paon) check_postcode(user_postcode) - # Set up Selenium to run 'headless' - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-gpu") - options.add_argument("--disable-dev-shm-usage") - options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Create Selenium webdriver - driver = webdriver.Chrome(options=options) + driver = create_webdriver() driver.get("https://www.westlothian.gov.uk/article/31528/Bin-Collection-Calendar-Dates") # Close feedback banner From e0be9ddedafb1208d870f8d6184f61a9d54ab54e Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Sun, 29 Oct 2023 20:07:04 +0000 Subject: [PATCH 2/2] fix: Home Assistant custom component fix for Selenium based councils --- uk_bin_collection/uk_bin_collection/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uk_bin_collection/uk_bin_collection/common.py b/uk_bin_collection/uk_bin_collection/common.py index 035bf46e40..e8de827579 100644 --- a/uk_bin_collection/uk_bin_collection/common.py +++ b/uk_bin_collection/uk_bin_collection/common.py @@ -228,7 +228,7 @@ def create_webdriver() -> webdriver.Chrome: """ # Set up Selenium to run 'headless' options = webdriver.ChromeOptions() - # options.add_argument("--headless") + options.add_argument("--headless") options.add_argument("--no-sandbox") options.add_argument("--disable-gpu") options.add_argument("--disable-dev-shm-usage")