diff --git a/.github/workflows/behave.yml b/.github/workflows/behave.yml index 14a7916788..e073737f1b 100644 --- a/.github/workflows/behave.yml +++ b/.github/workflows/behave.yml @@ -24,6 +24,10 @@ jobs: matrix: python-version: ['3.10', 3.11] poetry-version: [1.5.1] + services: + selenium: + image: selenium/standalone-chrome:latest + options: --shm-size=2gb steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 diff --git a/README.md b/README.md index f9fc8da28f..f1740f8ed7 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Most scripts make use of [Beautiful Soup 4](https://pypi.org/project/beautifulso 1. Click `+ Add Integration` and search for and select `UK Bin Collection Data` then click `Download`. 1. Restart your Home Assistant. 1. In the Home Assistant UI go to `Settings` > `Devices & Services` click `+ Add Integration` and search for `UK Bin Collection Data`. +1. If you see a "URL of the remote Selenium web driver to use" field when setting up your council, you'll need to provide the URL to a web driver you've set up separately such as [standalone-chrome](https://hub.docker.com/r/selenium/standalone-chrome). 
### Install manually diff --git a/custom_components/uk_bin_collection/config_flow.py b/custom_components/uk_bin_collection/config_flow.py index bf82d00706..9d68c17a16 100644 --- a/custom_components/uk_bin_collection/config_flow.py +++ b/custom_components/uk_bin_collection/config_flow.py @@ -50,6 +50,10 @@ async def get_council_schema(self, council=str) -> vol.Schema: council_schema = council_schema.extend( {vol.Required("usrn", default=""): cv.string} ) + if "web_driver" in self.councils_data[council]: + council_schema = council_schema.extend( + {vol.Required("web_driver", default=""): cv.string} + ) return council_schema async def async_step_user(self, user_input=None): diff --git a/custom_components/uk_bin_collection/strings.json b/custom_components/uk_bin_collection/strings.json index e8c9a4a16d..398a69e244 100644 --- a/custom_components/uk_bin_collection/strings.json +++ b/custom_components/uk_bin_collection/strings.json @@ -18,6 +18,7 @@ "postcode": "Postcode of the address", "number": "House number of the address", "usrn": "USRN (Unique Street Reference Number)", + "web_driver": "URL of the remote Selenium web driver to use", "submit": "Submit" }, "description": "Please refer to your councils [wiki](https://github.com/robbrad/UKBinCollectionData/wiki/Councils) entry for details on what to enter" diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 9b503fed1c..c9df4c0e60 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -62,6 +62,7 @@ "skip_get_url": true, "uprn": "100031325997", "url": "https://www.broxtowe.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "Broxtowe Borough Council" }, "BuckinghamshireCouncil": { @@ -69,6 +70,7 @@ "postcode": "HP14 4LA", "skip_get_url": true, "url": "https://chiltern.gov.uk/collection-dates", + "web_driver": "http://selenium:4444", "wiki_name": "Buckinghamshire Council (Chiltern, South Bucks, Wycombe)", "wiki_note": "Pass the name of 
the street with the house number parameter, wrapped in double quotes" }, @@ -152,6 +154,7 @@ "skip_get_url": true, "uprn": "10070102161", "url": "https://www.derbyshiredales.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "Derbyshire Dales District Council" }, "DoncasterCouncil": { @@ -183,6 +186,7 @@ "postcode": "LN4 4SY", "skip_get_url": true, "url": "https://www.e-lindsey.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "East Lindsey District Council", "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" }, @@ -197,6 +201,7 @@ "skip_get_url": true, "uprn": "10093544720", "url": "https://my.eastsuffolk.gov.uk/service/Bin_collection_dates_finder", + "web_driver": "http://selenium:4444", "wiki_name": "East Suffolk Council", "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)" }, @@ -223,6 +228,7 @@ "postcode": "NE16 5LQ", "skip_get_url": true, "url": "https://www.gateshead.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "Gateshead Council", "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" }, @@ -251,6 +257,7 @@ "postcode": "SK13 8BX", "skip_get_url": true, "url": "https://www.highpeak.gov.uk/findyourbinday", + "web_driver": "http://selenium:4444", "wiki_name": "High Peak Council", "wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes" }, @@ -337,6 +344,7 @@ "skip_get_url": true, "uprn": "10023947752", "url": "https://www.npt.gov.uk", + "web_driver": "http://selenium:4444", "wiki_name": "Neath Port Talbot Council" }, "NewarkAndSherwoodDC": { @@ -382,6 +390,7 @@ "postcode": "NR25 6BH", "skip_get_url": true, "url": "https://www.north-norfolk.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "North Norfolk District Council", "wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes" }, @@ 
-410,6 +419,7 @@ "postcode": "NE46 1UQ", "skip_get_url": true, "url": "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx", + "web_driver": "http://selenium:4444", "wiki_name": "Northumberland Council" }, "PrestonCityCouncil": { @@ -417,12 +427,14 @@ "postcode": "PR1 2RL", "skip_get_url": true, "url": "https://selfservice.preston.gov.uk/service/Forms/FindMyNearest.aspx?Service=bins", + "web_driver": "http://selenium:4444", "wiki_name": "Preston City Council" }, "ReigateAndBansteadBoroughCouncil": { "skip_get_url": true, "uprn": "68134867", "url": "https://www.reigate-banstead.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "Reigate and Banstead Borough Council", "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)" }, @@ -445,6 +457,7 @@ "skip_get_url": true, "uprn": "3040040994", "url": "https://www.rushcliffe.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "Rushcliffe Borough Council" }, "RushmoorCouncil": { @@ -521,6 +534,7 @@ "skip_get_url": true, "uprn": "100031863037", "url": "https://www.staffsmoorlands.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "Staffordshire Moorlands District Council", "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)" }, @@ -578,6 +592,7 @@ "custom_component_show_url_field": true, "skip_get_url": true, "url": "https://www.wakefield.gov.uk/where-i-live/?uprn=63035490&a=115%20Elizabeth%20Drive%20Castleford%20WF10%203RR&usrn=41801243&e=445418&n=426091&p=WF10%203RR", + "web_driver": "http://selenium:4444", "wiki_command_url_override": "https://www.wakefield.gov.uk/where-i-live/?uprn=XXXXXXXXXXX&a=XXXXXXXXXXX&usrn=XXXXXXXXXXX&e=XXXXXXXXXXX&n=XXXXXXXXXXX&p=XXXXXXXXXXX", "wiki_name": "Wakefield City Council", "wiki_note": "Follow the instructions [here](https://www.wakefield.gov.uk/where-i-live/) until you get the page that includes a \"Bin Collections\" section then copy the URL and 
replace the URL in the command." @@ -614,6 +629,7 @@ "postcode": "EH52 5JE", "skip_get_url": true, "url": "https://www.westlothian.gov.uk/", + "web_driver": "http://selenium:4444", "wiki_name": "West Lothian Council", "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" }, diff --git a/uk_bin_collection/tests/step_defs/test_validate_council.py b/uk_bin_collection/tests/step_defs/test_validate_council.py index 40657288e8..2d26fe6efc 100644 --- a/uk_bin_collection/tests/step_defs/test_validate_council.py +++ b/uk_bin_collection/tests/step_defs/test_validate_council.py @@ -50,6 +50,10 @@ def scrape_step(context, council): if "usrn" in context.metadata: usrn = context.metadata["usrn"] args.append(f"-us={usrn}") + # TODO we should somehow run this test with and without this argument passed + if "web_driver" in context.metadata: + web_driver = context.metadata["web_driver"] + args.append(f"-w={web_driver}") if "skip_get_url" in context.metadata: args.append(f"-s") diff --git a/uk_bin_collection/tests/test_common_functions.py b/uk_bin_collection/tests/test_common_functions.py index 0f34b7d60b..fc2b991624 100644 --- a/uk_bin_collection/tests/test_common_functions.py +++ b/uk_bin_collection/tests/test_common_functions.py @@ -166,15 +166,16 @@ def test_update_input_json(): url = "TEST_URL" postcode="TEST_POSTCODE" uprn="TEST_UPRN" + web_driver="TEST_WEBDRIVER" skip_get_url = True - update_input_json(council, url, postcode=postcode, uprn=uprn, skip_get_url=skip_get_url) + update_input_json(council, url, postcode=postcode, uprn=uprn, web_driver=web_driver, skip_get_url=skip_get_url) cwd = os.getcwd() input_file_path = os.path.join(cwd, "uk_bin_collection", "tests", "input.json") result1 = os.path.exists(input_file_path) with open(input_file_path, 'r') as f: data = json.load(f) assert result1 == True - assert data[council] == {'postcode': 'TEST_POSTCODE', 'skip_get_url': True, 'uprn': 'TEST_UPRN', 'url': 'TEST_URL', 'wiki_name': 
'test_council'} + assert data[council] == {"postcode": postcode, "skip_get_url": skip_get_url, "uprn": uprn, "url": url, "web_driver": web_driver, "wiki_name": council} def test_update_input_json_fail(capsys, monkeypatch): def mock_os_path_exists(path): @@ -186,13 +187,18 @@ def mock_os_path_exists(path): url = "TEST_URL" postcode="TEST_POSTCODE" uprn="TEST_UPRN" + web_driver="TEST_WEBDRIVER" skip_get_url = True - update_input_json(council, url, postcode=postcode, uprn=uprn, skip_get_url=skip_get_url) + update_input_json(council, url, postcode=postcode, uprn=uprn, web_driver=web_driver, skip_get_url=skip_get_url) captured = capsys.readouterr() assert "Exception encountered: Unable to update input.json file for the council." in captured.out assert "Please check you're running developer mode" in captured.out -def test_create_webdriver(): - result = create_webdriver() +def test_create_webdriver_local(): + result = create_webdriver(None) + assert result.name == 'chrome' + +def test_create_webdriver_remote(): + result = create_webdriver("http://selenium:4444") assert result.name == 'chrome' diff --git a/uk_bin_collection/uk_bin_collection/collect_data.py b/uk_bin_collection/uk_bin_collection/collect_data.py index 6e2796a971..7bf5afc9c7 100644 --- a/uk_bin_collection/uk_bin_collection/collect_data.py +++ b/uk_bin_collection/uk_bin_collection/collect_data.py @@ -35,6 +35,12 @@ def __init__(self): required=False, ) self.parser.add_argument("-u", "--uprn", type=str, help="UPRN to parse", required=False) + self.parser.add_argument( + "-w", + "--web_driver", + help="URL for remote Selenium web driver - should be wrapped in double quotes", + required=False, + ) self.parser.add_argument( "-d", "--dev_mode", @@ -66,6 +72,7 @@ def run(self): paon = self.parsed_args.number uprn = self.parsed_args.uprn skip_get_url = self.parsed_args.skip_get_url + web_driver = self.parsed_args.web_driver dev_mode = self.parsed_args.dev_mode return self.client_code( @@ -75,6 +82,7 @@ def run(self): 
paon=paon, uprn=uprn, skip_get_url=skip_get_url, + web_driver=web_driver, dev_mode=dev_mode, council_module_str=council_module_str, ) diff --git a/uk_bin_collection/uk_bin_collection/common.py b/uk_bin_collection/uk_bin_collection/common.py index 103a809367..16a236e29c 100644 --- a/uk_bin_collection/uk_bin_collection/common.py +++ b/uk_bin_collection/uk_bin_collection/common.py @@ -204,7 +204,7 @@ def update_input_json(council: str, url: str, **kwargs): paon = kwargs.get("paon", None) uprn = kwargs.get("uprn", None) usrn = kwargs.get("usrn", None) -def write_output_json(council: str, content: str): + web_driver = kwargs.get("web_driver", None) skip_get_url = kwargs.get("skip_get_url", None) cwd = os.getcwd() input_file_path = os.path.join(cwd, "uk_bin_collection", "tests", "input.json") @@ -223,12 +223,8 @@ def write_output_json(council: str, content: str): data[council]["uprn"] = uprn if usrn is not None: data[council]["usrn"] = usrn - outputs_path = os.path.join(cwd, "..", "tests", "outputs") - if not os.path.exists(outputs_path) or not os.path.isdir(outputs_path): - outputs_path = os.path.join(cwd, "uk_bin_collection", "tests", "outputs") - if os.path.exists(outputs_path) and os.path.isdir(outputs_path): - with open(os.path.join(outputs_path, council + ".json"), "w") as f: - f.write(content) + if web_driver is not None: + data[council]["web_driver"] = web_driver if skip_get_url is not None: data[council]["skip_get_url"] = skip_get_url with open(input_file_path, 'w') as f: @@ -246,7 +242,7 @@ def validate_dates(bin_dates: dict) -> dict: # If a date is in December and the next is in January, increase the year -def create_webdriver() -> webdriver.Chrome: +def create_webdriver(web_driver) -> webdriver.Chrome: """ Create and return a headless Selenium webdriver :rtype: webdriver.Chrome @@ -258,5 +254,8 @@ def create_webdriver() -> webdriver.Chrome: options.add_argument("--disable-gpu") options.add_argument("--disable-dev-shm-usage") 
options.add_experimental_option("excludeSwitches", ["enable-logging"]) - # Return a Selenium webdriver + # Return a remote Selenium webdriver + if web_driver is not None: + return webdriver.Remote(command_executor=web_driver, options=options) + # Return a local Selenium webdriver return webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) diff --git a/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py index 1614a100b0..748afe9728 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BroxtoweBoroughCouncil.py @@ -24,11 +24,12 @@ def parse_data(self, page: str, **kwargs) -> dict: user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # Populate postcode field @@ -60,6 +61,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + bins_div = soup.find("div", id="ctl00_ContentPlaceHolder1_FF5686FormGroup") if bins_div: bins_table = bins_div.find("table") diff --git a/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py index aa034adec4..8928480b05 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py @@ -37,9 +37,10 @@ def parse_data(self, page: str, **kwargs) -> dict: # Assign user info user_postcode = kwargs.get("postcode") user_paon = kwargs.get("paon") + web_driver = kwargs.get("web_driver") # Create Selenium webdriver - 
driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # Enter postcode in text box and wait @@ -77,6 +78,9 @@ def parse_data(self, page: str, **kwargs) -> dict: df = pd.read_html(table, header=[1]) df = df[0] + # Quit Selenium webdriver to release session + driver.quit() + # Parse data into dict data = self.get_data(df) diff --git a/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py index 4fe73a18e6..883daebb90 100644 --- a/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/DerbyshireDalesDistrictCouncil.py @@ -24,11 +24,12 @@ def parse_data(self, page: str, **kwargs) -> dict: user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # Populate postcode field @@ -60,6 +61,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + bin_rows = soup.find("div", id="ctl00_ContentPlaceHolder1_pnlConfirmation") \ .find("div", {"class": "row"}).find_all("div", {"class": "row"}) if bin_rows: diff --git a/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py index 24e0cae5f7..d646d32ddd 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py @@ -20,11 +20,12 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} user_paon = kwargs.get("paon") user_postcode = kwargs.get("postcode") + web_driver = 
kwargs.get("web_driver") check_paon(user_paon) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get("https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days") # Wait for the postcode field to appear then populate it @@ -59,6 +60,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + # Get collections for collection in soup.find_all("div", {"class": "waste-result"}): ptags = collection.find_all("p") diff --git a/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py index cd7f61f1f2..e149bcc4fb 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py @@ -19,11 +19,12 @@ class CouncilClass(AbstractGetBinDataClass): def parse_data(self, page: str, **kwargs) -> dict: user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get("https://my.eastsuffolk.gov.uk/service/Bin_collection_dates_finder") # Wait for iframe to load and switch to it @@ -66,6 +67,9 @@ def parse_data(self, page: str, **kwargs) -> dict: ) ) + # Quit Selenium webdriver to release session + driver.quit() + # Make a BS4 object soup = BeautifulSoup(data_table.get_attribute("innerHTML"), features="html.parser") diff --git a/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py index 471274120c..0c875de2cb 100644 --- a/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py +++ 
b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py @@ -20,11 +20,12 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} user_paon = kwargs.get("paon") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_paon(user_paon) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get("https://www.gateshead.gov.uk/article/3150/Bin-collection-day-checker") # Wait for the postcode field to appear then populate it @@ -52,6 +53,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + # Get collections table table = soup.find("table", {"class": "bincollections__table"}) diff --git a/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py b/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py index 9ff3c9907a..94cf20c188 100644 --- a/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py @@ -59,9 +59,10 @@ def parse_data(self, page: str, **kwargs) -> dict: # Assign user info user_postcode = kwargs.get("postcode") user_paon = kwargs.get("paon") + web_driver = kwargs.get("web_driver") # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # Hide Cookies @@ -112,6 +113,9 @@ def parse_data(self, page: str, **kwargs) -> dict: By.ID, "FINDBINDAYSHIGHPEAK_CALENDAR_MAINCALENDAR" ).get_attribute("outerHTML") + # Quit Selenium webdriver to release session + driver.quit() + # Parse data into dict data = self.get_data(table) diff --git a/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py index 394cd69441..50cb795b4f 100644 --- 
a/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py @@ -22,11 +22,12 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get("https://www.npt.gov.uk/2195") # Accept cookies banner @@ -76,6 +77,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + # Get the property details property_details = soup.find( "div", diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py index a6dc199140..f516720f6a 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthNorfolkDistrictCouncil.py @@ -24,11 +24,12 @@ def parse_data(self, page: str, **kwargs) -> dict: user_paon = kwargs.get("paon") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_paon(user_paon) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # Populate postcode field @@ -61,6 +62,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + bins_text = soup.find("div", id="Search_result_details_cps_hd") if bins_text: diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py 
b/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py index 69e36407f1..b547a26bc4 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py @@ -32,11 +32,12 @@ def parse_data(self, page: str, **kwargs) -> dict: user_paon = kwargs.get("paon") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_paon(user_paon) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) time.sleep(1) @@ -63,6 +64,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme. bins_collected = list( map( diff --git a/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py index d1aae0f698..f045c1bf5b 100644 --- a/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py @@ -27,11 +27,12 @@ def parse_data(self, page: str, **kwargs) -> dict: user_paon = kwargs.get("paon") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_paon(user_paon) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # If you bang in the house number (or property name) and postcode in the box it should find your property @@ -66,6 +67,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + topLevelSpan = soup.find( "span", 
id="lblCollectionDates" diff --git a/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py index 0aa3f70b02..87c3f66178 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py @@ -18,12 +18,13 @@ class CouncilClass(AbstractGetBinDataClass): def parse_data(self, page: str, **kwargs) -> dict: user_uprn = kwargs.get("uprn") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) # Pad UPRN with 0's at the start for any that aren't 12 chars user_uprn = user_uprn.zfill(12) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get( f"https://my.reigate-banstead.gov.uk/en/service/Bins_and_recycling___collections_calendar?uprn={user_uprn}") @@ -38,6 +39,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") soup.prettify() + # Quit Selenium webdriver to release session + driver.quit() + data = {"bins": []} section = soup.find("span", {"data-name": "html2"}) dates = section.find_all("div") diff --git a/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py index 3144d13918..8b1c1e8903 100644 --- a/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/RushcliffeBoroughCouncil.py @@ -24,11 +24,12 @@ def parse_data(self, page: str, **kwargs) -> dict: user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get(page) # Populate postcode field @@ -60,6 +61,9 @@ 
def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + bins_text = soup.find("div", id="ctl00_ContentPlaceHolder1_pnlConfirmation") if bins_text: diff --git a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py index 190bbc5988..0cdc176c79 100644 --- a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py @@ -21,11 +21,12 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} user_uprn = kwargs.get("uprn") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_uprn(user_uprn) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday") # Close cookies banner @@ -67,6 +68,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + # Get months for month_wrapper in soup.find_all("div", {"class": "bin-collection__month"}): if month_wrapper: diff --git a/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py index a020799137..eaa1d551b8 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py @@ -15,13 +15,16 @@ class CouncilClass(AbstractGetBinDataClass): def parse_data(self, page: str, **kwargs) -> dict: # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(kwargs.get("web_driver")) 
driver.get(kwargs.get("url")) # Make a BS4 object soup = BeautifulSoup(driver.page_source, features="html.parser") soup.prettify() + # Quit Selenium webdriver to release session + driver.quit() + data = {"bins": []} sections = soup.find_all("div", {"class": "wil_c-content-section_heading"}) for s in sections: diff --git a/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py index 3d0eaf6822..e49b0ca66d 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py @@ -20,11 +20,12 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} user_paon = kwargs.get("paon") user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") check_paon(user_paon) check_postcode(user_postcode) # Create Selenium webdriver - driver = create_webdriver() + driver = create_webdriver(web_driver) driver.get("https://www.westlothian.gov.uk/article/31528/Bin-Collection-Calendar-Dates") # Close feedback banner @@ -59,6 +60,9 @@ def parse_data(self, page: str, **kwargs) -> dict: soup = BeautifulSoup(driver.page_source, features="html.parser") + # Quit Selenium webdriver to release session + driver.quit() + # Get collections for collection in soup.find_all("div", {"class": "bin-collect"}): dict_data = { diff --git a/uk_bin_collection/uk_bin_collection/get_bin_data.py b/uk_bin_collection/uk_bin_collection/get_bin_data.py index 4047e0f25d..604a880369 100644 --- a/uk_bin_collection/uk_bin_collection/get_bin_data.py +++ b/uk_bin_collection/uk_bin_collection/get_bin_data.py @@ -55,6 +55,7 @@ def template_method(self, address_url: str, **kwargs) -> None: # pragma: no cov this_paon = kwargs.get("paon", None) this_uprn = kwargs.get("uprn", None) this_usrn = kwargs.get("usrn", None) + this_web_driver = kwargs.get("web_driver", None) skip_get_url = kwargs.get("skip_get_url", None) dev_mode = 
kwargs.get("dev_mode", False) council_module_str = kwargs.get("council_module_str", None) @@ -63,18 +64,18 @@ def template_method(self, address_url: str, **kwargs) -> None: # pragma: no cov ): # we will not use the generic way to get data - needs a get data in the council class itself page = self.get_data(address_url) bin_data_dict = self.parse_data( - page, postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, url=this_url + page, postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, web_driver=this_web_driver, url=this_url ) json_output = self.output_json(bin_data_dict) else: bin_data_dict = self.parse_data( - "", postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, url=this_url + "", postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, web_driver=this_web_driver, url=this_url ) json_output = self.output_json(bin_data_dict) # if dev mode create/update council's entry in the input.json if dev_mode is not None and dev_mode is True: - update_input_json(council_module_str, this_url, postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, skip_get_url=skip_get_url) + update_input_json(council_module_str, this_url, postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, web_driver=this_web_driver, skip_get_url=skip_get_url) return json_output diff --git a/wiki/generate_wiki.py b/wiki/generate_wiki.py index 84a8373343..9bd7a53e74 100644 --- a/wiki/generate_wiki.py +++ b/wiki/generate_wiki.py @@ -46,6 +46,9 @@ def main(): if "usrn" in council_details: command += " -usrn XXXXXXXX" additional_parameters += "- `-us` - USRN\n" + if "web_driver" in council_details: + command += " -w http://HOST:PORT/" + additional_parameters += "- `-w` - remote Selenium web driver URL (required for Home Assistant)\n" # add to entries entries += "\n---\n\n"