From 5c41af3064f4705ecac54aae7b3ff03d2d659a18 Mon Sep 17 00:00:00 2001
From: Simon Cozens
Date: Mon, 9 Sep 2024 09:16:36 +0100
Subject: [PATCH] fix: #585 Gloucester city council driver

---
 uk_bin_collection/tests/input.json                 |   8 ++
 .../councils/GloucesterCityCouncil.py              | 147 ++++++++++++++++++
 2 files changed, 155 insertions(+)
 create mode 100644 uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py

diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
index dec5138af7..3ce60112a8 100644
--- a/uk_bin_collection/tests/input.json
+++ b/uk_bin_collection/tests/input.json
@@ -427,6 +427,14 @@
         "wiki_name": "Glasgow City Council",
         "wiki_note": "Replace XXXXXXXX with UPRN."
     },
+    "GloucesterCityCouncil": {
+        "house_number": "111",
+        "postcode": "GL2 0RR",
+        "uprn": "100120479507",
+        "skip_get_url": true,
+        "url": "https://gloucester-self.achieveservice.com/service/Bins___Check_your_bin_day",
+        "wiki_name": "Gloucester City Council"
+    },
     "GuildfordCouncil": {
         "house_number": "THE LODGE, PUTTENHAM HILL HOUSE, PUTTENHAM HILL, PUTTENHAM, GUILDFORD, GU3 1AH",
         "postcode": "GU3 1AH",
diff --git a/uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py
new file mode 100644
index 0000000000..6ef1eb120f
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/GloucesterCityCouncil.py
@@ -0,0 +1,147 @@
+import time
+from datetime import datetime
+
+from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+# import the wonderful Beautiful Soup and the URL grabber
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Scrape Gloucester City Council's "Check your bin day" web form.
+
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Drive the council's web form with Selenium and scrape the results.
+
+        The ``page`` argument is ignored; the form URL is fixed below. The
+        ``uprn``, ``postcode``, ``web_driver`` and ``headless`` values are
+        read from ``kwargs``.
+
+        Returns ``{"bins": [...]}`` where each entry has a ``type`` and a
+        ``collectionDate`` (formatted with ``date_format``), sorted by date.
+        Any exception is printed and re-raised; the webdriver, if one was
+        created, is quit either way.
+        """
+        driver = None
+        try:
+            # The caller-supplied page is deliberately overridden
+            page = "https://gloucester-self.achieveservice.com/service/Bins___Check_your_bin_day"
+
+            bin_data = {"bins": []}
+
+            user_uprn = kwargs.get("uprn")
+            user_postcode = kwargs.get("postcode")
+            web_driver = kwargs.get("web_driver")
+            headless = kwargs.get("headless")
+            check_uprn(user_uprn)
+            check_postcode(user_postcode)
+            # Create Selenium webdriver
+            driver = create_webdriver(web_driver, headless, None, __name__)
+            driver.get(page)
+
+            cookies_button = WebDriverWait(driver, timeout=15).until(
+                EC.presence_of_element_located((By.ID, "close-cookie-message"))
+            )
+            cookies_button.click()
+
+            without_login_button = WebDriverWait(driver, timeout=15).until(
+                EC.presence_of_element_located(
+                    (By.LINK_TEXT, "or, Continue with no account")
+                )
+            )
+            without_login_button.click()
+
+            # The form itself lives inside an iframe
+            form_iframe = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
+            )
+
+            driver.switch_to.frame(form_iframe)
+            wait = WebDriverWait(driver, 60)
+            postcode_input = wait.until(
+                EC.element_to_be_clickable((By.NAME, "find_postcode"))
+            )
+
+            postcode_input.send_keys(user_postcode)
+
+            # Wait for the 'Select address' dropdown to be updated
+            time.sleep(2)
+
+            dropdown = wait.until(
+                EC.element_to_be_clickable((By.NAME, "chooseAddress"))
+            )
+            # Pick the address whose option value matches the UPRN
+            Select(dropdown).select_by_value(str(user_uprn))
+
+            # Wait for 'Searching for...' to be added to page
+            WebDriverWait(driver, timeout=15).until(
+                EC.text_to_be_present_in_element(
+                    (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
+                )
+            )
+
+            # Wait for 'Searching for...' to be removed from page
+            WebDriverWait(driver, timeout=15).until(
+                EC.none_of(
+                    EC.text_to_be_present_in_element(
+                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
+                    )
+                )
+            )
+
+            # Even then it can still be adding data to the page...
+            time.sleep(5)
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+
+            # This is ugly but there is literally no consistency to the HTML
+            def is_a_collection_date(t):
+                return any("Next collection" in c for c in t.children)
+
+            for next_collection in soup.find_all(is_a_collection_date):
+                label_div = next_collection.parent.select_one("div:nth-child(1)")
+                date_tag = next_collection.select_one("strong")
+                # Skip matches that lack the expected markup instead of
+                # failing with AttributeError on None
+                if label_div is None or date_tag is None:
+                    continue
+                bin_info = list(label_div.children)
+                if not bin_info:
+                    continue
+                bin_type = bin_info[0].get_text()
+                date_text = date_tag.get_text(strip=True)
+                bin_date = datetime.strptime(date_text, "%d %b %Y")
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": bin_date.strftime(date_format),
+                }
+                bin_data["bins"].append(dict_data)
+
+            # Parse with the same format the dates were written in above
+            bin_data["bins"].sort(
+                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+            )
+
+        except Exception as e:
+            # Here you can log the exception if needed
+            print(f"An error occurred: {e}")
+            # Optionally, re-raise the exception if you want it to propagate
+            raise
+        finally:
+            # This block ensures that the driver is closed regardless of an exception
+            if driver:
+                driver.quit()
+        return bin_data