diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 45baa3ebbd..ac527460db 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1216,10 +1216,13 @@ "wiki_name": "St Albans City and District Council" }, "StHelensBC": { + "house_number": "15", + "postcode": "L34 2GA", "skip_get_url": true, - "uprn": "39081672", "url": "https://www.sthelens.gov.uk/", - "wiki_name": "St Helens Borough Council" + "web_driver": "http://selenium:4444", + "wiki_name": "St Helens Borough Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" }, "StaffordBoroughCouncil": { "uprn": "100032203010", diff --git a/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py b/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py index a8f1594ae3..07a28f75d0 100644 --- a/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +++ b/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py @@ -1,4 +1,8 @@ from bs4 import BeautifulSoup +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select +from selenium.webdriver.support.wait import WebDriverWait from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -8,50 +12,111 @@ class CouncilClass(AbstractGetBinDataClass): """ Concrete classes have to implement all abstract operations of the - baseclass. They can also override some - operations with a default implementation. + base class. They can also override some operations with a default + implementation. 
""" def parse_data(self, page: str, **kwargs) -> dict: - uprn = kwargs.get("uprn") - # Check the UPRN is valid - check_uprn(uprn) - - # Request URL - url = f"https://secure.sthelens.net/website/CollectionDates.nsf/servlet.xsp/NextCollections?source=1&refid={uprn}" - - # Make Request - requests.packages.urllib3.disable_warnings() - s = requests.Session() - page = s.get(url) - - # Make a BS4 object - soup = BeautifulSoup( - re.sub("]+)>", "", page.text).replace("", ""), - features="html.parser", - ) - soup.prettify() - - data = {"bins": []} - collection_rows = ( - soup.find("table", {"class": "multitable"}).find("tbody").find_all("tr") - ) - - for collection_row in collection_rows: - # Get bin collection type - bin_type = collection_row.find("th") - if bin_type: - bin_type = bin_type.get_text(strip=True) - # Get bin collection dates - for bin_date in collection_row.find_all("td"): - if bin_date.get_text(strip=True) != "Dates not allocated": - collection_date = datetime.strptime( - bin_date.get_text(strip=True), "%a %d %b %Y" - ) - dict_data = { - "type": bin_type, - "collectionDate": collection_date.strftime(date_format), - } - data["bins"].append(dict_data) + driver = None + try: + data = {"bins": []} + user_paon = kwargs.get("paon") + user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") + headless = kwargs.get("headless") + check_paon(user_paon) + check_postcode(user_postcode) + # Create Selenium webdriver + driver = create_webdriver(web_driver, headless, None, __name__) + driver.get( + "https://www.sthelens.gov.uk/article/3473/Check-your-collection-dates" + ) + + """ + accept_button = WebDriverWait(driver, timeout=30).until( + EC.element_to_be_clickable((By.ID, "ccc-notify-accept")) + ) + accept_button.click() + """ + + # Wait for the postcode field to appear then populate it + inputElement_postcode = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_POSTCODE") + ) + ) + 
inputElement_postcode.send_keys(user_postcode) + + # Click search button + findAddress = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_FINDADDRESS_NEXT") + ) + ) + findAddress.click() + + WebDriverWait(driver, timeout=30).until( + EC.element_to_be_clickable( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS_chosen") + ) + ).click() + + WebDriverWait(driver, 10).until( + EC.element_to_be_clickable( + ( + By.XPATH, + f"//ul[@id='RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS-chosen-search-results']/li[starts-with(text(), '{user_paon}')]", + ) + ) + ).click() + + WebDriverWait(driver, timeout=30).until( + EC.element_to_be_clickable( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESSNEXT_NEXT") + ) + ).click() + + # Wait for the collections table to appear + WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "RESIDENTCOLLECTIONDATES__FIELDS_OUTER") + ) + ) + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + # Get the month rows first + current_month = "" + for row in soup.find_all("tr"): + # Check if the row is a month header (contains 'th' tag) + if row.find("th"): + current_month = row.find("th").get_text(strip=True) + else: + # Extract the date, day, and waste types + columns = row.find_all("td") + if len(columns) >= 4: + day = columns[0].get_text(strip=True) + date = day + " " + current_month + waste_types = columns[3].get_text(strip=True) + + for type in waste_types.split(" & "): + dict_data = { + "type": type, + "collectionDate": datetime.strptime( + date, + "%d %B %Y", + ).strftime("%d/%m/%Y"), + } + data["bins"].append(dict_data) + + except Exception as e: + # Here you can log the exception if needed + print(f"An error occurred: {e}") + # Optionally, re-raise the exception if you want it to propagate + raise + finally: + # This block ensures that the driver is closed regardless of an exception + if driver: + driver.quit() return data diff --git a/wiki/Councils.md 
b/wiki/Councils.md index 9592edba90..f62f76f4d6 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -1587,7 +1587,7 @@ Additional parameters: ### Newark and Sherwood District Council ```commandline -python collect_data.py NewarkAndSherwoodDC http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX +python collect_data.py NewarkAndSherwoodDC "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1" ``` Note: Replace XXXXXXXX with UPRN. @@ -2155,11 +2155,15 @@ Additional parameters: ### St Helens Borough Council ```commandline -python collect_data.py StHelensBC https://www.sthelens.gov.uk/ -s -u XXXXXXXX +python collect_data.py StHelensBC https://www.sthelens.gov.uk/ -s -p "XXXX XXX" -n "XX" -w http://HOST:PORT/ ``` Additional parameters: - `-s` - skip get URL -- `-u` - UPRN +- `-p` - postcode +- `-n` - house number +- `-w` - remote Selenium web driver URL (required for Home Assistant) + +Note: Pass the house name/number in the house number parameter, wrapped in double quotes ---