From 3c40ef209310a4b57c0b319c48bc0cc24ca9c4e7 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Thu, 31 Oct 2024 22:03:59 +0000 Subject: [PATCH 1/4] feat: Adding Breckland Council fix: #656 --- uk_bin_collection/tests/input.json | 7 +++ .../councils/BrecklandCouncil.py | 55 +++++++++++++++++++ wiki/Councils.md | 12 ++++ 3 files changed, 74 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index a47a767641..af672b4173 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -159,6 +159,13 @@ "wiki_name": "Bradford MDC", "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Post code isn't parsed by this script, but you can pass it in double quotes." }, + "BrecklandCouncil": { + "url": "https://www.breckland.gov.uk", + "wiki_command_url_override": "https://www.breckland.gov.uk", + "uprn": "100091495479", + "wiki_name": "Breckland Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." 
+ }, "BrightonandHoveCityCouncil": { "house_number": "44 Carden Avenue, Brighton, BN1 8NE", "postcode": "BN1 8NE", diff --git a/uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py new file mode 100644 index 0000000000..1ae477860d --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py @@ -0,0 +1,55 @@ +import time + +import requests + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = "https://www.breckland.gov.uk/apiserver/ajaxlibrary" + + data = { + "id": "1730410741649", + "jsonrpc": "2.0", + "method": "Breckland.Whitespace.JointWasteAPI.GetBinCollectionsByUprn", + "params": {"uprn": user_uprn, "environment": "live"}, + } + # Make the POST request + response = requests.post(URI, json=data) + + # Parse the JSON response + bin_collection = response.json() + + # Loop through each collection in bin_collection + for collection in bin_collection["result"]: + bin_type = collection.get("collectiontype") + collection_date = collection.get("nextcollection") + + dict_data = { + "type": bin_type, + "collectionDate": datetime.strptime( + collection_date, + "%d/%m/%Y %H:%M:%S", + ).strftime("%d/%m/%Y"), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index bc5a2547e0..28351eb1ea 100644 ---
a/wiki/Councils.md +++ b/wiki/Councils.md @@ -32,6 +32,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Bolton Council](#bolton-council) - [Bracknell Forest Council](#bracknell-forest-council) - [Bradford MDC](#bradford-mdc) +- [Breckland Council](#breckland-council) - [Brighton and Hove City Council](#brighton-and-hove-city-council) - [Bristol City Council](#bristol-city-council) - [Bromley Borough Council](#bromley-borough-council) @@ -487,6 +488,17 @@ Note: To get the UPRN, you will need to use [FindMyAddress](https://www.findmyad --- +### Breckland Council +```commandline +python collect_data.py BrecklandCouncil https://www.breckland.gov.uk -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. + +--- + ### Brighton and Hove City Council ```commandline python collect_data.py BrightonandHoveCityCouncil https://cityclean.brighton-hove.gov.uk/link/collections -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/ From 7949aae3fb648a5b5a4b8cd45cdd903446462a6f Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Thu, 31 Oct 2024 22:14:22 +0000 Subject: [PATCH 2/4] feat: Adding Cotswold District Council fix: #658 --- uk_bin_collection/tests/input.json | 9 ++ .../councils/CotswoldDistrictCouncil.py | 120 ++++++++++++++++++ wiki/Councils.md | 15 +++ 3 files changed, 144 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index af672b4173..9709823ea8 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -321,6 +321,15 @@ "wiki_name": "Cornwall Council", "wiki_note": "Use https://uprn.uk/ to find your UPRN." 
}, + "CotswoldDistrictCouncil": { + "house_number": "19", + "postcode": "GL56 0GB", + "skip_get_url": true, + "url": "https://community.cotswold.gov.uk/s/waste-collection-enquiry", + "web_driver": "http://selenium:4444", + "wiki_name": "Cotswold District Council", + "wiki_note": "Pass the full address in the house number parameter and the postcode in the postcode parameter." + }, "CoventryCityCouncil": { "url": "https://www.coventry.gov.uk/directory-record/56384/abberton-way-", "wiki_command_url_override": "https://www.coventry.gov.uk/directory_record/XXXXXX/XXXXXX", diff --git a/uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py new file mode 100644 index 0000000000..d34f52950c --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/CotswoldDistrictCouncil.py @@ -0,0 +1,120 @@ +import time +from datetime import datetime + +from bs4 import BeautifulSoup +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select +from selenium.webdriver.support.wait import WebDriverWait + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + +# import the wonderful Beautiful Soup and the URL grabber + + +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation.
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + driver = None + try: + page = "https://community.cotswold.gov.uk/s/waste-collection-enquiry" + + data = {"bins": []} + + house_number = kwargs.get("paon") + postcode = kwargs.get("postcode") + full_address = f"{house_number}, {postcode}" + web_driver = kwargs.get("web_driver") + headless = kwargs.get("headless") + + # Create Selenium webdriver + driver = create_webdriver(web_driver, headless, None, __name__) + driver.get(page) + + # If you bang in the house number (or property name) and postcode in the box it should find your property + wait = WebDriverWait(driver, 60) + address_entry_field = wait.until( + EC.presence_of_element_located( + (By.XPATH, '//*[@id="combobox-input-19"]') + ) + ) + + address_entry_field.send_keys(str(full_address)) + + address_entry_field = wait.until( + EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-19"]')) + ) + address_entry_field.click() + address_entry_field.send_keys(Keys.BACKSPACE) + address_entry_field.send_keys(str(full_address[len(full_address) - 1])) + + first_found_address = wait.until( + EC.element_to_be_clickable( + (By.XPATH, '//*[@id="dropdown-element-19"]/ul') + ) + ) + + first_found_address.click() + # Wait for the 'Select your property' dropdown to appear and select the first result + next_btn = wait.until( + EC.element_to_be_clickable((By.XPATH, "//lightning-button/button")) + ) + next_btn.click() + bin_data = wait.until( + EC.presence_of_element_located( + (By.XPATH, "//span[contains(text(), 'Container')]") + ) + ) + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + rows = soup.find_all("tr", class_="slds-hint-parent") + current_year = datetime.now().year + + for row in rows: + columns = row.find_all("td") + if columns: + container_type = row.find("th").text.strip() + if columns[0].get_text() == "Today": + collection_day = datetime.now().strftime("%a, %d %B") + elif columns[0].get_text() == "Tomorrow": + collection_day 
= (datetime.now() + timedelta(days=1)).strftime( + "%a, %d %B" + ) + else: + collection_day = re.sub( + r"[^a-zA-Z0-9,\s]", "", columns[0].get_text() + ).strip() + + # Parse the date from the string + parsed_date = datetime.strptime(collection_day, "%a, %d %B") + if parsed_date < datetime( + parsed_date.year, parsed_date.month, parsed_date.day + ): + parsed_date = parsed_date.replace(year=current_year + 1) + else: + parsed_date = parsed_date.replace(year=current_year) + # Format the date as %d/%m/%Y + formatted_date = parsed_date.strftime("%d/%m/%Y") + + # Add the bin type and collection date to the 'data' dictionary + data["bins"].append( + {"type": container_type, "collectionDate": formatted_date} + ) + except Exception as e: + # Here you can log the exception if needed + print(f"An error occurred: {e}") + # Optionally, re-raise the exception if you want it to propagate + raise + finally: + # This block ensures that the driver is closed regardless of an exception + if driver: + driver.quit() + return data diff --git a/wiki/Councils.md b/wiki/Councils.md index 28351eb1ea..9592edba90 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -53,6 +53,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Colchester City Council](#colchester-city-council) - [Conwy County Borough Council](#conwy-county-borough-council) - [Cornwall Council](#cornwall-council) +- [Cotswold District Council](#cotswold-district-council) - [Coventry City Council](#coventry-city-council) - [Crawley Borough Council](#crawley-borough-council) - [Croydon Council](#croydon-council) @@ -743,6 +744,20 @@ Note: Use https://uprn.uk/ to find your UPRN. 
--- +### Cotswold District Council +```commandline +python collect_data.py CotswoldDistrictCouncil https://community.cotswold.gov.uk/s/waste-collection-enquiry -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ +``` +Additional parameters: +- `-s` - skip get URL +- `-p` - postcode +- `-n` - house number +- `-w` - remote Selenium web driver URL (required for Home Assistant) + +Note: Pass the full address in the house number parameter and the postcode in the postcode parameter. + +--- + ### Coventry City Council ```commandline python collect_data.py CoventryCityCouncil https://www.coventry.gov.uk/directory_record/XXXXXX/XXXXXX From b38004cc30c6ffe2c0aa9fb39e381faf0573fa35 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Fri, 1 Nov 2024 22:02:10 +0000 Subject: [PATCH 3/4] fix: NewarkAndSherwoodDC fix: #941 --- uk_bin_collection/tests/input.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 9709823ea8..45baa3ebbd 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -853,8 +853,8 @@ "wiki_name": "New Forest Council" }, "NewarkAndSherwoodDC": { - "url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529", - "wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX", + "url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529&nc=1", + "wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1", "wiki_name": "Newark and Sherwood District Council", "wiki_note": "Replace XXXXXXXX with UPRN."
}, From 7c5ce184f7b8e02b4dedb1faa014d7da6c3b60e7 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Fri, 1 Nov 2024 23:16:51 +0000 Subject: [PATCH 4/4] fix: St Helens Borough Council fix: #753 --- uk_bin_collection/tests/input.json | 7 +- .../uk_bin_collection/councils/StHelensBC.py | 149 +++++++++++++----- wiki/Councils.md | 10 +- 3 files changed, 119 insertions(+), 47 deletions(-) diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 45baa3ebbd..ac527460db 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1216,10 +1216,13 @@ "wiki_name": "St Albans City and District Council" }, "StHelensBC": { + "house_number": "15", + "postcode": "L34 2GA", "skip_get_url": true, - "uprn": "39081672", "url": "https://www.sthelens.gov.uk/", - "wiki_name": "St Helens Borough Council" + "web_driver": "http://selenium:4444", + "wiki_name": "St Helens Borough Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" }, "StaffordBoroughCouncil": { "uprn": "100032203010", diff --git a/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py b/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py index a8f1594ae3..07a28f75d0 100644 --- a/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py +++ b/uk_bin_collection/uk_bin_collection/councils/StHelensBC.py @@ -1,4 +1,8 @@ from bs4 import BeautifulSoup +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select +from selenium.webdriver.support.wait import WebDriverWait from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -8,50 +12,111 @@ class CouncilClass(AbstractGetBinDataClass): """ Concrete classes have to implement all abstract operations of the - baseclass. 
They can also override some - operations with a default implementation. + base class. They can also override some operations with a default + implementation. """ def parse_data(self, page: str, **kwargs) -> dict: - uprn = kwargs.get("uprn") - # Check the UPRN is valid - check_uprn(uprn) - - # Request URL - url = f"https://secure.sthelens.net/website/CollectionDates.nsf/servlet.xsp/NextCollections?source=1&refid={uprn}" - - # Make Request - requests.packages.urllib3.disable_warnings() - s = requests.Session() - page = s.get(url) - - # Make a BS4 object - soup = BeautifulSoup( - re.sub("]+)>", "", page.text).replace("", ""), - features="html.parser", - ) - soup.prettify() - - data = {"bins": []} - collection_rows = ( - soup.find("table", {"class": "multitable"}).find("tbody").find_all("tr") - ) - - for collection_row in collection_rows: - # Get bin collection type - bin_type = collection_row.find("th") - if bin_type: - bin_type = bin_type.get_text(strip=True) - # Get bin collection dates - for bin_date in collection_row.find_all("td"): - if bin_date.get_text(strip=True) != "Dates not allocated": - collection_date = datetime.strptime( - bin_date.get_text(strip=True), "%a %d %b %Y" - ) - dict_data = { - "type": bin_type, - "collectionDate": collection_date.strftime(date_format), - } - data["bins"].append(dict_data) + driver = None + try: + data = {"bins": []} + user_paon = kwargs.get("paon") + user_postcode = kwargs.get("postcode") + web_driver = kwargs.get("web_driver") + headless = kwargs.get("headless") + check_paon(user_paon) + check_postcode(user_postcode) + # Create Selenium webdriver + driver = create_webdriver(web_driver, headless, None, __name__) + driver.get( + "https://www.sthelens.gov.uk/article/3473/Check-your-collection-dates" + ) + + """ + accept_button = WebDriverWait(driver, timeout=30).until( + EC.element_to_be_clickable((By.ID, "ccc-notify-accept")) + ) + accept_button.click() + """ + + # Wait for the postcode field to appear then populate it + 
inputElement_postcode = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_POSTCODE") + ) + ) + inputElement_postcode.send_keys(user_postcode) + + # Click search button + findAddress = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_FINDADDRESS_NEXT") + ) + ) + findAddress.click() + + WebDriverWait(driver, timeout=30).until( + EC.element_to_be_clickable( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS_chosen") + ) + ).click() + + WebDriverWait(driver, 10).until( + EC.element_to_be_clickable( + ( + By.XPATH, + f"//ul[@id='RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS-chosen-search-results']/li[starts-with(text(), '{user_paon}')]", + ) + ) + ).click() + + WebDriverWait(driver, timeout=30).until( + EC.element_to_be_clickable( + (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESSNEXT_NEXT") + ) + ).click() + + # Wait for the collections table to appear + WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "RESIDENTCOLLECTIONDATES__FIELDS_OUTER") + ) + ) + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + # Get the month rows first + current_month = "" + for row in soup.find_all("tr"): + # Check if the row is a month header (contains 'th' tag) + if row.find("th"): + current_month = row.find("th").get_text(strip=True) + else: + # Extract the date, day, and waste types + columns = row.find_all("td") + if len(columns) >= 4: + day = columns[0].get_text(strip=True) + date = day + " " + current_month + waste_types = columns[3].get_text(strip=True) + + for type in waste_types.split(" & "): + dict_data = { + "type": type, + "collectionDate": datetime.strptime( + date, + "%d %B %Y", + ).strftime("%d/%m/%Y"), + } + data["bins"].append(dict_data) + + except Exception as e: + # Here you can log the exception if needed + print(f"An error occurred: {e}") + # Optionally, re-raise the exception if you want it to propagate + raise + 
+ finally: + # This block ensures that the driver is closed regardless of an exception + if driver: + driver.quit() return data diff --git a/wiki/Councils.md b/wiki/Councils.md index 9592edba90..f62f76f4d6 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -1587,7 +1587,7 @@ Additional parameters: ### Newark and Sherwood District Council ```commandline -python collect_data.py NewarkAndSherwoodDC http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX +python collect_data.py NewarkAndSherwoodDC "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1" ``` Note: Replace XXXXXXXX with UPRN. @@ -2155,11 +2155,15 @@ Additional parameters: ### St Helens Borough Council ```commandline -python collect_data.py StHelensBC https://www.sthelens.gov.uk/ -s -u XXXXXXXX +python collect_data.py StHelensBC https://www.sthelens.gov.uk/ -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ ``` Additional parameters: - `-s` - skip get URL -- `-u` - UPRN +- `-p` - postcode +- `-n` - house number +- `-w` - remote Selenium web driver URL (required for Home Assistant) + +Note: Pass the house name/number in the house number parameter, wrapped in double quotes ---