From 5146d88deaa0905f5a18ba9077c35c35118899ea Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Fri, 20 Oct 2023 17:59:36 +0100 Subject: [PATCH 1/4] feat: Add support for Staffordshire Moorlands District Council --- ...affordshireMoorlandsDistrictCouncil.schema | 48 +++++++++ .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 8 ++ ...StaffordshireMoorlandsDistrictCouncil.json | 64 ++++++++++++ .../StaffordshireMoorlandsDistrictCouncil.py | 98 +++++++++++++++++++ 5 files changed, 219 insertions(+) create mode 100644 uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema create mode 100644 uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json create mode 100644 uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py diff --git a/uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema b/uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema new file mode 100644 index 0000000000..784e1df0ad --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome4", + "definitions": { + "Welcome4": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome4" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "$ref": "#/definitions/Type" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + }, + "Type": { + "type": "string", + "enum": [ + "Recycling with food and garden", + "Rubbish", + "Recycling" + ], + "title": "Type" + } + } +} diff --git 
a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index 78fbdef491..f76df165d1 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -81,6 +81,7 @@ Feature: Test each council output matches expected results in /outputs | SouthNorfolkCouncil | | SouthOxfordshireCouncil | | SouthTynesideCouncil | + | StaffordshireMoorlandsDistrictCouncil | | StHelensBC | | StockportBoroughCouncil | | StratfordUponAvonCouncil | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 075ac10a5d..6f9ec2de89 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -480,6 +480,14 @@ "url": "https://www.southtyneside.gov.uk/article/33352/Bin-collection-dates", "wiki_name": "South Tyneside Council" }, + "StaffordshireMoorlandsDistrictCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "ST8 6HN", + "uprn": "100031863037", + "url": "https://www.staffsmoorlands.gov.uk/", + "wiki_name": "Staffordshire Moorlands District Council", + "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)" + }, "StHelensBC": { "SKIP_GET_URL": "SKIP_GET_URL", "uprn": "39081672", diff --git a/uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json b/uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json new file mode 100644 index 0000000000..11b0070f00 --- /dev/null +++ b/uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json @@ -0,0 +1,64 @@ +{ + "bins": [ + { + "type": "Recycling with food and garden", + "collectionDate": "24/10/2023" + }, + { + "type": "Rubbish", + "collectionDate": "31/10/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "07/11/2023" + }, + { + "type": "Rubbish", + "collectionDate": "14/11/2023" + }, + { + 
"type": "Recycling with food and garden", + "collectionDate": "21/11/2023" + }, + { + "type": "Rubbish", + "collectionDate": "28/11/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "05/12/2023" + }, + { + "type": "Rubbish", + "collectionDate": "12/12/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "19/12/2023" + }, + { + "type": "Rubbish", + "collectionDate": "28/12/2023" + }, + { + "type": "Recycling", + "collectionDate": "02/01/2024" + }, + { + "type": "Rubbish", + "collectionDate": "09/01/2024" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "16/01/2024" + }, + { + "type": "Rubbish", + "collectionDate": "23/01/2024" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "30/01/2024" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py new file mode 100644 index 0000000000..b507d70046 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py @@ -0,0 +1,98 @@ +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select +from selenium.webdriver.support.wait import WebDriverWait + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + data = {"bins": []} + user_uprn = kwargs.get("uprn") + user_postcode = kwargs.get("postcode") + check_uprn(user_uprn) + check_postcode(user_postcode) + + # Set up Selenium to run 'headless' + options = webdriver.ChromeOptions() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-gpu") + options.add_argument("--disable-dev-shm-usage") + options.add_experimental_option("excludeSwitches", ["enable-logging"]) + + # Create Selenium webdriver + driver = webdriver.Chrome(options=options) + driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday") + + # Close cookies banner + cookieAccept = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.CSS_SELECTOR, ".cookiemessage__link--close")) + ) + cookieAccept.click() + + # Wait for the postcode field to appear then populate it + inputElement_postcode = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_POSTCODESELECT_POSTCODE")) + ) + inputElement_postcode.send_keys(user_postcode) + + # Click search button + findAddress = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_POSTCODESELECT_PAGE1NEXT_NEXT")) + ) + findAddress.click() + + # Wait for the 'Select address' dropdown to appear and select option matching UPRN + dropdown = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_ADDRESSSELECT_ADDRESS")) + ) + # Create a 'Select' for it, then select the matching UPRN option + dropdownSelect = Select(dropdown) + dropdownSelect.select_by_value(user_uprn) + + # Wait for the submit button to appear, then click it to get the collection dates + submit = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_ADDRESSSELECT_ADDRESSSELECTNEXTBTN_NEXT")) + )
+ submit.click() + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + # Get months + for month_wrapper in soup.find_all("div", {"class": "bin-collection__month"}): + if month_wrapper: + month_year = month_wrapper.find("h3", {"class": "bin-collection__title"}).get_text(strip=True) + # Get collections + for collection in month_wrapper.find_all("li", {"class": "bin-collection__item"}): + day = collection.find("span", {"class": "bin-collection__number"}).get_text(strip=True) + if month_year and day: + bin_date = datetime.strptime(day + " " + month_year, "%d %B %Y") + dict_data = { + "type": collection.find("span", {"class": "bin-collection__type"}).get_text(strip=True), + "collectionDate": bin_date.strftime(date_format) + } + data["bins"].append(dict_data) + + data["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return data From 916a0b1a92f79103ade8c7646557e564fda9aa5b Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Fri, 20 Oct 2023 20:33:58 +0100 Subject: [PATCH 2/4] feat: Add support for Gateshead Council --- .../council_schemas/GatesheadCouncil.schema | 47 ++++++++++ .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 8 ++ .../tests/outputs/GatesheadCouncil.json | 68 +++++++++++++++ .../councils/GatesheadCouncil.py | 85 +++++++++++++++++++ 5 files changed, 209 insertions(+) create mode 100644 uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema create mode 100644 uk_bin_collection/tests/outputs/GatesheadCouncil.json create mode 100644 uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py diff --git a/uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema b/uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema new file mode 100644 index 0000000000..f6a48c63d5 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + 
"$ref": "#/definitions/Welcome10", + "definitions": { + "Welcome10": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome10" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "$ref": "#/definitions/Type" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + }, + "Type": { + "type": "string", + "enum": [ + "Recycling", + "Household Waste" + ], + "title": "Type" + } + } +} diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index f76df165d1..d6d03abb2e 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -39,6 +39,7 @@ Feature: Test each council output matches expected results in /outputs | EastSuffolkCouncil | | ErewashBoroughCouncil | | FenlandDistrictCouncil | + | GatesheadCouncil | | GlasgowCityCouncil | | GuildfordCouncil | | HarrogateBoroughCouncil | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 6f9ec2de89..d8da799936 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -202,6 +202,14 @@ "url": "https://www.fenland.gov.uk/article/13114/", "wiki_name": "Fenland District Council" }, + "GatesheadCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "NE16 5LQ", + "house_number": "Bracken Cottage", + "url": "https://www.gateshead.gov.uk/", + "wiki_name": "Gateshead Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" + }, "GlasgowCityCouncil": { "url": "https://www.glasgow.gov.uk/forms/refuseandrecyclingcalendar/PrintCalendar.aspx?UPRN=906700034497", 
"wiki_name": "Glasgow City Council", diff --git a/uk_bin_collection/tests/outputs/GatesheadCouncil.json b/uk_bin_collection/tests/outputs/GatesheadCouncil.json new file mode 100644 index 0000000000..baa9b64585 --- /dev/null +++ b/uk_bin_collection/tests/outputs/GatesheadCouncil.json @@ -0,0 +1,68 @@ +{ + "bins": [ + { + "type": "Recycling", + "collectionDate": "02/01/2023" + }, + { + "type": "Household Waste", + "collectionDate": "09/01/2023" + }, + { + "type": "Recycling", + "collectionDate": "16/01/2023" + }, + { + "type": "Household Waste", + "collectionDate": "23/01/2023" + }, + { + "type": "Recycling", + "collectionDate": "30/01/2023" + }, + { + "type": "Household Waste", + "collectionDate": "06/02/2023" + }, + { + "type": "Recycling", + "collectionDate": "24/10/2023" + }, + { + "type": "Household Waste", + "collectionDate": "31/10/2023" + }, + { + "type": "Recycling", + "collectionDate": "07/11/2023" + }, + { + "type": "Household Waste", + "collectionDate": "14/11/2023" + }, + { + "type": "Recycling", + "collectionDate": "21/11/2023" + }, + { + "type": "Household Waste", + "collectionDate": "28/11/2023" + }, + { + "type": "Recycling", + "collectionDate": "05/12/2023" + }, + { + "type": "Household Waste", + "collectionDate": "12/12/2023" + }, + { + "type": "Recycling", + "collectionDate": "19/12/2023" + }, + { + "type": "Household Waste", + "collectionDate": "27/12/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py new file mode 100644 index 0000000000..167a0026f1 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py @@ -0,0 +1,85 @@ +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from 
uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + data = {"bins": []} + user_paon = kwargs.get("paon") + user_postcode = kwargs.get("postcode") + check_paon(user_paon) + check_postcode(user_postcode) + + # Set up Selenium to run 'headless' + options = webdriver.ChromeOptions() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-gpu") + options.add_argument("--disable-dev-shm-usage") + options.add_experimental_option("excludeSwitches", ["enable-logging"]) + + # Create Selenium webdriver + driver = webdriver.Chrome(options=options) + driver.get("https://www.gateshead.gov.uk/article/3150/Bin-collection-day-checker") + + # Wait for the postcode field to appear then populate it + inputElement_postcode = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "BINCOLLECTIONCHECKER_ADDRESSSEARCH_ADDRESSLOOKUPPOSTCODE")) + ) + inputElement_postcode.send_keys(user_postcode) + + # Click search button + findAddress = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "BINCOLLECTIONCHECKER_ADDRESSSEARCH_ADDRESSLOOKUPSEARCH")) + ) + findAddress.click() + + # Wait for the 'Select address' dropdown to appear and select option matching the house name/number + WebDriverWait(driver, 10).until(EC.element_to_be_clickable(( + By.XPATH, + "//select[@id='BINCOLLECTIONCHECKER_ADDRESSSEARCH_ADDRESSLOOKUPADDRESS']//option[contains(., '" + user_paon + "')]" + ))).click() + + # Wait for the collections table to appear + WebDriverWait(driver, 
10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".bincollections__table"))) + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + # Get collections table + table = soup.find("table", {"class": "bincollections__table"}) + + # Get rows + month_year = "" + for row in table.find_all("tr"): + if row.find("th"): + month_year = row.find("th").get_text(strip=True) + " " + datetime.now().strftime("%Y") + elif month_year != "": + collection = row.find_all("td") + bin_date = datetime.strptime(collection[0].get_text(strip=True) + " " + month_year, "%d %B %Y") + dict_data = { + "type": collection[2].get_text().replace("- DAY CHANGE", "").strip(), + "collectionDate": bin_date.strftime(date_format) + } + data["bins"].append(dict_data) + + data["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return data From 2d6ee41b87283f6a138fcb2fd6f0478262c0b651 Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Fri, 20 Oct 2023 21:21:13 +0100 Subject: [PATCH 3/4] feat: Add support for East Lindsey District Council --- .../EastLindseyDistrictCouncil.schema | 39 ++++++++ .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 8 ++ .../outputs/EastLindseyDistrictCouncil.json | 16 ++++ .../councils/EastLindseyDistrictCouncil.py | 89 +++++++++++++++++++ 5 files changed, 153 insertions(+) create mode 100644 uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema create mode 100644 uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json create mode 100644 uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py diff --git a/uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema b/uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema new file mode 100644 index 0000000000..202bcb4793 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema @@ -0,0 +1,39 @@ +{ + "$schema": 
"http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome3", + "definitions": { + "Welcome3": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome3" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + } + } +} diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index d6d03abb2e..6a8fb786b3 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -34,6 +34,7 @@ Feature: Test each council output matches expected results in /outputs | EastCambridgeshireCouncil | | EastDevonDC | | EastleighBoroughCouncil | + | EastLindseyDistrictCouncil | | EastNorthamptonshireCouncil | | EastRidingCouncil | | EastSuffolkCouncil | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index d8da799936..7b9089c794 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -170,6 +170,14 @@ "url": "https://www.eastleigh.gov.uk/waste-bins-and-recycling/collection-dates/your-waste-bin-and-recycling-collections?uprn=", "wiki_name": "Eastleigh Borough Council" }, + "EastLindseyDistrictCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "LN4 4SY", + "house_number": "Raf Coningsby", + "url": "https://www.e-lindsey.gov.uk/", + "wiki_name": "East Lindsey District Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" + }, "EastNorthamptonshireCouncil": { "SKIP_GET_URL": "SKIP_GET_URL", "uprn": "100031021317", diff --git 
a/uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json b/uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json new file mode 100644 index 0000000000..c764368222 --- /dev/null +++ b/uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json @@ -0,0 +1,16 @@ +{ + "bins": [ + { + "type": "Paper (Purple-Lidded Bin)", + "collectionDate": "27/10/2023" + }, + { + "type": "Domestic Waste (Black Bin)", + "collectionDate": "03/11/2023" + }, + { + "type": "Recycling (Grey Bin)", + "collectionDate": "10/11/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py new file mode 100644 index 0000000000..34fc229d42 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py @@ -0,0 +1,89 @@ +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + data = {"bins": []} + user_paon = kwargs.get("paon") + user_postcode = kwargs.get("postcode") + check_paon(user_paon) + check_postcode(user_postcode) + + # Set up Selenium to run 'headless' + options = webdriver.ChromeOptions() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-gpu") + options.add_argument("--disable-dev-shm-usage") + options.add_experimental_option("excludeSwitches", ["enable-logging"]) + + # Create Selenium webdriver + driver = webdriver.Chrome(options=options) + driver.get("https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days") + + # Wait for the postcode field to appear then populate it + inputElement_postcode = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPPOSTCODE")) + ) + inputElement_postcode.send_keys(user_postcode) + + # Click search button + findAddress = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPSEARCH")) + ) + findAddress.click() + + # Wait for the 'Select address' dropdown to appear and select option matching the house name/number + WebDriverWait(driver, 10).until(EC.element_to_be_clickable(( + By.XPATH, + "//select[@id='WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPADDRESS']//option[contains(., '" + user_paon + "')]" + ))).click() + + # Wait for the submit button to appear, then click it to get the collection dates + submit = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_FIELD2_NEXT")) + ) + submit.click() + + # Wait for the collections table to appear + WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".waste-results"))) + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + # Get collections + for collection in 
soup.find_all("div", {"class": "waste-result"}): + ptags = collection.find_all("p") + dict_data = { + "type": collection.find("h3").get_text(strip=True), + "collectionDate": datetime.strptime( + remove_ordinal_indicator_from_date_string( + ptags[1].get_text().replace("The date of your next collection is", "").replace(".", "").strip() + ), + "%A %d %B %Y" + ).strftime(date_format) + } + data["bins"].append(dict_data) + + data["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return data From 780f6b4b16aba26106cfb2dc7c98fd6df5a0feac Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Fri, 20 Oct 2023 22:30:19 +0100 Subject: [PATCH 4/4] feat: Add support for West Lothian Council --- .../council_schemas/WestLothianCouncil.schema | 39 ++++++++ .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 8 ++ .../tests/outputs/WestLothianCouncil.json | 20 +++++ .../councils/WestLothianCouncil.py | 88 +++++++++++++++++++ 5 files changed, 156 insertions(+) create mode 100644 uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema create mode 100644 uk_bin_collection/tests/outputs/WestLothianCouncil.json create mode 100644 uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py diff --git a/uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema b/uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema new file mode 100644 index 0000000000..6331ceb8b9 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome7", + "definitions": { + "Welcome7": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome7" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + 
"properties": { + "type": { + "type": "string" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + } + } +} diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index 6a8fb786b3..3c1eb0963b 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -98,6 +98,7 @@ Feature: Test each council output matches expected results in /outputs | WaverleyBoroughCouncil | | WealdenDistrictCouncil | | WelhatCouncil | + | WestLothianCouncil | | WiganBoroughCouncil | | WiltshireCouncil | | WindsorAndMaidenheadCouncil | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 7b9089c794..8b44081a61 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -595,6 +595,14 @@ "url": "https://www.welhat.gov.uk/xfp/form/214", "wiki_name": "Welhat Council" }, + "WestLothianCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "EH52 5JE", + "house_number": "1 GOSCHEN PLACE", + "url": "https://www.westlothian.gov.uk/", + "wiki_name": "West Lothian Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" + }, "WiganBoroughCouncil": { "SKIP_GET_URL": "SKIP_GET_URL", "postcode": "WN24UQ", diff --git a/uk_bin_collection/tests/outputs/WestLothianCouncil.json b/uk_bin_collection/tests/outputs/WestLothianCouncil.json new file mode 100644 index 0000000000..5417f3f278 --- /dev/null +++ b/uk_bin_collection/tests/outputs/WestLothianCouncil.json @@ -0,0 +1,20 @@ +{ + "bins": [ + { + "type": "Green Bin", + "collectionDate": "20/10/2023" + }, + { + "type": "Grey Bin", + "collectionDate": "27/10/2023" + }, + { + "type": "Brown Bin (Now for Garden and Food Waste)", + "collectionDate": "30/10/2023" + }, + { + "type": "Blue 
Bin", + "collectionDate": "03/11/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py new file mode 100644 index 0000000000..2860a15d5f --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py @@ -0,0 +1,88 @@ +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + data = {"bins": []} + user_paon = kwargs.get("paon") + user_postcode = kwargs.get("postcode") + check_paon(user_paon) + check_postcode(user_postcode) + + # Set up Selenium to run 'headless' + options = webdriver.ChromeOptions() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-gpu") + options.add_argument("--disable-dev-shm-usage") + options.add_experimental_option("excludeSwitches", ["enable-logging"]) + + # Create Selenium webdriver + driver = webdriver.Chrome(options=options) + driver.get("https://www.westlothian.gov.uk/article/31528/Bin-Collection-Calendar-Dates") + + # Close feedback banner + feedbackBanner = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.CSS_SELECTOR, ".feedback__link--no")) + ) + feedbackBanner.click() + + # Wait for the postcode field to appear then populate it + inputElement_postcode = WebDriverWait(driver, 30).until( + EC.presence_of_element_located( + (By.ID, "WLBINCOLLECTION_PAGE1_ADDRESSLOOKUPPOSTCODE")) + ) + inputElement_postcode.send_keys(user_postcode) + + # Click search button + findAddress = WebDriverWait(driver, 10).until( + EC.presence_of_element_located( + (By.ID, "WLBINCOLLECTION_PAGE1_ADDRESSLOOKUPSEARCH")) + ) + findAddress.click() + + # Wait for the 'Select address' dropdown to appear and select option matching the house name/number + WebDriverWait(driver, 10).until(EC.element_to_be_clickable(( + By.XPATH, + "//select[@id='WLBINCOLLECTION_PAGE1_ADDRESSLOOKUPADDRESS']//option[contains(., '" + user_paon + "')]" + ))).click() + + # Wait for the collections table to appear + WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".bin-collections"))) + + soup = BeautifulSoup(driver.page_source, features="html.parser") + + # Get collections + for collection in soup.find_all("div", {"class": "bin-collect"}): + dict_data = { + "type": 
collection.find("h3").get_text(strip=True), + "collectionDate": datetime.strptime( + remove_ordinal_indicator_from_date_string( + collection.find("span", {"class": "bin-collect__date"}).get_text(strip=True) + ), + "%A, %B %d %Y" + ).strftime(date_format) + } + data["bins"].append(dict_data) + + data["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return data