diff --git a/uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema b/uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema new file mode 100644 index 0000000000..202bcb4793 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/EastLindseyDistrictCouncil.schema @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome3", + "definitions": { + "Welcome3": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome3" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + } + } +} diff --git a/uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema b/uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema new file mode 100644 index 0000000000..f6a48c63d5 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/GatesheadCouncil.schema @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome10", + "definitions": { + "Welcome10": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome10" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "$ref": "#/definitions/Type" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + }, + "Type": { + "type": "string", + "enum": [ + "Recycling", + "Household Waste" + ], + "title": "Type" + } + } +} diff --git 
a/uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema b/uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema new file mode 100644 index 0000000000..784e1df0ad --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/StaffordshireMoorlandsDistrictCouncil.schema @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome4", + "definitions": { + "Welcome4": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome4" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "$ref": "#/definitions/Type" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + }, + "Type": { + "type": "string", + "enum": [ + "Recycling with food and garden", + "Rubbish", + "Recycling" + ], + "title": "Type" + } + } +} diff --git a/uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema b/uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema new file mode 100644 index 0000000000..6331ceb8b9 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/WestLothianCouncil.schema @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome7", + "definitions": { + "Welcome7": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome7" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + } + } +} diff --git 
a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index 78fbdef491..3c1eb0963b 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -34,11 +34,13 @@ Feature: Test each council output matches expected results in /outputs | EastCambridgeshireCouncil | | EastDevonDC | | EastleighBoroughCouncil | + | EastLindseyDistrictCouncil | | EastNorthamptonshireCouncil | | EastRidingCouncil | | EastSuffolkCouncil | | ErewashBoroughCouncil | | FenlandDistrictCouncil | + | GatesheadCouncil | | GlasgowCityCouncil | | GuildfordCouncil | | HarrogateBoroughCouncil | @@ -81,6 +83,7 @@ Feature: Test each council output matches expected results in /outputs | SouthNorfolkCouncil | | SouthOxfordshireCouncil | | SouthTynesideCouncil | + | StaffordshireMoorlandsDistrictCouncil | | StHelensBC | | StockportBoroughCouncil | | StratfordUponAvonCouncil | @@ -95,6 +98,7 @@ Feature: Test each council output matches expected results in /outputs | WaverleyBoroughCouncil | | WealdenDistrictCouncil | | WelhatCouncil | + | WestLothianCouncil | | WiganBoroughCouncil | | WiltshireCouncil | | WindsorAndMaidenheadCouncil | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 075ac10a5d..8b44081a61 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -170,6 +170,14 @@ "url": "https://www.eastleigh.gov.uk/waste-bins-and-recycling/collection-dates/your-waste-bin-and-recycling-collections?uprn=", "wiki_name": "Eastleigh Borough Council" }, + "EastLindseyDistrictCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "LN4 4SY", + "house_number": "Raf Coningsby", + "url": "https://www.e-lindsey.gov.uk/", + "wiki_name": "East Lindsey District Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" + 
}, "EastNorthamptonshireCouncil": { "SKIP_GET_URL": "SKIP_GET_URL", "uprn": "100031021317", @@ -202,6 +210,14 @@ "url": "https://www.fenland.gov.uk/article/13114/", "wiki_name": "Fenland District Council" }, + "GatesheadCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "NE16 5LQ", + "house_number": "Bracken Cottage", + "url": "https://www.gateshead.gov.uk/", + "wiki_name": "Gateshead Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" + }, "GlasgowCityCouncil": { "url": "https://www.glasgow.gov.uk/forms/refuseandrecyclingcalendar/PrintCalendar.aspx?UPRN=906700034497", "wiki_name": "Glasgow City Council", @@ -480,6 +496,14 @@ "url": "https://www.southtyneside.gov.uk/article/33352/Bin-collection-dates", "wiki_name": "South Tyneside Council" }, + "StaffordshireMoorlandsDistrictCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "ST8 6HN", + "uprn": "100031863037", + "url": "https://www.staffsmoorlands.gov.uk/", + "wiki_name": "Staffordshire Moorlands District Council", + "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)" + }, "StHelensBC": { "SKIP_GET_URL": "SKIP_GET_URL", "uprn": "39081672", @@ -571,6 +595,14 @@ "url": "https://www.welhat.gov.uk/xfp/form/214", "wiki_name": "Welhat Council" }, + "WestLothianCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "EH52 5JE", + "house_number": "1 GOSCHEN PLACE", + "url": "https://www.westlothian.gov.uk/", + "wiki_name": "West Lothian Council", + "wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes" + }, "WiganBoroughCouncil": { "SKIP_GET_URL": "SKIP_GET_URL", "postcode": "WN24UQ", diff --git a/uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json b/uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json new file mode 100644 index 0000000000..c764368222 --- /dev/null +++ 
b/uk_bin_collection/tests/outputs/EastLindseyDistrictCouncil.json @@ -0,0 +1,16 @@ +{ + "bins": [ + { + "type": "Paper (Purple-Lidded Bin)", + "collectionDate": "27/10/2023" + }, + { + "type": "Domestic Waste (Black Bin)", + "collectionDate": "03/11/2023" + }, + { + "type": "Recycling (Grey Bin)", + "collectionDate": "10/11/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/tests/outputs/GatesheadCouncil.json b/uk_bin_collection/tests/outputs/GatesheadCouncil.json new file mode 100644 index 0000000000..baa9b64585 --- /dev/null +++ b/uk_bin_collection/tests/outputs/GatesheadCouncil.json @@ -0,0 +1,68 @@ +{ + "bins": [ + { + "type": "Recycling", + "collectionDate": "02/01/2023" + }, + { + "type": "Household Waste", + "collectionDate": "09/01/2023" + }, + { + "type": "Recycling", + "collectionDate": "16/01/2023" + }, + { + "type": "Household Waste", + "collectionDate": "23/01/2023" + }, + { + "type": "Recycling", + "collectionDate": "30/01/2023" + }, + { + "type": "Household Waste", + "collectionDate": "06/02/2023" + }, + { + "type": "Recycling", + "collectionDate": "24/10/2023" + }, + { + "type": "Household Waste", + "collectionDate": "31/10/2023" + }, + { + "type": "Recycling", + "collectionDate": "07/11/2023" + }, + { + "type": "Household Waste", + "collectionDate": "14/11/2023" + }, + { + "type": "Recycling", + "collectionDate": "21/11/2023" + }, + { + "type": "Household Waste", + "collectionDate": "28/11/2023" + }, + { + "type": "Recycling", + "collectionDate": "05/12/2023" + }, + { + "type": "Household Waste", + "collectionDate": "12/12/2023" + }, + { + "type": "Recycling", + "collectionDate": "19/12/2023" + }, + { + "type": "Household Waste", + "collectionDate": "27/12/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json b/uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json new file mode 100644 index 0000000000..11b0070f00 --- 
/dev/null +++ b/uk_bin_collection/tests/outputs/StaffordshireMoorlandsDistrictCouncil.json @@ -0,0 +1,64 @@ +{ + "bins": [ + { + "type": "Recycling with food and garden", + "collectionDate": "24/10/2023" + }, + { + "type": "Rubbish", + "collectionDate": "31/10/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "07/11/2023" + }, + { + "type": "Rubbish", + "collectionDate": "14/11/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "21/11/2023" + }, + { + "type": "Rubbish", + "collectionDate": "28/11/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "05/12/2023" + }, + { + "type": "Rubbish", + "collectionDate": "12/12/2023" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "19/12/2023" + }, + { + "type": "Rubbish", + "collectionDate": "28/12/2023" + }, + { + "type": "Recycling", + "collectionDate": "02/01/2024" + }, + { + "type": "Rubbish", + "collectionDate": "09/01/2024" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "16/01/2024" + }, + { + "type": "Rubbish", + "collectionDate": "23/01/2024" + }, + { + "type": "Recycling with food and garden", + "collectionDate": "30/01/2024" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/tests/outputs/WestLothianCouncil.json b/uk_bin_collection/tests/outputs/WestLothianCouncil.json new file mode 100644 index 0000000000..5417f3f278 --- /dev/null +++ b/uk_bin_collection/tests/outputs/WestLothianCouncil.json @@ -0,0 +1,20 @@ +{ + "bins": [ + { + "type": "Green Bin", + "collectionDate": "20/10/2023" + }, + { + "type": "Grey Bin", + "collectionDate": "27/10/2023" + }, + { + "type": "Brown Bin (Now for Garden and Food Waste)", + "collectionDate": "30/10/2023" + }, + { + "type": "Blue Bin", + "collectionDate": "03/11/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py 
b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py
new file mode 100644
index 0000000000..34fc229d42
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/EastLindseyDistrictCouncil.py
@@ -0,0 +1,107 @@
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import \
+    AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Drive the East Lindsey lookup form with Selenium and scrape the
+        next collection date for each bin type.
+
+        :param page: Not used; Selenium loads the form itself
+        :param kwargs: Must supply "paon" (house name/number) and "postcode"
+        :return: Dict with a "bins" list of type/collectionDate entries, sorted by date
+        """
+        data = {"bins": []}
+        user_paon = kwargs.get("paon")
+        user_postcode = kwargs.get("postcode")
+        check_paon(user_paon)
+        check_postcode(user_postcode)
+
+        # XPath 1.0 has no quote escaping, so wrap the house name/number in
+        # whichever quote character it does not contain (e.g. "St John's")
+        quote = '"' if "'" in user_paon else "'"
+
+        # Set up Selenium to run 'headless'
+        options = webdriver.ChromeOptions()
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-gpu")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_experimental_option("excludeSwitches", ["enable-logging"])
+
+        # Create Selenium webdriver
+        driver = webdriver.Chrome(options=options)
+        try:
+            driver.get("https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days")
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located(
+                    (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPPOSTCODE"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPSEARCH"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(EC.element_to_be_clickable((
+                By.XPATH,
+                "//select[@id='WASTECOLLECTIONDAYS202324_LOOKUP_ADDRESSLOOKUPADDRESS']//option[contains(., "
+                + quote + user_paon + quote + ")]"
+            ))).click()
+
+            # Wait for the submit button to appear, then click it to get the collection dates
+            submit = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "WASTECOLLECTIONDAYS202324_LOOKUP_FIELD2_NEXT"))
+            )
+            submit.click()
+
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".waste-results")))
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+        finally:
+            # Always close the browser, even when a wait above times out,
+            # so headless Chrome processes are not leaked
+            driver.quit()
+
+        # Get collections
+        for collection in soup.find_all("div", {"class": "waste-result"}):
+            ptags = collection.find_all("p")
+            dict_data = {
+                "type": collection.find("h3").get_text(strip=True),
+                "collectionDate": datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(
+                        ptags[1].get_text().replace("The date of your next collection is", "").replace(".", "").strip()
+                    ),
+                    "%A %d %B %Y"
+                ).strftime(date_format)
+            }
+            data["bins"].append(dict_data)
+
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return data
diff --git a/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py
new file mode 100644
index 0000000000..167a0026f1
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py
@@ -0,0 +1,118 @@
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import \
+    AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Drive the Gateshead bin collection checker with Selenium and
+        scrape every collection listed in the results table.
+
+        :param page: Not used; Selenium loads the form itself
+        :param kwargs: Must supply "paon" (house name/number) and "postcode"
+        :return: Dict with a "bins" list of type/collectionDate entries, sorted by date
+        """
+        data = {"bins": []}
+        user_paon = kwargs.get("paon")
+        user_postcode = kwargs.get("postcode")
+        check_paon(user_paon)
+        check_postcode(user_postcode)
+
+        # XPath 1.0 has no quote escaping, so wrap the house name/number in
+        # whichever quote character it does not contain (e.g. "St John's")
+        quote = '"' if "'" in user_paon else "'"
+
+        # Set up Selenium to run 'headless'
+        options = webdriver.ChromeOptions()
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-gpu")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_experimental_option("excludeSwitches", ["enable-logging"])
+
+        # Create Selenium webdriver
+        driver = webdriver.Chrome(options=options)
+        try:
+            driver.get("https://www.gateshead.gov.uk/article/3150/Bin-collection-day-checker")
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located(
+                    (By.ID, "BINCOLLECTIONCHECKER_ADDRESSSEARCH_ADDRESSLOOKUPPOSTCODE"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "BINCOLLECTIONCHECKER_ADDRESSSEARCH_ADDRESSLOOKUPSEARCH"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(EC.element_to_be_clickable((
+                By.XPATH,
+                "//select[@id='BINCOLLECTIONCHECKER_ADDRESSSEARCH_ADDRESSLOOKUPADDRESS']//option[contains(., "
+                + quote + user_paon + quote + ")]"
+            ))).click()
+
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".bincollections__table")))
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+        finally:
+            # Always close the browser, even when a wait above times out,
+            # so headless Chrome processes are not leaked
+            driver.quit()
+
+        # Get collections table
+        table = soup.find("table", {"class": "bincollections__table"})
+
+        # Month headings carry no year, so start from the current year and
+        # roll over whenever the month number goes backwards (Dec -> Jan)
+        year = datetime.now().year
+        previous_month = None
+        month_year = ""
+        for row in table.find_all("tr"):
+            heading = row.find("th")
+            if heading:
+                month_name = heading.get_text(strip=True)
+                try:
+                    month_number = datetime.strptime(month_name, "%B").month
+                except ValueError:
+                    # Not a plain month name (e.g. a column title row), so it
+                    # cannot signal a year change
+                    month_number = None
+                if month_number is not None:
+                    if previous_month is not None and month_number < previous_month:
+                        year += 1
+                    previous_month = month_number
+                month_year = month_name + " " + str(year)
+            elif month_year != "":
+                collection = row.find_all("td")
+                bin_date = datetime.strptime(collection[0].get_text(strip=True) + " " + month_year, "%d %B %Y")
+                dict_data = {
+                    "type": collection[2].get_text().replace("- DAY CHANGE", "").strip(),
+                    "collectionDate": bin_date.strftime(date_format)
+                }
+                data["bins"].append(dict_data)
+
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return data
diff --git a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py
new file mode 100644
index 0000000000..b507d70046
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py
@@ -0,0 +1,116 @@
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import \
+    AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Drive the Staffordshire Moorlands bin day form with Selenium and
+        scrape every collection listed in the monthly calendar.
+
+        :param page: Not used; Selenium loads the form itself
+        :param kwargs: Must supply "uprn" and "postcode"
+        :return: Dict with a "bins" list of type/collectionDate entries, sorted by date
+        """
+        data = {"bins": []}
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+        check_uprn(user_uprn)
+        check_postcode(user_postcode)
+
+        # Set up Selenium to run 'headless'
+        options = webdriver.ChromeOptions()
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-gpu")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_experimental_option("excludeSwitches", ["enable-logging"])
+
+        # Create Selenium webdriver
+        driver = webdriver.Chrome(options=options)
+        try:
+            driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday")
+
+            # Close cookies banner
+            cookieAccept = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, ".cookiemessage__link--close"))
+            )
+            cookieAccept.click()
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located(
+                    (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_POSTCODESELECT_POSTCODE"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_POSTCODESELECT_PAGE1NEXT_NEXT"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching UPRN
+            dropdown = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located(
+                    (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_ADDRESSSELECT_ADDRESS"))
+            )
+            # Create a 'Select' for it, then select the matching UPRN option
+            dropdownSelect = Select(dropdown)
+            dropdownSelect.select_by_value(user_uprn)
+
+            # Wait for the submit button to appear, then click it to get the collection dates
+            submit = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "FINDBINDAYSSTAFFORDSHIREMOORLANDS_ADDRESSSELECT_ADDRESSSELECTNEXTBTN_NEXT"))
+            )
+            submit.click()
+
+            # Wait for the collections calendar to render before reading the page,
+            # otherwise page_source may still be the address selection step
+            WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located((By.CSS_SELECTOR, ".bin-collection__month"))
+            )
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+        finally:
+            # Always close the browser, even when a wait above times out,
+            # so headless Chrome processes are not leaked
+            driver.quit()
+
+        # Get months
+        for month_wrapper in soup.find_all("div", {"class": "bin-collection__month"}):
+            month_year = month_wrapper.find("h3", {"class": "bin-collection__title"}).get_text(strip=True)
+            # Get collections
+            for collection in month_wrapper.find_all("li", {"class": "bin-collection__item"}):
+                day = collection.find("span", {"class": "bin-collection__number"}).get_text(strip=True)
+                if month_year and day:
+                    bin_date = datetime.strptime(day + " " + month_year, "%d %B %Y")
+                    dict_data = {
+                        "type": collection.find("span", {"class": "bin-collection__type"}).get_text(strip=True),
+                        "collectionDate": bin_date.strftime(date_format)
+                    }
+                    data["bins"].append(dict_data)
+
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return data
diff --git a/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py
new file mode 100644
index 0000000000..2860a15d5f
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/WestLothianCouncil.py
@@ -0,0 +1,106 @@
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import \
+    AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Drive the West Lothian bin calendar form with Selenium and scrape
+        the next collection date for each bin type.
+
+        :param page: Not used; Selenium loads the form itself
+        :param kwargs: Must supply "paon" (house name/number) and "postcode"
+        :return: Dict with a "bins" list of type/collectionDate entries, sorted by date
+        """
+        data = {"bins": []}
+        user_paon = kwargs.get("paon")
+        user_postcode = kwargs.get("postcode")
+        check_paon(user_paon)
+        check_postcode(user_postcode)
+
+        # XPath 1.0 has no quote escaping, so wrap the house name/number in
+        # whichever quote character it does not contain (e.g. "St John's")
+        quote = '"' if "'" in user_paon else "'"
+
+        # Set up Selenium to run 'headless'
+        options = webdriver.ChromeOptions()
+        options.add_argument("--headless")
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-gpu")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_experimental_option("excludeSwitches", ["enable-logging"])
+
+        # Create Selenium webdriver
+        driver = webdriver.Chrome(options=options)
+        try:
+            driver.get("https://www.westlothian.gov.uk/article/31528/Bin-Collection-Calendar-Dates")
+
+            # Close feedback banner
+            feedbackBanner = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, ".feedback__link--no"))
+            )
+            feedbackBanner.click()
+
+            # Wait for the postcode field to appear then populate it
+            inputElement_postcode = WebDriverWait(driver, 30).until(
+                EC.presence_of_element_located(
+                    (By.ID, "WLBINCOLLECTION_PAGE1_ADDRESSLOOKUPPOSTCODE"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+
+            # Click search button
+            findAddress = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located(
+                    (By.ID, "WLBINCOLLECTION_PAGE1_ADDRESSLOOKUPSEARCH"))
+            )
+            findAddress.click()
+
+            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
+            WebDriverWait(driver, 10).until(EC.element_to_be_clickable((
+                By.XPATH,
+                "//select[@id='WLBINCOLLECTION_PAGE1_ADDRESSLOOKUPADDRESS']//option[contains(., "
+                + quote + user_paon + quote + ")]"
+            ))).click()
+
+            # Wait for the collections table to appear
+            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".bin-collections")))
+
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+        finally:
+            # Always close the browser, even when a wait above times out,
+            # so headless Chrome processes are not leaked
+            driver.quit()
+
+        # Get collections
+        for collection in soup.find_all("div", {"class": "bin-collect"}):
+            dict_data = {
+                "type": collection.find("h3").get_text(strip=True),
+                "collectionDate": datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(
+                        collection.find("span", {"class": "bin-collect__date"}).get_text(strip=True)
+                    ),
+                    "%A, %B %d %Y"
+                ).strftime(date_format)
+            }
+            data["bins"].append(dict_data)
+
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return data