diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
index 2480b50888..58df932495 100644
--- a/uk_bin_collection/tests/input.json
+++ b/uk_bin_collection/tests/input.json
@@ -871,6 +871,12 @@
         "wiki_name": "High Peak Council",
         "wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver."
     },
+    "HinckleyandBosworthBoroughCouncil": {
+        "url": "https://www.hinckley-bosworth.gov.uk",
+        "uprn": "100030533512",
+        "wiki_name": "Hinckley and Bosworth Borough Council",
+        "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
+    },
     "HounslowCouncil": {
         "house_number": "17A LAMPTON PARK ROAD, HOUNSLOW",
         "postcode": "TW3 4HS",
@@ -1072,7 +1078,7 @@
         "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
     },
     "MertonCouncil": {
-        "url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25851371",
+        "url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25936129",
         "wiki_command_url_override": "https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServices.aspx?ID=XXXXXXXX",
         "wiki_name": "Merton Council",
         "wiki_note": "Follow the instructions [here](https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServicesSearch.aspx) until you get the \"Your recycling and rubbish collection days\" page, then copy the URL and replace the URL in the command."
@@ -1132,6 +1138,12 @@
         "wiki_name": "Mole Valley District Council",
         "wiki_note": "UPRN can only be parsed with a valid postcode."
     },
+    "MonmouthshireCountyCouncil": {
+        "url": "https://maps.monmouthshire.gov.uk",
+        "uprn": "100100266220",
+        "wiki_name": "Monmouthshire County Council",
+        "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
+    },
     "MorayCouncil": {
         "uprn": "28841",
         "url": "https://bindayfinder.moray.gov.uk/",
diff --git a/uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py
index d51b2c19d7..c24d71e527 100644
--- a/uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/BlabyDistrictCouncil.py
@@ -31,11 +31,14 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
         for container in soup.find_all(class_="box-item"):
             # Get the next collection dates from the <p> tag containing
-            dates_tag = (
-                container.find("p", string=lambda text: "Next" in text)
-                .find_next("p")
-                .find("strong")
-            )
+            try:
+                dates_tag = (
+                    container.find("p", string=lambda text: "Next" in text)
+                    .find_next("p")
+                    .find("strong")
+                )
+            except:
+                continue
             collection_dates = (
                 dates_tag.text.strip().split(", and then ")
                 if dates_tag
diff --git a/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
index 28b0c05e38..d71e40274a 100644
--- a/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
@@ -67,11 +67,19 @@ def parse_data(self, page: str, **kwargs) -> dict:
         for collection in bin_collections:
             if collection is not None:
                 bin_type = collection[0].get("BinType")
+                current_collection_date = collection[0].get("CollectionDate")
+                if current_collection_date is None:
+                    continue
                 current_collection_date = datetime.strptime(
-                    collection[0].get("CollectionDate"), "%Y-%m-%d"
+                    current_collection_date, "%Y-%m-%d"
                 )
+                next_collection_date = collection[0].get(
+                    "NextScheduledCollectionDate"
+                )
+                if next_collection_date is None:
+                    continue
                 next_collection_date = datetime.strptime(
-                    collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d"
+                    next_collection_date, "%Y-%m-%d"
                 )
 
                 # Work out the most recent collection date to display
diff --git a/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py
index 22814d82ad..9bedb1ca6b 100644
--- a/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py
@@ -34,10 +34,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
             # Find the next collection date
             date_tag = container.find(class_="font11 text-center")
-            if date_tag:
-                collection_date = date_tag.text.strip()
-            else:
+            if date_tag.text.strip() == "":
                 continue
+            else:
+                collection_date = date_tag.text.strip()
 
             dict_data = {
                 "type": bin_type,
diff --git a/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
index d499f00ae9..4c0eaaa78d 100644
--- a/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
@@ -27,6 +27,23 @@ def parse_data(self, page: str, **kwargs) -> dict:
             "../Images/Bins/ashBin.gif": "Ash bin",
         }
 
+        fieldset = soup.find("fieldset")
+        ps = fieldset.find_all("p")
+        for p in ps:
+            collection = p.text.strip().replace("Your next ", "").split(".")[0]
+            bin_type = collection.split(" day is")[0]
+            collection_date = datetime.strptime(
+                remove_ordinal_indicator_from_date_string(collection).split("day is ")[
+                    1
+                ],
+                "%A %d %B %Y",
+            )
+            dict_data = {
+                "type": bin_type,
+                "collectionDate": collection_date.strftime(date_format),
+            }
+            data["bins"].append(dict_data)
+
         # Find the page body with all the calendars
         body = soup.find("div", {"id": "Application_ctl00"})
         calendars = body.find_all_next("table", {"title": "Calendar"})
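The new Glasgow block above pulls each `<p>` out of the first `<fieldset>` and recovers the bin type and date purely by string splitting. A minimal standalone sketch of that logic, using a hypothetical sample sentence and a simplified stand-in for `remove_ordinal_indicator_from_date_string` (the real helper comes from `uk_bin_collection.common`, as does `date_format`, assumed here to be `"%d/%m/%Y"`):

```python
import re
from datetime import datetime


def remove_ordinal_indicator_from_date_string(s: str) -> str:
    # Simplified stand-in for the common-module helper: strips "st/nd/rd/th".
    return re.sub(r"(\d+)(st|nd|rd|th)", r"\1", s)


# Hypothetical text of one <p> inside the fieldset
sample = "Your next grey bin day is Tuesday 3rd December 2024."

collection = sample.strip().replace("Your next ", "").split(".")[0]
bin_type = collection.split(" day is")[0]                         # "grey bin"
date_text = remove_ordinal_indicator_from_date_string(collection).split("day is ")[1]
collection_date = datetime.strptime(date_text, "%A %d %B %Y")     # 2024-12-03
print(bin_type, collection_date.strftime("%d/%m/%Y"))             # grey bin 03/12/2024
```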
diff --git a/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py b/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py
index c0dd931e4d..99529e891c 100644
--- a/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py
@@ -83,11 +83,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         )
 
         # Select address from dropdown and wait
-        inputElement_ad = Select(
-            driver.find_element(By.ID, "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS")
-        )
-
-        inputElement_ad.select_by_visible_text(user_paon)
+        WebDriverWait(driver, 10).until(
+            EC.element_to_be_clickable(
+                (
+                    By.XPATH,
+                    "//select[@id='FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS']//option[contains(., '"
+                    + user_paon
+                    + "')]",
+                )
+            )
+        ).click()
 
         WebDriverWait(driver, 10).until(
             EC.presence_of_element_located(
diff --git a/uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py
new file mode 100644
index 0000000000..cf754fbf5d
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py
@@ -0,0 +1,71 @@
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="
+
+        # Make the GET request
+        response = requests.get(URI)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
+
+        # Find all the bin collection date containers
+        bin_schedule = []
+        collection_divs = soup.find_all(
+            "div", class_=["first_date_bins", "last_date_bins"]
+        )
+
+        for div in collection_divs:
+            # Extract the date
+            date = div.find("h3", class_="collectiondate").text.strip().replace(":", "")
+
+            # Extract bin types
+            bins = [img["alt"] for img in div.find_all("img", class_="collection")]
+
+            # Append to the schedule
+            bin_schedule.append({"date": date, "bins": bins})
+
+        current_year = datetime.now().year
+        current_month = datetime.now().month
+
+        # Print the schedule
+        for entry in bin_schedule:
+            bin_types = entry["bins"]
+            date = datetime.strptime(entry["date"], "%d %B")
+
+            if (current_month > 9) and (date.month < 4):
+                date = date.replace(year=(current_year + 1))
+            else:
+                date = date.replace(year=current_year)
+
+            for bin_type in bin_types:
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": date.strftime("%d/%m/%Y"),
+                }
+                bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
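Both new parsers (Hinckley and Bosworth above, Monmouthshire below) scrape collection dates that carry no year and then decide whether the collection belongs to the current or the next year. A minimal sketch of that rollover rule as a hypothetical standalone helper (not part of the codebase):

```python
from datetime import datetime


def infer_collection_year(parsed: datetime, today: datetime) -> datetime:
    """Assume the scraped date falls in the current year, unless it is late in
    the year (after September) and the scraped month is early (before April),
    in which case the collection must already belong to next year."""
    if today.month > 9 and parsed.month < 4:
        return parsed.replace(year=today.year + 1)
    return parsed.replace(year=today.year)


# A "2 January" collection scraped in late November rolls over to the next year
parsed = datetime.strptime("2 January", "%d %B")  # year defaults to 1900
print(infer_collection_year(parsed, datetime(2024, 11, 20)).date())  # 2025-01-02
```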
diff --git a/uk_bin_collection/uk_bin_collection/councils/MonmouthshireCountyCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MonmouthshireCountyCouncil.py
new file mode 100644
index 0000000000..2cefb977ce
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/MonmouthshireCountyCouncil.py
@@ -0,0 +1,70 @@
+import requests
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        URI = (
+            f"https://maps.monmouthshire.gov.uk/?action=SetAddress&UniqueId={user_uprn}"
+        )
+
+        # Make the GET request
+        response = requests.get(URI)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
+
+        waste_collections_div = soup.find("div", {"aria-label": "Waste Collections"})
+
+        # Find all bin collection panels
+        bin_panels = waste_collections_div.find_all("div", class_="atPanelContent")
+
+        current_year = datetime.now().year
+        current_month = datetime.now().month
+
+        for panel in bin_panels:
+            # Extract bin name (e.g., "Household rubbish bag")
+            bin_name = panel.find("h4").text.strip().replace("\r", "").replace("\n", "")
+
+            # Extract collection date (e.g., "Monday 9th December")
+            date_tag = panel.find("p")
+            if date_tag and "Your next collection date is" in date_tag.text:
+                collection_date = date_tag.find("strong").text.strip()
+            else:
+                continue
+
+            collection_date = datetime.strptime(
+                remove_ordinal_indicator_from_date_string(collection_date), "%A %d %B"
+            )
+
+            if (current_month > 9) and (collection_date.month < 4):
+                collection_date = collection_date.replace(year=(current_year + 1))
+            else:
+                collection_date = collection_date.replace(year=current_year)
+
+            dict_data = {
+                "type": bin_name,
+                "collectionDate": collection_date.strftime("%d/%m/%Y"),
+            }
+            bindata["bins"].append(dict_data)
+
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
+        )
+
+        return bindata
diff --git a/uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py
index 892ab5f4a8..1608d0a048 100644
--- a/uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py
+++ b/uk_bin_collection/uk_bin_collection/councils/WarwickDistrictCouncil.py
@@ -1,7 +1,11 @@
 # This script pulls (in one hit) the data
 # from Warick District Council Bins Data
+from datetime import datetime
+
 from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
 
@@ -20,15 +24,30 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
         data = {"bins": []}
 
-        for element in soup.find_all("strong"):
-            bin_type = element.next_element
-            bin_type = bin_type.lstrip()
-            collectionDateElement = element.next_sibling.next_element.next_element
-            collectionDate = collectionDateElement.getText()
-            dict_data = {
-                "type": bin_type,
-                "collectionDate": collectionDate,
-            }
-            data["bins"].append(dict_data)
+        # Find all bin panels
+        bin_panels = soup.find_all("div", class_="col-sm-4 col-lg-3")
+
+        # Iterate through each panel to extract information
+        for panel in bin_panels:
+            bin_type = panel.find("img")["alt"].strip()
+
+            waste_dates = panel.find(
+                "div", class_="col-xs-12 text-center waste-dates margin-bottom-15"
+            )
+
+            for p in waste_dates.find_all("p")[1:]:
+                date = p.text.strip()
+                if " " in date:
+                    date = date.split(" ")[1]
+
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": date,
+                }
+                data["bins"].append(dict_data)
+
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
 
         return data
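The reworked Warwick parser reads each collection date from the panel's `<p>` tags, drops a leading word when the text contains a space, and finally sorts the results chronologically. A minimal sketch with hypothetical `<p>` texts; `date_format` is assumed to be `"%d/%m/%Y"`, the same format the two new parsers above emit:

```python
from datetime import datetime

date_format = "%d/%m/%Y"  # assumed value of the constant from the common module
raw_dates = ["Wednesday 25/12/2024", "11/12/2024"]  # hypothetical <p> texts

data = {"bins": []}
for raw in raw_dates:
    date = raw.strip()
    if " " in date:
        date = date.split(" ")[1]  # keep only the date portion
    data["bins"].append({"type": "Recycling", "collectionDate": date})

# Chronological order, matching the sort added at the end of parse_data
data["bins"].sort(key=lambda x: datetime.strptime(x["collectionDate"], date_format))
print(data["bins"])
```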
diff --git a/wiki/Councils.md b/wiki/Councils.md
index a02a0ed77f..3403f61634 100644
--- a/wiki/Councils.md
+++ b/wiki/Councils.md
@@ -120,6 +120,7 @@ This document is still a work in progress, don't worry if your council isn't listed
 - [Hertsmere Borough Council](#hertsmere-borough-council)
 - [Highland Council](#highland-council)
 - [High Peak Council](#high-peak-council)
+- [Hinckley and Bosworth Borough Council](#hinckley-and-bosworth-borough-council)
 - [Hounslow Council](#hounslow-council)
 - [Hull City Council](#hull-city-council)
 - [Huntingdon District Council](#huntingdon-district-council)
@@ -155,6 +156,7 @@ This document is still a work in progress, don't worry if your council isn't listed
 - [Mid Sussex District Council](#mid-sussex-district-council)
 - [Milton Keynes City Council](#milton-keynes-city-council)
 - [Mole Valley District Council](#mole-valley-district-council)
+- [Monmouthshire County Council](#monmouthshire-county-council)
 - [Moray Council](#moray-council)
 - [Neath Port Talbot Council](#neath-port-talbot-council)
 - [New Forest Council](#new-forest-council)
@@ -1622,6 +1624,17 @@ Note: Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver.
 
 ---
 
+### Hinckley and Bosworth Borough Council
+```commandline
+python collect_data.py HinckleyandBosworthBoroughCouncil https://www.hinckley-bosworth.gov.uk -u XXXXXXXX
+```
+Additional parameters:
+- `-u` - UPRN
+
+Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.
+
+---
+
 ### Hounslow Council
 ```commandline
 python collect_data.py HounslowCouncil https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/
@@ -2043,6 +2056,17 @@ Note: UPRN can only be parsed with a valid postcode.
 
 ---
 
+### Monmouthshire County Council
+```commandline
+python collect_data.py MonmouthshireCountyCouncil https://maps.monmouthshire.gov.uk -u XXXXXXXX
+```
+Additional parameters:
+- `-u` - UPRN
+
+Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.
+
+---
+
 ### Moray Council
 ```commandline
 python collect_data.py MorayCouncil https://bindayfinder.moray.gov.uk/ -u XXXXXXXX
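Combining the new wiki entries with the UPRNs added to `uk_bin_collection/tests/input.json`, concrete invocations for the two new councils look like this (the returned data is the usual `bins` list of `type`/`collectionDate` pairs):

```commandline
python collect_data.py HinckleyandBosworthBoroughCouncil https://www.hinckley-bosworth.gov.uk -u 100030533512
python collect_data.py MonmouthshireCountyCouncil https://maps.monmouthshire.gov.uk -u 100100266220
```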