Skip to content

Commit

Permalink
Merge pull request #1056 from m26dvd/master
Browse files Browse the repository at this point in the history
feat: Council Pack 20
  • Loading branch information
dp247 authored Dec 4, 2024
2 parents 12729e4 + 706547e commit 50e5b4b
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 26 deletions.
14 changes: 13 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,12 @@
"wiki_name": "High Peak Council",
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver."
},
"HinckleyandBosworthBoroughCouncil": {
"url": "https://www.hinckley-bosworth.gov.uk",
"uprn": "100030533512",
"wiki_name": "Hinckley and Bosworth Borough Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"HounslowCouncil": {
"house_number": "17A LAMPTON PARK ROAD, HOUNSLOW",
"postcode": "TW3 4HS",
Expand Down Expand Up @@ -1072,7 +1078,7 @@
"wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"MertonCouncil": {
"url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25851371",
"url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25936129",
"wiki_command_url_override": "https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServices.aspx?ID=XXXXXXXX",
"wiki_name": "Merton Council",
"wiki_note": "Follow the instructions [here](https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServicesSearch.aspx) until you get the \"Your recycling and rubbish collection days\" page, then copy the URL and replace the URL in the command."
Expand Down Expand Up @@ -1132,6 +1138,12 @@
"wiki_name": "Mole Valley District Council",
"wiki_note": "UPRN can only be parsed with a valid postcode."
},
"MonmouthshireCountyCouncil": {
"url": "https://maps.monmouthshire.gov.uk",
"uprn": "100100266220",
"wiki_name": "Monmouthshire County Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"MorayCouncil": {
"uprn": "28841",
"url": "https://bindayfinder.moray.gov.uk/",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ def parse_data(self, page: str, **kwargs) -> dict:
for container in soup.find_all(class_="box-item"):

# Get the next collection dates from the <p> tag containing <strong>
dates_tag = (
container.find("p", string=lambda text: "Next" in text)
.find_next("p")
.find("strong")
)
try:
dates_tag = (
container.find("p", string=lambda text: "Next" in text)
.find_next("p")
.find("strong")
)
except:
continue
collection_dates = (
dates_tag.text.strip().split(", and then ")
if dates_tag
Expand Down
12 changes: 10 additions & 2 deletions uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,19 @@ def parse_data(self, page: str, **kwargs) -> dict:
for collection in bin_collections:
if collection is not None:
bin_type = collection[0].get("BinType")
current_collection_date = collection[0].get("CollectionDate")
if current_collection_date is None:
continue
current_collection_date = datetime.strptime(
collection[0].get("CollectionDate"), "%Y-%m-%d"
current_collection_date, "%Y-%m-%d"
)
next_collection_date = collection[0].get(
"NextScheduledCollectionDate"
)
if next_collection_date is None:
continue
next_collection_date = datetime.strptime(
collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d"
next_collection_date, "%Y-%m-%d"
)

# Work out the most recent collection date to display
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Find the next collection date
date_tag = container.find(class_="font11 text-center")
if date_tag:
collection_date = date_tag.text.strip()
else:
if date_tag.text.strip() == "":
continue
else:
collection_date = date_tag.text.strip()

dict_data = {
"type": bin_type,
Expand Down
17 changes: 17 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ def parse_data(self, page: str, **kwargs) -> dict:
"../Images/Bins/ashBin.gif": "Ash bin",
}

fieldset = soup.find("fieldset")
ps = fieldset.find_all("p")
for p in ps:
collection = p.text.strip().replace("Your next ", "").split(".")[0]
bin_type = collection.split(" day is")[0]
collection_date = datetime.strptime(
remove_ordinal_indicator_from_date_string(collection).split("day is ")[
1
],
"%A %d %B %Y",
)
dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
data["bins"].append(dict_data)

# Find the page body with all the calendars
body = soup.find("div", {"id": "Application_ctl00"})
calendars = body.find_all_next("table", {"title": "Calendar"})
Expand Down
15 changes: 10 additions & 5 deletions uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
)

# Select address from dropdown and wait
inputElement_ad = Select(
driver.find_element(By.ID, "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS")
)

inputElement_ad.select_by_visible_text(user_paon)
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(
By.XPATH,
"//select[@id='FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS']//option[contains(., '"
+ user_paon
+ "')]",
)
)
).click()

WebDriverWait(driver, 10).until(
EC.presence_of_element_located(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse bin collection dates for Hinckley and Bosworth.

        Args:
            page: Unused; the data is fetched directly from the council site.
            **kwargs: Must contain "uprn" identifying the property.

        Returns:
            A dict with a "bins" list of {"type", "collectionDate"} entries,
            sorted by collection date (dd/mm/YYYY).

        Raises:
            requests.HTTPError: If the council site returns an error status.
        """

        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="

        # Make the GET request. A timeout prevents the scraper hanging
        # indefinitely, and raise_for_status surfaces HTTP errors early
        # instead of failing obscurely during HTML parsing.
        response = requests.get(URI, timeout=30)
        response.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(response.content, "html.parser")

        # Find all the bin collection date containers
        bin_schedule = []
        collection_divs = soup.find_all(
            "div", class_=["first_date_bins", "last_date_bins"]
        )

        for div in collection_divs:
            # Extract the date text, e.g. "23 December" (the page omits the year)
            date = div.find("h3", class_="collectiondate").text.strip().replace(":", "")

            # Extract bin types from the alt text of the bin images
            bins = [img["alt"] for img in div.find_all("img", class_="collection")]

            # Append to the schedule
            bin_schedule.append({"date": date, "bins": bins})

        current_year = datetime.now().year
        current_month = datetime.now().month

        # Assign a year to each collection entry and flatten into the output
        for entry in bin_schedule:
            bin_types = entry["bins"]
            date = datetime.strptime(entry["date"], "%d %B")

            # The site omits the year: a Jan-Mar date seen in Oct-Dec must
            # roll over into the next calendar year.
            if (current_month > 9) and (date.month < 4):
                date = date.replace(year=(current_year + 1))
            else:
                date = date.replace(year=current_year)

            for bin_type in bin_types:

                dict_data = {
                    "type": bin_type,
                    "collectionDate": date.strftime("%d/%m/%Y"),
                }
                bindata["bins"].append(dict_data)

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
        )

        return bindata
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse bin collection dates for Monmouthshire.

        Args:
            page: Unused; the data is fetched directly from the council site.
            **kwargs: Must contain "uprn" identifying the property.

        Returns:
            A dict with a "bins" list of {"type", "collectionDate"} entries,
            sorted by collection date (dd/mm/YYYY).

        Raises:
            requests.HTTPError: If the council site returns an error status.
        """

        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = (
            f"https://maps.monmouthshire.gov.uk/?action=SetAddress&UniqueId={user_uprn}"
        )

        # Make the GET request. A timeout prevents the scraper hanging
        # indefinitely, and raise_for_status surfaces HTTP errors early
        # instead of failing obscurely during HTML parsing.
        response = requests.get(URI, timeout=30)
        response.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(response.content, "html.parser")

        waste_collections_div = soup.find("div", {"aria-label": "Waste Collections"})

        # Find all bin collection panels
        bin_panels = waste_collections_div.find_all("div", class_="atPanelContent")

        current_year = datetime.now().year
        current_month = datetime.now().month

        for panel in bin_panels:
            # Extract bin name (e.g., "Household rubbish bag")
            bin_name = panel.find("h4").text.strip().replace("\r", "").replace("\n", "")

            # Extract collection date (e.g., "Monday 9th December");
            # skip panels that do not announce a next collection.
            date_tag = panel.find("p")
            if date_tag and "Your next collection date is" in date_tag.text:
                collection_date = date_tag.find("strong").text.strip()
            else:
                continue

            # The site omits the year, so parse day/month only
            collection_date = datetime.strptime(
                remove_ordinal_indicator_from_date_string(collection_date), "%A %d %B"
            )

            # A Jan-Mar date seen in Oct-Dec must roll over into next year
            if (current_month > 9) and (collection_date.month < 4):
                collection_date = collection_date.replace(year=(current_year + 1))
            else:
                collection_date = collection_date.replace(year=current_year)

            dict_data = {
                "type": bin_name,
                "collectionDate": collection_date.strftime("%d/%m/%Y"),
            }
            bindata["bins"].append(dict_data)

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
        )

        return bindata
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# This script pulls (in one hit) the data
# from Warwick District Council Bins Data

from datetime import datetime

from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


Expand All @@ -20,15 +24,30 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}

for element in soup.find_all("strong"):
bin_type = element.next_element
bin_type = bin_type.lstrip()
collectionDateElement = element.next_sibling.next_element.next_element
collectionDate = collectionDateElement.getText()
dict_data = {
"type": bin_type,
"collectionDate": collectionDate,
}
data["bins"].append(dict_data)
# Find all bin panels
bin_panels = soup.find_all("div", class_="col-sm-4 col-lg-3")

# Iterate through each panel to extract information
for panel in bin_panels:
bin_type = panel.find("img")["alt"].strip()

waste_dates = panel.find(
"div", class_="col-xs-12 text-center waste-dates margin-bottom-15"
)

for p in waste_dates.find_all("p")[1:]:
date = p.text.strip()
if " " in date:
date = date.split(" ")[1]

dict_data = {
"type": bin_type,
"collectionDate": date,
}
data["bins"].append(dict_data)

data["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return data
24 changes: 24 additions & 0 deletions wiki/Councils.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ This document is still a work in progress, don't worry if your council isn't lis
- [Hertsmere Borough Council](#hertsmere-borough-council)
- [Highland Council](#highland-council)
- [High Peak Council](#high-peak-council)
- [Hinckley and Bosworth Borough Council](#hinckley-and-bosworth-borough-council)
- [Hounslow Council](#hounslow-council)
- [Hull City Council](#hull-city-council)
- [Huntingdon District Council](#huntingdon-district-council)
Expand Down Expand Up @@ -155,6 +156,7 @@ This document is still a work in progress, don't worry if your council isn't lis
- [Mid Sussex District Council](#mid-sussex-district-council)
- [Milton Keynes City Council](#milton-keynes-city-council)
- [Mole Valley District Council](#mole-valley-district-council)
- [Monmouthshire County Council](#monmouthshire-county-council)
- [Moray Council](#moray-council)
- [Neath Port Talbot Council](#neath-port-talbot-council)
- [New Forest Council](#new-forest-council)
Expand Down Expand Up @@ -1622,6 +1624,17 @@ Note: Pass the name of the street with the house number parameter, wrapped in do

---

### Hinckley and Bosworth Borough Council
```commandline
python collect_data.py HinckleyandBosworthBoroughCouncil https://www.hinckley-bosworth.gov.uk -u XXXXXXXX
```
Additional parameters:
- `-u` - UPRN

Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.

---

### Hounslow Council
```commandline
python collect_data.py HounslowCouncil https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/
Expand Down Expand Up @@ -2043,6 +2056,17 @@ Note: UPRN can only be parsed with a valid postcode.

---

### Monmouthshire County Council
```commandline
python collect_data.py MonmouthshireCountyCouncil https://maps.monmouthshire.gov.uk -u XXXXXXXX
```
Additional parameters:
- `-u` - UPRN

Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.

---

### Moray Council
```commandline
python collect_data.py MorayCouncil https://bindayfinder.moray.gov.uk/ -u XXXXXXXX
Expand Down

0 comments on commit 50e5b4b

Please sign in to comment.