Merge pull request #909 from rustyd0g/MidlothianCouncil

robbrad · Oct 20, 2024 · 038c72f · 038c72f
2 parents 27914d7 + e427b6a
commit 038c72f
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 0 deletions.
diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
@@ -719,6 +719,12 @@
         "wiki_name": "Mid and East Antrim Borough Council",
         "wiki_note": "Pass the house name/number plus the name of the street with the postcode parameter, wrapped in double quotes.  Check the address in the web site first. This version will only pick the first SHOW button returned by the search or if it is fully unique.  The search is not very predictable (e.g. house number 4 returns 14,24,4,44 etc.)."
     },
+    "MidlothianCouncil": {
+        "url": "https://www.midlothian.gov.uk/directory_record/92551426/glenesk_bonnyrigg_eh19_3je",
+        "wiki_command_url_override": "https://www.midlothian.gov.uk/directory_record/XXXXXX/XXXXXX",
+        "wiki_name": "Midlothian Council",
+        "wiki_note": "Follow the instructions [here](https://www.midlothian.gov.uk/info/1054/bins_and_recycling/343/bin_collection_days) until you reach the page that shows the weekly collections for your address, then copy that page's URL and use it in place of the URL in the command."
+    },
     "MidSussexDistrictCouncil": {
         "house_number": "OAKLANDS, OAKLANDS ROAD RH16 1SS",
         "postcode": "RH16 1SS",

diff --git a/uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py
@@ -0,0 +1,68 @@
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Scrape upcoming bin collections from a Midlothian Council page.
+
+    As a concrete subclass of AbstractGetBinDataClass it supplies
+    ``parse_data``.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Extract bin types and collection dates from the fetched page.
+
+        :param page: fetched page; despite the ``str`` annotation its
+            ``.text`` attribute is read, so a response-like object is needed
+        :param kwargs: extra keyword arguments; not used by this parser
+        :return: ``{"bins": [...]}`` holding a ``type``/``collectionDate``
+            entry per recognised row (empty list when nothing matches)
+        """
+        soup = BeautifulSoup(page.text, features="html.parser")
+        data = {"bins": []}
+
+        # Heading text shown on the page -> bin type reported to callers
+        bin_types = {
+            "Next recycling collection": "Recycling",
+            "Next grey bin collection": "Grey Bin",
+            "Next brown bin collection": "Brown Bin",
+            "Next food bin collection": "Food Bin",
+        }
+
+        # Collections are listed in the <ul> with the class "data-table"
+        bin_collections = soup.find("ul", {"class": "data-table"})
+        if not bin_collections:
+            return data
+
+        # The first two <li> elements are skipped (not relevant)
+        for item in bin_collections.find_all("li")[2:]:
+            if not (item.h2 and item.div):
+                continue
+            bin_type = bin_types.get(item.h2.text.strip())
+            date_text = item.div.text.strip()
+            # Rows with an unknown heading or no date text are ignored
+            if not (bin_type and date_text):
+                continue
+
+            try:
+                # Page dates look like "<weekday name> dd/mm/yyyy"
+                collection_date = datetime.strptime(
+                    date_text, "%A %d/%m/%Y"
+                ).strftime(date_format)
+            except ValueError:
+                # Best effort: skip a row whose date cannot be parsed
+                continue
+
+            data["bins"].append(
+                {
+                    "type": bin_type,
+                    "collectionDate": collection_date,
+                }
+            )
+
+        return data