Merge pull request #683 from jimmygulp/BradfordMDC

Added Bradford MDC - closes #442
robbrad · Apr 27, 2024 · 46b77dd · 46b77dd
2 parents 0088ae4 + cbb1881
commit 46b77dd
Show file tree

Hide file tree

Showing 3 changed files with 117 additions and 1 deletion.
diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature
@@ -71,6 +71,11 @@ Feature: Test each council output matches expected results
 		| council | selenium_url | selenium_mode |
 		| BoltonCouncil | http://selenium:4444  | local  |
 
+        @BradfordMDC
+		Examples: BradfordMDC
+		| council | selenium_url | selenium_mode |
+		| BradfordMDC | None  | None  |
+
         @BrightonandHoveCityCouncil
 		Examples: BrightonandHoveCityCouncil
 		| council | selenium_url | selenium_mode |

diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
@@ -95,6 +95,14 @@
     "wiki_name": "Bolton Council",
     "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required single field that was UPRN and full address, now requires UPRN and postcode as separate fields."
   },
+  "BradfordMDC": {
+    "custom_component_show_url_field": false,
+    "skip_get_url": true,
+    "uprn": "100052235823",
+    "url": "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
+    "wiki_name": "Bradford MDC",
+    "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Post code isn't parsed by this script, but you can pass it in double quotes."
+  },
   "BrightonandHoveCityCouncil": {
     "house_number": "44 Carden Avenue, Brighton, BN1 8NE",
     "postcode": "BN1 8NE",
@@ -1055,4 +1063,4 @@
     "url": "https://waste-api.york.gov.uk/api/Collections/GetBinCollectionDataForUprn/",
     "wiki_name": "York Council"
   }
-}
+}
diff --git a/uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py b/uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py
@@ -0,0 +1,103 @@
+import dumper
+import requests
+from bs4 import BeautifulSoup
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+
+        # UPRN is passed in via a cookie. Set cookies/params and GET the page
+        cookies = {
+            "COLLECTIONDATES": f"{user_uprn}",
+        }
+        headers = {
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+            "Accept-Language": "en-GB,en;q=0.7",
+            "Cache-Control": "max-age=0",
+            "Connection": "keep-alive",
+            "Referer": "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "same-origin",
+            "Sec-Fetch-User": "?1",
+            "Sec-GPC": "1",
+            "Upgrade-Insecure-Requests": "1",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
+        }
+        params = {
+            "ebp": "30",
+            "ebd": "0",
+            "ebz": "1_1713270660323",
+        }
+        requests.packages.urllib3.disable_warnings()
+        response = requests.get(
+            "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
+            params=params,
+            headers=headers,
+            cookies=cookies,
+        )
+
+        # Parse response text for super speedy finding
+        soup = BeautifulSoup(response.text, features="html.parser")
+        soup.prettify()
+
+        data = {"bins": []}
+
+        # BradfordMDC site has lots of embedded tables, find the table titled 'Your next general/recycling collections are:'
+        for bin in soup.find_all(attrs={"class": "CTID-FHGh1Q77-_"}):
+             if bin.find_all(attrs={"class": "CTID-62bNngCB-_"}):
+                 bin_type = "General Waste"
+                 bin_colour = "Green"
+                 bin_date_text = bin.find(attrs={"class": "CTID-62bNngCB-_"}).get_text()
+             elif bin.find_all(attrs={"class": "CTID-LHo9iO0y-_"}):
+                 bin_type = "Recycling Waste"
+                 bin_colour = "Grey"
+                 bin_date_text = bin.find(attrs={"class": "CTID-LHo9iO0y-_"}).get_text()
+             else:
+                 raise ValueError(f"No bin info found in {bin_type_info[0]}")
+
+             # Collection Date info is alongside the bin type, we got the whole line in the if/elif above
+             # below strips the text off at the beginning, to get a date, though recycling is a character shorter hence the lstrip
+             bin_date_info = bin_date_text[29:50].lstrip(' ')
+
+             if contains_date(bin_date_info):
+                bin_date = get_next_occurrence_from_day_month(
+                    datetime.strptime(
+                        bin_date_info,# + " " + datetime.today().strftime("%Y"),
+                        "%a %b %d %Y",
+                    )
+                ).strftime(date_format)
+                #print(bin_date_info)
+                #print(bin_date)
+            # On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string
+             else:
+                bin_date = get_next_occurrence_from_day_month(
+                    datetime.strptime(
+                        bin_date_info[1] + " " + datetime.today().strftime("%Y"),
+                        "%a %b %d %Y",
+                    )
+                ).strftime(date_format)
+
+        # Build data dict for each entry
+        dict_data = {
+           "type": bin_type,
+           "collectionDate": bin_date,
+           "colour": bin_colour,
+        }
+        data["bins"].append(dict_data)
+
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
+        return data