fix: fix date parsing and change BS4 logic

robbrad · Oct 14, 2023 · 03d6709 · 03d6709
1 parent 32bebe4
commit 03d6709
Showing 1 changed file with 34 additions and 22 deletions.
diff --git a/uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BromleyBoroughCouncil.py
@@ -1,5 +1,6 @@
 # This script pulls (in one hit) the data from Bromley Council Bins Data
-import dateutil.parser
+import datetime
+from dateutil.relativedelta import relativedelta
 from bs4 import BeautifulSoup
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
@@ -20,27 +21,38 @@ def parse_data(self, page: str, **kwargs) -> dict:
         soup.prettify()
 
         bin_data_dict = {"bins": []}
+        collections = []
+
+
+        # Search for the specific bins in the table using BS4
+        bin_types = soup.find_all("h3", class_="govuk-heading-m waste-service-name")
+        collection_info = soup.find_all("dl", {"class": "govuk-summary-list"})
+
+        # Raise error if data is not loaded at time of scrape (30% chance it is)
+        if len(bin_types) == 0:
+            raise ConnectionError("Error fetching council data: data absent when page was scraped.")
+
+        # Parse the data
+        for idx, value in enumerate(collection_info):
+            bin_type = bin_types[idx].text.strip()
+            collection_date = value.contents[3].contents[3].text.strip()
+            next_collection = datetime.strptime(remove_ordinal_indicator_from_date_string(collection_date.replace(',', '')), "%A %d %B")
+            curr_date = datetime.now().date()
+            next_collection = next_collection.replace(year=curr_date.year)
+            if curr_date.month == 12 and next_collection.month == 1:
+                next_collection = next_collection + relativedelta(years=1)
+            collections.append((bin_type, next_collection))
+
+        # Sort the text and list elements by date
+        ordered_data = sorted(collections, key=lambda x: x[1])
+
+        # Put the elements into the dictionary
+        for item in ordered_data:
+            dict_data = {
+                "type": item[0],
+                "collectionDate": item[1].strftime(date_format),
+            }
+            bin_data_dict["bins"].append(dict_data)
 
-        # Search for the specific bin in the table using BS4
-        rows = soup.find("div", class_=("waste__collections")).find_all(
-            "h3",
-            class_=("waste-service-name",),
-        )
-
-        # Loops the Rows
-        for row in rows:
-            bin_type = row.get_text().strip()
-            collectionDate = row.find_all_next(
-                "dd", {"class": "govuk-summary-list__value"}
-            )
-            # Make each Bin element in the JSON, but only if we have a date available
-            if collectionDate:
-                date = dateutil.parser.parse(collectionDate[1].text.strip())
-                dict_data = {
-                    "type": bin_type,
-                    "collectionDate": date.strftime(date_format),
-                }
-                # Add data to the main JSON Wrapper
-                bin_data_dict["bins"].append(dict_data)
 
         return bin_data_dict