fix: #709 Update DoverDistrictCouncil.py

robbrad · May 16, 2024 · c4f81f5
1 parent f8d3784
commit c4f81f5
Showing 1 changed file with 36 additions and 44 deletions.
diff --git a/uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/DoverDistrictCouncil.py
@@ -1,49 +1,41 @@
 from bs4 import BeautifulSoup
-from uk_bin_collection.uk_bin_collection.common import *
+from datetime import datetime
+import re
+from uk_bin_collection.uk_bin_collection.common import *  # Consider specific imports
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
-
     def parse_data(self, page: str, **kwargs) -> dict:
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
-
-        data = {"bins": []}
-        collections = []
-
-        bins = soup.find("div", {"class": "results-table-wrapper"}).find_all(
-            "div", {"class": "service-wrapper"}
-        )
-        for bin in bins:
-            bin_type = (
-                bin.find("h3", {"class": "service-name"})
-                .get_text()
-                .replace("Collection", "bin")
-                .strip()
-            )
-            bin_date = datetime.strptime(
-                bin.find("td", {"class": "next-service"})
-                .find("span", {"class": "table-label"})
-                .next_sibling.get_text()
-                .strip(),
-                "%d/%m/%Y",
-            )
-            collections.append((bin_type, bin_date))
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
-            dict_data = {
-                "type": item[0].capitalize(),
-                "collectionDate": item[1].strftime(date_format),
-            }
-            data["bins"].append(dict_data)
-
-        return data
+        soup = BeautifulSoup(page.text, 'html.parser')
+
+        bins_data = {"bins": []}
+        bin_collections = []
+
+        results_wrapper = soup.find("div", {"class": "results-table-wrapper"})
+        if not results_wrapper:
+            return bins_data  # Return empty if the results wrapper is not found
+
+        bins = results_wrapper.find_all("div", {"class": "service-wrapper"})
+        for bin_item in bins:
+            service_name = bin_item.find("h3", {"class": "service-name"})
+            next_service = bin_item.find("td", {"class": "next-service"})
+
+            if service_name and next_service:
+                bin_type = service_name.get_text().replace("Collection", "bin").strip()
+                date_span = next_service.find("span", {"class": "table-label"})
+                date_text = date_span.next_sibling.get_text().strip() if date_span else None
+
+                if date_text and re.match(r"\d{2}/\d{2}/\d{4}", date_text):
+                    try:
+                        bin_date = datetime.strptime(date_text, "%d/%m/%Y")
+                        bin_collections.append((bin_type, bin_date))
+                    except ValueError:
+                        continue
+
+        for bin_type, bin_date in sorted(bin_collections, key=lambda x: x[1]):
+            bins_data["bins"].append({
+                "type": bin_type.capitalize(),
+                "collectionDate": bin_date.strftime("%d/%m/%Y"),
+            })
+
+        return bins_data