Skip to content

Commit

Permalink
Merge pull request #499 from robbrad/fix_broken_councils
Browse files Browse the repository at this point in the history
191_fixingbroken_councils
  • Loading branch information
OliverCullimore authored Dec 24, 2023
2 parents 1f56a11 + 46a5551 commit 286085f
Show file tree
Hide file tree
Showing 8 changed files with 343 additions and 256 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@ Feature: Test each council output matches expected results
| BathAndNorthEastSomersetCouncil | None | None |
| BCPCouncil | None | None |
| BedfordshireCouncil | None | None |
| BexleyCouncil | http://selenium:4444 | local |
| BlackburnCouncil | http://selenium:4444 | local |
| BoltonCouncil | http://selenium:4444 | local |
| BristolCityCouncil | None | None |
| BromleyBoroughCouncil | http://selenium:4444 | local |
| BroxtoweBoroughCouncil | http://selenium:4444 | local |
| BuckinghamshireCouncil | http://selenium:4444 | local |
| BuryCouncil | None | None |
| CalderdaleCouncil | http://selenium:4444 | local |
| CannockChaseDistrictCouncil | None | None |
| CardiffCouncil | None | None |
| CastlepointDistrictCouncil | http://selenium:4444 | local |
| CharnwoodBoroughCouncil | None | None |
| ChelmsfordCityCouncil | None | None |
| CheshireEastCouncil | None | None |
Expand Down
23 changes: 15 additions & 8 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@
},
"BexleyCouncil": {
"skip_get_url": true,
"uprn": "[email protected]",
"url": "https://www.bexley.gov.uk/",
"url": "https://mybexley.bexley.gov.uk/service/When_is_my_collection_day",
"web_driver": "http://selenium:4444",
"postcode": "DA5 3AH",
"uprn": "100020196143",
"house_number":"1 Dorchester Avenue, Bexley",
"wiki_name": "Bexley Council",
"wiki_note": "In order to use this parser, you will need to sign up to [Bexley's @Home app](https://www.bexley.gov.uk/services/rubbish-and-recycling/bexley-home-recycling-app/about-app) (available for [iOS](https://apps.apple.com/gb/app/home-collection-reminder/id1050703690) and [Android](https://play.google.com/store/apps/details?id=com.contender.athome.android)).\nComplete the setup by entering your email and setting your address with postcode and address line.\nOnce you can see the calendar, you _should_ be good to run the parser.\nJust pass the email you used in quotes in the UPRN parameter.\n"
},
Expand All @@ -58,10 +61,11 @@
"BoltonCouncil": {
"skip_get_url": true,
"postcode": "BL1 5PQ",
        "uprn": "100010886936",
"url": "https://carehomes.bolton.gov.uk/bins.aspx",
"wiki_name": "Bolton Council",
        "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required single field that was UPRN and full address, now requires UPRN and postcode as separate fields.",
"web_driver": "http://selenium:4444"
},
"BristolCityCouncil": {
"skip_get_url": true,
Expand All @@ -73,7 +77,8 @@
"url": "https://recyclingservices.bromley.gov.uk/waste/6087017",
"wiki_command_url_override": "https://recyclingservices.bromley.gov.uk/waste/XXXXXXX",
"wiki_name": "Bromley Borough Council",
        "wiki_note": "Follow the instructions [here](https://recyclingservices.bromley.gov.uk/waste) until the \"Your bin days\" page then copy the URL and replace the URL in the command.",
"web_driver": "http://selenium:4444"
},
"BroxtoweBoroughCouncil": {
"postcode": "NG16 2LY",
Expand Down Expand Up @@ -104,8 +109,9 @@
"postcode": "OL14 7EX",
"skip_get_url": true,
"uprn": "010035034598",
        "url": "https://www.calderdale.gov.uk/environment/waste/household-collections/collectiondayfinder.jsp",
        "wiki_name": "Calderdale Council",
"web_driver": "http://selenium:4444"
},
"CannockChaseDistrictCouncil": {
"postcode": "WS15 1JA",
Expand All @@ -125,7 +131,8 @@
"skip_get_url": true,
"uprn": "4525",
"url": "https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=wastecalendar",
        "wiki_name": "Castlepoint District Council",
"web_driver": "http://selenium:4444"
},
"CharnwoodBoroughCouncil": {
"url": "https://my.charnwood.gov.uk/location?put=cbc10070067259&rememberme=0&redirect=%2F",
Expand Down
20 changes: 12 additions & 8 deletions uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,18 @@ def parse_data(self, page: str, **kwargs) -> dict:

for collection_type, collection_class in COLLECTION_KINDS.items():
for date in soup.select(f"div#{collection_class} li"):
bins.append({
"type": collection_type,
"collectionDate": datetime.strptime(
# Friday, 21 July 2023
date.get_text(strip=True),
'%A, %d %B %Y'
).strftime(date_format)
})

date_pattern = r'\d{1,2}\s\w+\s\d{4}' # Regex pattern to extract date
match = re.search(date_pattern, date.get_text(strip=True))

if match:
extracted_date = match.group()
formatted_date = datetime.strptime(extracted_date, '%d %B %Y').strftime(date_format)

bins.append({
"type": collection_type,
"collectionDate": formatted_date
})

return {
"bins": bins
Expand Down
149 changes: 102 additions & 47 deletions uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
import json
from bs4 import BeautifulSoup
from datetime import datetime
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys

import requests
import time
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber


class CouncilClass(AbstractGetBinDataClass):
Expand All @@ -14,50 +22,97 @@ class CouncilClass(AbstractGetBinDataClass):
"""

def parse_data(self, page: str, **kwargs) -> dict:
# User email from @Home app as UPRN
user_email = kwargs.get("uprn")
headers = {
"X-country": "gb",
"X-email": user_email,
"Connection": "Keep-Alive",
}

# Sniffed from the app
requests.packages.urllib3.disable_warnings()
response = requests.get(
"https://services.athomeapp.net/ServiceData/GetUserRoundJson",
headers=headers,
)

# 200 is OK. Sometimes it times out and gives this, but I'm not parsing HTTP codes
if response.status_code != 200:
raise ValueError(
"Error parsing API. Please check your email is correct and registered on the @Home app."
)

# Load in the json and only get the bins
json_data = json.loads(response.text)["userrounds"]
data = {"bins": []}
collections = []

# For each bin, run through the list of dates and add them to a collection
for item in json_data:
bin_type = item["containername"]
for sched in item["nextcollectiondates"]:
bin_collection = datetime.strptime(
sched["datestring"], "%d %m %Y %H:%M"
)
if bin_collection.date() >= datetime.now().date():
collections.append((bin_type, bin_collection))
page = "https://mybexley.bexley.gov.uk/service/When_is_my_collection_day"

# Order the collection of bins and dates by date order, then add to dict
ordered_data = sorted(collections, key=lambda x: x[1])
data = {"bins": []}
for item in ordered_data:
dict_data = {
"type": item[0],
"collectionDate": item[1].strftime(date_format),
}
data["bins"].append(dict_data)

user_uprn = kwargs.get("uprn")
user_paon = kwargs.get("paon")
user_postcode = kwargs.get("postcode")
web_driver = kwargs.get("web_driver")

# Create Selenium webdriver
driver = create_webdriver(web_driver)
driver.get(page)

# If you bang in the house number (or property name) and postcode in the box it should find your property

iframe_presense = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "fillform-frame-1"))
)


driver.switch_to.frame(iframe_presense)
wait = WebDriverWait(driver, 60)
start_btn = wait.until(
EC.element_to_be_clickable((By.XPATH, "//button/span[contains(text(), 'Next')]"))
)

start_btn.click()

inputElement_postcodesearch = wait.until(
EC.element_to_be_clickable((By.NAME, "postcode_search"))
)
inputElement_postcodesearch.send_keys(user_postcode)

find_address_btn = wait.until(
EC.element_to_be_clickable((By.XPATH, '//*[@id="search"]'))
)
find_address_btn.click()

dropdown_options = wait.until(
EC.presence_of_element_located((By.XPATH, '//*[@id="select2-chosen-1"]'))
)
time.sleep(2)
dropdown_options.click()
time.sleep(1)
dropdown_input = wait.until(
EC.presence_of_element_located((By.XPATH, '//*[@id="s2id_autogen1_search"]'))
)
time.sleep(1)
dropdown_input.send_keys(user_paon)
dropdown_input.send_keys(Keys.ENTER)

results_found = wait.until(
EC.presence_of_element_located((By.CLASS_NAME, "found-content"))
)
finish_btn = wait.until(
EC.element_to_be_clickable((By.XPATH, "//button/span[contains(text(), 'Next')]"))
)
finish_btn.click()
final_page = wait.until(
EC.presence_of_element_located((By.CLASS_NAME, "waste-header-container"))
)


soup = BeautifulSoup(driver.page_source, features="html.parser")

bin_fields = soup.find_all("div", class_="waste-panel-container")
# Define your XPath

for bin in bin_fields:

# Extract h3 text from the current element
h3_text = bin.find('h3', class_='container-name').get_text(strip=True) if bin.find('h3', class_='container-name') else None

date_text = bin.find('p', class_='container-status').get_text(strip=True) if bin.find('p', class_='container-status') else None

if h3_text and date_text:
# Parse the date using the appropriate format
parsed_date = datetime.strptime(date_text, "%A %d %B")

# Assuming the current year is used for the collection date
current_year = datetime.now().year

# If the parsed date is in the past, assume it's for the next year
if parsed_date < datetime.now():
current_year += 1

data["bins"].append(
{
"type": h3_text,
"collectionDate": parsed_date.replace(year=current_year).strftime("%d/%m/%Y")
}
)

return data
Loading

0 comments on commit 286085f

Please sign in to comment.