Merge pull request #1088 from dp247/202412-fixes
December 2024 fixes
robbrad authored Jan 4, 2025
2 parents 9cb7ade + 6d9af00 commit 7595bd0
Showing 11 changed files with 120 additions and 97 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/behave_pull_request.yml
@@ -35,7 +35,6 @@ jobs:
with:
files: |
uk_bin_collection/uk_bin_collection/councils/**.py
- name: Set Council Tests Environment Variable
id: set-council-tests
run: |
@@ -50,7 +49,6 @@
fi
done
echo "council_tests=$COUNCIL_TESTS" >> $GITHUB_OUTPUT
outputs:
council_tests: ${{ steps.set-council-tests.outputs.council_tests }}

@@ -111,7 +109,6 @@ jobs:
repo=${{ github.event.pull_request.head.repo.full_name || 'robbrad/UKBinCollectionData' }}
branch=${{ github.event.pull_request.head.ref || 'master' }}
make parity-check repo=$repo branch=$branch
integration-tests:
name: Run Integration Tests
needs: setup
2 changes: 1 addition & 1 deletion README.md
@@ -92,7 +92,7 @@ If you miss this on the first setup you can reconfigure it.
"color": "blue"
}
}
```
---

## Standalone Usage
4 changes: 2 additions & 2 deletions uk_bin_collection/tests/input.json
@@ -1993,8 +1993,8 @@
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"WestLindseyDistrictCouncil": {
"house_number": "PRIVATE ACCOMMODATION",
"postcode": "LN8 2AR",
"house_number": "35",
"postcode": "LN8 3AX",
"skip_get_url": true,
"url": "https://www.west-lindsey.gov.uk/",
"wiki_name": "West Lindsey District Council",
@@ -21,7 +21,6 @@ class CouncilClass(AbstractGetBinDataClass):
"""

def parse_data(self, page: str, **kwargs) -> dict:
# Make a BS4 object
driver = None
try:
bin_data_dict = {"bins": []}
@@ -76,12 +75,13 @@ def parse_data(self, page: str, **kwargs) -> dict:
# Get the current year
current_year = datetime.now().year

# Append the year to the date
date_with_year = date_object.replace(year=current_year)

# Check if the parsed date is in the past compared to the current date
if date_with_year < datetime.now():
# If the parsed date is in the past, assume it's for the next year
current_year += 1
# Append the year to the date
date_with_year = date_object.replace(year=current_year)

# Format the date with the year
date_with_year_formatted = date_with_year.strftime(
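The year-handling logic above reflects a pattern these scrapers share: council pages print collection dates without a year, strptime defaults the year to 1900, and the parser has to pin a real one on, rolling dates that have already passed into next year. A minimal standalone sketch of the pattern, comparing the year-qualified date rather than the raw parse (the helper name and sample dates are illustrative, not from this commit):

```python
from datetime import datetime
from typing import Optional

def infer_year(date_object: datetime, now: Optional[datetime] = None) -> datetime:
    """Attach a year to a date parsed without one (strptime defaults to 1900)."""
    now = now or datetime.now()
    # Assume the current year first...
    date_with_year = date_object.replace(year=now.year)
    # ...then roll into next year if that date has already gone by.
    if date_with_year < now:
        date_with_year = date_object.replace(year=now.year + 1)
    return date_with_year

# e.g. seen on 30 December 2024, "2 January" resolves to 2 January 2025
print(infer_year(datetime.strptime("2 January", "%d %B"), datetime(2024, 12, 30)))
```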
117 changes: 64 additions & 53 deletions uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py
@@ -1,12 +1,17 @@
import time
from datetime import datetime
from typing import Optional

from bs4 import BeautifulSoup
from selenium.common import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.drivers.chrome import ChromeDriver

from uk_bin_collection.uk_bin_collection.common import create_webdriver
from uk_bin_collection.uk_bin_collection.common import date_format
@@ -55,78 +60,84 @@ def _parse_data(self, page: str, **kwargs) -> dict:
- Extract info from the 'alt' attribute of the images on that page
"""
bins = []
data = {"bins": []}
collections = []

user_paon = kwargs["paon"]
user_postcode = kwargs["postcode"]

self._driver = driver = create_webdriver(
web_driver=kwargs["web_driver"],
headless=kwargs.get("headless", True),
session_name=__name__,
)
driver.implicitly_wait(1)

driver.get(
"https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins/default.aspx"
"https://my.kirklees.gov.uk/service/Bins_and_recycling___Manage_your_bins"
)

wait_for_element(
driver, By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoPremises"
)
time.sleep(5)

# Switch to iframe
iframe = driver.find_element(By.CSS_SELECTOR, "#fillform-frame-1")
driver.switch_to.frame(iframe)

house_input = driver.find_element(
By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoPremises"
)
wait_for_element(
driver, By.ID, "mandatory_Postcode", timeout=10
)
house_input.send_keys(user_paon)

postcode_input = driver.find_element(
By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoSearch"
By.ID, "Postcode"
)
postcode_input.send_keys(user_postcode)

# submit address search
driver.find_element(By.ID, "butGeoSearch").send_keys(Keys.RETURN)
wait_for_element(driver, By.ID, "List")
time.sleep(2)

WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(
By.XPATH,
"//select[@name='List']//option[contains(., '"
+ user_paon
+ "')]",
)
)
).click()

wait_for_element(
driver,
By.ID,
"cphPageBody_cphContent_wtcDomestic240__lnkAccordionAnchor",
# submitting can be slow
timeout=30,
)
time.sleep(10)

# Open the panel
driver.find_element(
By.ID, "cphPageBody_cphContent_wtcDomestic240__lnkAccordionAnchor"
).click()
# For whatever reason, the page sometimes automatically goes to the next step
next_button = driver.find_element(By.XPATH, '/html/body/div/div/section/form/div/nav/div[2]/button')
if next_button.is_displayed():
next_button.click()

# Domestic waste calendar
wait_for_element(
driver, By.ID, "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"
)
calendar_link = driver.find_element(
By.ID, "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"
)
driver.execute_script("arguments[0].click();", calendar_link)

# <img alt="Recycling collection date 14 March 2024"
# <img alt="Domestic collection date 21 March 2024
date_strings = driver.find_elements(
By.CSS_SELECTOR, 'img[alt*="collection date"]'
)
time.sleep(5)

for date in date_strings:
bin_type, _, _, day, month, year = date.get_attribute("alt").split()
collection_date = datetime.strptime(
f"{day} {month} {year}", "%d %B %Y"
).strftime(date_format)

bins.append(
{
"type": bin_type,
"collectionDate": collection_date,
}
)
soup = BeautifulSoup(self._driver.page_source, features="html.parser")
soup.prettify()

radio_button_text = soup.find_all("label", {"class": "radio-label"})
for label in radio_button_text:
parsed_text = label.text.split("x ")
row = parsed_text[1].lower().split("collection date: ")
bin_type = row[0].split("(")[0].strip()
date_text = row[1].strip().replace(")", "")
if date_text == "today":
bin_date = datetime.now()
else:
bin_date = datetime.strptime(date_text, "%A %d %B %Y")
collections.append((bin_type, bin_date))

ordered_data = sorted(collections, key=lambda x: x[1])
for item in ordered_data:
dict_data = {
"type": item[0].replace("standard ", "").capitalize(),
"collectionDate": item[1].strftime(date_format),
}
data["bins"].append(dict_data)

return {"bins": bins}
return data
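The rewritten Kirklees flow no longer clicks through to a bin calendar; it reads the collection details straight out of the radio-button labels on the my.kirklees.gov.uk form. A self-contained sketch of that label parsing and the sort-then-format step (the sample label strings are hypothetical, reverse-engineered from the split calls above):

```python
from datetime import datetime

date_format = "%d/%m/%Y"  # the project's shared output format

# Hypothetical labels shaped the way the splits above expect:
labels = [
    "1 x Standard domestic (collection date: Friday 10 January 2025)",
    "1 x Standard recycling (collection date: today)",
]

collections = []
for text in labels:
    row = text.split("x ")[1].lower().split("collection date: ")
    bin_type = row[0].split("(")[0].strip()      # e.g. "standard domestic"
    date_text = row[1].strip().replace(")", "")  # e.g. "friday 10 january 2025"
    if date_text == "today":
        bin_date = datetime.now()
    else:
        bin_date = datetime.strptime(date_text, "%A %d %B %Y")
    collections.append((bin_type, bin_date))

# Earliest collection first, dropping the "standard " prefix for display.
for bin_type, bin_date in sorted(collections, key=lambda x: x[1]):
    print(bin_type.replace("standard ", "").capitalize(), bin_date.strftime(date_format))
```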
25 changes: 15 additions & 10 deletions uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py
@@ -48,15 +48,13 @@ def parse_data(self, page: str, **kwargs) -> dict:
alternateCheck = False

strong = soup.find_all("strong")
collections = []

if alternateCheck:
bin_types = strong[2].text.strip().replace(".", "").split(" and ")
for bin in bin_types:
dict_data = {
"type": bin,
"collectionDate": strong[1].text.strip(),
}
bindata["bins"].append(dict_data)
collections.append((bin.capitalize(), datetime.strptime(strong[1].text.strip(), date_format)))

else:
p_tag = soup.find_all("p")
i = 1
@@ -65,11 +63,18 @@
p.text.split("Your ")[1].split(" is collected")[0].split(" and ")
)
for bin in bin_types:
dict_data = {
"type": bin,
"collectionDate": strong[i].text.strip(),
}
bindata["bins"].append(dict_data)
collections.append((bin.capitalize(), datetime.strptime(strong[i].text.strip(), date_format)))
i += 2

if len(strong) > 4:
collections.append(("Garden", datetime.strptime(strong[4].text.strip(), date_format)))

ordered_data = sorted(collections, key=lambda x: x[1])
for item in ordered_data:
dict_data = {
"type": item[0] + " bin",
"collectionDate": item[1].strftime(date_format),
}
bindata["bins"].append(dict_data)

return bindata
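Both Norwich branches now collect (bin type, datetime) tuples and defer formatting until after a single sort, instead of appending strings in page order. A compact sketch of the else-branch walk, where each <p> names the bins and every second <strong> carries the matching date (the page fragments are hypothetical):

```python
from datetime import datetime

date_format = "%d/%m/%Y"
bindata = {"bins": []}

paragraphs = [
    "Your rubbish and food waste is collected weekly",
    "Your recycling is collected fortnightly",
]
strongs = ["", "06/01/2025", "", "09/01/2025"]  # dates sit at odd indices

collections = []
i = 1
for p in paragraphs:
    bin_types = p.split("Your ")[1].split(" is collected")[0].split(" and ")
    for bin in bin_types:  # `bin` mirrors the loop variable used above
        collections.append((bin.capitalize(), datetime.strptime(strongs[i], date_format)))
    i += 2

for item in sorted(collections, key=lambda x: x[1]):
    bindata["bins"].append(
        {"type": item[0] + " bin", "collectionDate": item[1].strftime(date_format)}
    )
print(bindata)
```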
@@ -13,11 +13,6 @@ class CouncilClass(AbstractGetBinDataClass):

def parse_data(self, page: str, **kwargs) -> dict:
data = {"bins": []}
bin_types = {
"240L RUBBISH BIN": "Black bin",
"240L GARDEN BIN": "Green bin",
"180L RECYCLING BIN": "Blue lid bin",
}
collections = []

user_postcode = kwargs.get("postcode")
@@ -73,7 +68,8 @@ def parse_data(self, page: str, **kwargs) -> dict:

for row in table_rows:
row_text = row.text.strip().split("\n")
bin_type = bin_types.get(row_text[0])
bin_text = row_text[0].split(" ")
bin_type = ' '.join(bin_text[1:]).capitalize()
collections.append(
(bin_type, datetime.strptime(row_text[1], "%A %d %b %Y"))
)
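The hard-coded lookup table is gone: the display name is now derived from the row text itself, so a new container size no longer needs a code change. Note the result is the bin's own description ("Garden bin") rather than the old colour label ("Green bin"). For example, with a row text shaped like the old dictionary keys:

```python
row_text = "240L GARDEN BIN"
bin_text = row_text.split(" ")
bin_type = " ".join(bin_text[1:]).capitalize()
print(bin_type)  # -> "Garden bin"
```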
@@ -6,8 +6,18 @@
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# import the wonderful Beautiful Soup and the URL grabber

def parse_collection_date(date_string) -> datetime:
now = datetime.now()
if date_string == "is due today":
return now

parsed_date = datetime.strptime(date_string, "%A, %d %B").replace(year=now.year)

if now.month == 12 and parsed_date.month < 12:
parsed_date = parsed_date.replace(year=(now.year + 1))

return parsed_date
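
A quick sanity check of the helper's December rollover (a hypothetical run, assuming datetime.now() falls on 30 December 2024):

```python
for raw in ("is due today", "Thursday, 02 January", "Tuesday, 31 December"):
    print(raw, "->", parse_collection_date(raw).date())
# "02 January" rolls into 2025; "31 December" stays in 2024.
```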

class CouncilClass(AbstractGetBinDataClass):
"""
20 changes: 12 additions & 8 deletions uk_bin_collection/uk_bin_collection/councils/WalsallCouncil.py
@@ -28,29 +28,33 @@ def parse_data(self, page: str, **kwargs) -> dict:
response = requests.get(URI, headers=headers)

soup = BeautifulSoup(response.text, "html.parser")
# Extract links to collection shedule pages and iterate through the pages
schedule_links = soup.findAll("a", {"class": "nav-link"}, href=True)
# Extract links to collection schedule pages and iterate through the pages
schedule_links = soup.findAll("td")

for item in schedule_links:
if "roundname" in item["href"]:
if "roundname" in item.contents[1]["href"]:
# get bin colour
bincolour = item["href"].split("=")[-1].split("%")[0].upper()
binURL = "https://cag.walsall.gov.uk" + item["href"]
r = requests.get(binURL, headers=headers)
bin_colour = item.contents[1]["href"].split("=")[-1].split("%")[0].upper()
bin_url = "https://cag.walsall.gov.uk" + item.contents[1]["href"]
r = requests.get(bin_url, headers=headers)
if r.status_code != 200:
print(f"Collection details for {bin_colour.lower()} bin could not be retrieved.")
break
soup = BeautifulSoup(r.text, "html.parser")
table = soup.findAll("tr")
for tr in table:
td = tr.findAll("td")
if td:
dict_data = {
"type": bincolour,
"type": bin_colour.capitalize() + " bin",
"collectionDate": datetime.strptime(
td[1].text.strip(), "%d/%m/%Y"
).strftime("%d/%m/%Y"),
}
bindata["bins"].append(dict_data)

bindata["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return bindata
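The bin colour still comes from the roundname query parameter on each schedule link; what changed is where the <a> tags are found (inside <td> cells) and the friendlier type label. A one-liner sketch of the extraction, against a hypothetical href:

```python
# Hypothetical link target shaped like the hrefs the loop above follows:
href = "/page/printable-round-calendar?roundname=GREEN%20Round%202"

bin_colour = href.split("=")[-1].split("%")[0].upper()  # "GREEN"
print(bin_colour.capitalize() + " bin")                 # "Green bin"
```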
@@ -99,7 +99,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
).replace(year=datetime.now().year)

food_div = soup.find(
"div", {"id": "FINDYOURBINDAYS_RECYCLINGDATE_OUTERDIV"}
"div", {"id": "FINDYOURBINDAYS_FOODWASTEDATE_OUTERDIV"}
)
food_date = food_div.find_all("div")[2]
if food_date.text == "Today":