Skip to content

Commit

Permalink
Merge pull request #489 from robbrad/488_blackburnfixes
Browse files Browse the repository at this point in the history
fix: 488_blackburnfixes
  • Loading branch information
dp247 authored Dec 16, 2023
2 parents ae7a366 + f58d2a5 commit da3365e
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Feature: Test each council output matches expected results
| BCPCouncil | None | None |
| BedfordshireCouncil | None | None |
| BexleyCouncil | None | None |
| BlackburnCouncil | None | None |
| BlackburnCouncil | http://selenium:4444 | local |
| BoltonCouncil | None | None |
| BristolCityCouncil | None | None |
| BromleyBoroughCouncil | None | None |
Expand Down
3 changes: 2 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
"uprn": "100010733027",
"url": "https://mybins.blackburn.gov.uk/api/mybins/getbincollectiondays?uprn=100010733027&month=8&year=2022",
"wiki_command_url_override": "https://www.blackburn.gov.uk",
"wiki_name": "Blackburn Council"
"wiki_name": "Blackburn Council",
"web_driver": "http://selenium:4444"
},
"BoltonCouncil": {
"skip_get_url": true,
Expand Down
102 changes: 48 additions & 54 deletions uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import json
from collections import OrderedDict
from datetime import datetime

from bs4 import BeautifulSoup
import requests
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
import ssl
import urllib3
import logging

class CustomHttpAdapter (requests.adapters.HTTPAdapter):
    '''Transport adapter that allows us to use a custom ssl_context.'''
Expand All @@ -34,63 +35,56 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}
uprn = kwargs.get("uprn")
web_driver = kwargs.get("web_driver")
current_month = datetime.today().strftime("%m")
current_year = datetime.today().strftime("%Y")
url = (
f"https://mybins.blackburn.gov.uk/api/mybins/getbincollectiondays?uprn={uprn}&month={current_month}"
f"&year={current_year}"
)
driver = create_webdriver(web_driver)
driver.get(url)

# Build request header string, then parse it and get response
response_header_str = (
"Accept: application/json, text/plain, */*|Accept-Encoding: gzip, deflate, "
"br|Accept-Language: en-GB,en;q=0.9|Connection: keep-alive|Host: "
"mybins.blackburn.gov.uk|Referer: "
"https://mybins.blackburn.gov.uk/calendar/MTAwMDEwNzUwNzQy|Sec-Fetch-Dest: "
"empty|Sec-Fetch-Mode: cors|Sec-Fetch-Site: same-origin|Sec-GPC: 1|User-Agent: "
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/103.0.5060.134 Safari/537.36 "
)
response_headers = parse_header(response_header_str)
requests.packages.urllib3.disable_warnings()
session = requests.Session()
ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
ctx.options |= 0x4
session.mount('https://', CustomHttpAdapter(ctx))

response = session.get(url, headers=response_headers)

# Return JSON from response and loop through collections
json_result = json.loads(response.content)
bin_collections = json_result["BinCollectionDays"]
for collection in bin_collections:
if collection is not None:
bin_type = collection[0].get("BinType")
current_collection_date = datetime.strptime(
collection[0].get("CollectionDate"), "%Y-%m-%d"
)
next_collection_date = datetime.strptime(
collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d"
)

# Work out the most recent collection date to display
if (
datetime.today().date()
<= current_collection_date.date()
< next_collection_date.date()
):
collection_date = current_collection_date
else:
collection_date = next_collection_date

dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
data["bins"].append(dict_data)

data["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return data
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Find the <pre> tag that contains the JSON data
pre_tag = soup.find('pre')

if pre_tag:
# Extract the text content within the <pre> tag


# Return JSON from response and loop through collections
json_result = json.loads(pre_tag.contents[0])
bin_collections = json_result["BinCollectionDays"]
for collection in bin_collections:
if collection is not None:
bin_type = collection[0].get("BinType")
current_collection_date = datetime.strptime(
collection[0].get("CollectionDate"), "%Y-%m-%d"
)
next_collection_date = datetime.strptime(
collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d"
)

# Work out the most recent collection date to display
if (
datetime.today().date()
<= current_collection_date.date()
< next_collection_date.date()
):
collection_date = current_collection_date
else:
collection_date = next_collection_date

dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
data["bins"].append(dict_data)

data["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return data

0 comments on commit da3365e

Please sign in to comment.