diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index cf1edd426d..8085efd8c0 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -15,7 +15,7 @@ Feature: Test each council output matches expected results | BCPCouncil | None | None | | BedfordshireCouncil | None | None | | BexleyCouncil | None | None | - | BlackburnCouncil | None | None | + | BlackburnCouncil | http://selenium:4444 | local | | BoltonCouncil | None | None | | BristolCityCouncil | None | None | | BromleyBoroughCouncil | None | None | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 2c3700960c..4ba6919e47 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -52,7 +52,8 @@ "uprn": "100010733027", "url": "https://mybins.blackburn.gov.uk/api/mybins/getbincollectiondays?uprn=100010733027&month=8&year=2022", "wiki_command_url_override": "https://www.blackburn.gov.uk", - "wiki_name": "Blackburn Council" + "wiki_name": "Blackburn Council", + "web_driver": "http://selenium:4444" }, "BoltonCouncil": { "skip_get_url": true, diff --git a/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py index 2da2510c09..ab4a9f45e4 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py @@ -1,13 +1,14 @@ import json from collections import OrderedDict from datetime import datetime - +from bs4 import BeautifulSoup import requests from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass import ssl import urllib3 +import logging class CustomHttpAdapter (requests.adapters.HTTPAdapter): '''Transport 
adapter" that allows us to use custom ssl_context.''' @@ -34,63 +35,56 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} uprn = kwargs.get("uprn") + web_driver = kwargs.get("web_driver") current_month = datetime.today().strftime("%m") current_year = datetime.today().strftime("%Y") url = ( f"https://mybins.blackburn.gov.uk/api/mybins/getbincollectiondays?uprn={uprn}&month={current_month}" f"&year={current_year}" ) + driver = create_webdriver(web_driver) + driver.get(url) - # Build request header string, then parse it and get response - response_header_str = ( - "Accept: application/json, text/plain, */*|Accept-Encoding: gzip, deflate, " - "br|Accept-Language: en-GB,en;q=0.9|Connection: keep-alive|Host: " - "mybins.blackburn.gov.uk|Referer: " - "https://mybins.blackburn.gov.uk/calendar/MTAwMDEwNzUwNzQy|Sec-Fetch-Dest: " - "empty|Sec-Fetch-Mode: cors|Sec-Fetch-Site: same-origin|Sec-GPC: 1|User-Agent: " - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/103.0.5060.134 Safari/537.36 " - ) - response_headers = parse_header(response_header_str) - requests.packages.urllib3.disable_warnings() - session = requests.Session() - ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) - ctx.options |= 0x4 - session.mount('https://', CustomHttpAdapter(ctx)) - - response = session.get(url, headers=response_headers) - - # Return JSON from response and loop through collections - json_result = json.loads(response.content) - bin_collections = json_result["BinCollectionDays"] - for collection in bin_collections: - if collection is not None: - bin_type = collection[0].get("BinType") - current_collection_date = datetime.strptime( - collection[0].get("CollectionDate"), "%Y-%m-%d" - ) - next_collection_date = datetime.strptime( - collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d" - ) - - # Work out the most recent collection date to display - if ( - datetime.today().date() - <= current_collection_date.date() - 
< next_collection_date.date() - ): - collection_date = current_collection_date - else: - collection_date = next_collection_date - - dict_data = { - "type": bin_type, - "collectionDate": collection_date.strftime(date_format), - } - data["bins"].append(dict_data) - - data["bins"].sort( - key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) - ) - - return data + soup = BeautifulSoup(driver.page_source, 'html.parser') + + # Find the
<pre> tag that contains the JSON data + pre_tag = soup.find('pre') + + if pre_tag: + # Extract the text content within the <pre> tag + + + # Return JSON from response and loop through collections + json_result = json.loads(pre_tag.contents[0]) + bin_collections = json_result["BinCollectionDays"] + for collection in bin_collections: + if collection is not None: + bin_type = collection[0].get("BinType") + current_collection_date = datetime.strptime( + collection[0].get("CollectionDate"), "%Y-%m-%d" + ) + next_collection_date = datetime.strptime( + collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d" + ) + + # Work out the most recent collection date to display + if ( + datetime.today().date() + <= current_collection_date.date() + < next_collection_date.date() + ): + collection_date = current_collection_date + else: + collection_date = next_collection_date + + dict_data = { + "type": bin_type, + "collectionDate": collection_date.strftime(date_format), + } + data["bins"].append(dict_data) + + data["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) + ) + + return data