From 11ec82894540723bbbc44e0c6dc9c0ec7d347bd8 Mon Sep 17 00:00:00 2001 From: skelt0 <16990151+skelt0@users.noreply.github.com> Date: Tue, 19 Dec 2023 21:39:25 +0000 Subject: [PATCH 1/2] feat: Adding West Lindsey District Council --- .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 12 +- .../councils/WestLindseyDistrictCouncil.py | 104 ++++++++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 uk_bin_collection/uk_bin_collection/councils/WestLindseyDistrictCouncil.py diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index 3842fdd41f..cfb27aa495 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -117,6 +117,7 @@ Feature: Test each council output matches expected results | WaverleyBoroughCouncil | None | None | | WealdenDistrictCouncil | None | None | | WelhatCouncil | None | None | + | WestLindseyDistrictCouncil | None | None | | WestLothianCouncil | http://selenium:4444 | local | | WestSuffolkCouncil | http://selenium:4444 | local | | WiganBoroughCouncil | None | None | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 89eb452a98..a945f1e6fa 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -255,7 +255,7 @@ "uprn": "200002981143", "url": "https://www.fenland.gov.uk/article/13114/", "wiki_name": "Fenland District Council" - }, + }, "ForestOfDeanDistrictCouncil": { "skip_get_url": true, "house_number": "ELMOGAL, PARKEND ROAD, BREAM, LYDNEY", @@ -504,7 +504,7 @@ "url": "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx", "web_driver": "http://selenium:4444", "wiki_name": "Northumberland Council" - }, + }, "OldhamCouncil": { "url": "https://portal.oldham.gov.uk/bincollectiondates/details?uprn=422000033556", 
"""Bin collection lookup for West Lindsey District Council.

Collection data is fetched from the ``wlnk.statmap.co.uk`` service in two
steps: a free-text address search that yields a location id and X/Y
coordinates, followed by a page request that returns a JavaScript payload
with the collection dates embedded as an escaped HTML string.
"""
import json
import re
import urllib.parse
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
    AbstractGetBinDataClass

# Seconds to wait on each HTTP request so a stalled server cannot hang the run.
_REQUEST_TIMEOUT = 30

# Stage 1: free-text address search, answered as JSONP.
_FIND_LOCATION_URL = (
    "https://wlnk.statmap.co.uk/map/Cluster.svc/findLocation"
    "?callback=getAddressesCallback1702938375023"
    "&script=%5CCluster%5CCluster.AuroraScript%24&address={}"
)

# Stage 2: bin page for one location (placeholders are x, y and id).  The
# backslashes are escaped so the runtime string is unchanged but no longer
# relies on Python tolerating the invalid "\C" escape sequence.
_GET_PAGE_URL = (
    "https://wlnk.statmap.co.uk/map/Cluster.svc/getpage"
    "?script=\\Cluster\\Cluster.AuroraScript$&taskId=bins&format=js"
    "&updateOnly=true&query=x%3D{}%3By%3D{}%3Bid%3D{}"
)

# Captures the JSON body inside ``getAddressesCallback<digits>( ... )``.
_JSONP_RE = re.compile(r"getAddressesCallback\d+\((.*)\)", re.DOTALL)
# Captures the HTML string assigned to the "DR1" element by the JS payload.
_BIN_HTML_RE = re.compile(
    r'document\.getElementById\("DR1"\)\.innerHTML="(.+)";'
)
# Collection dates appear in the row text as DD/MM with no year.
_DAY_MONTH_RE = re.compile(r"\d+/\d+")

# Leap years can be up to eight years apart (e.g. 2096 -> 2104), so this many
# candidate years is always enough to place a 29/02 date.
_MAX_YEARS_AHEAD = 9


def _parse_jsonp(payload: str) -> dict:
    """Return the JSON document wrapped by the address-search JSONP reply.

    Raises:
        ValueError: if the callback wrapper is missing or the body is not
            valid JSON.
    """
    match = _JSONP_RE.search(payload)
    if match is None:
        raise ValueError(
            "Address lookup did not return the expected JSONP payload"
        )
    return json.loads(match.group(1))


def _unescape_js_string(text: str) -> str:
    """Decode backslash escapes (``\\uXXXX`` etc.) in a JavaScript string.

    Encoding with ``latin-1``/``backslashreplace`` first keeps any literal
    non-ASCII characters intact; a plain UTF-8 ``encode()`` followed by a
    ``unicode-escape`` decode would turn them into mojibake.
    """
    return text.encode("latin-1", "backslashreplace").decode("unicode-escape")


def _resolve_collection_date(day_month: str, today: datetime) -> datetime:
    """Attach a year to a ``DD/MM`` string from the council page.

    The earliest year, starting with the current one, in which the date is
    both valid and not before *today* is used.  This handles the December to
    January rollover and 29 February, which cannot be parsed without a year.

    Raises:
        ValueError: if the text is not a real calendar date.
    """
    day_text, month_text = day_month.split("/")
    day, month = int(day_text), int(month_text)

    for year in range(today.year, today.year + _MAX_YEARS_AHEAD):
        try:
            candidate = datetime(year, month, day)
        except ValueError:
            # Not a valid date in this year (e.g. 29/02 in a non-leap year).
            continue
        if candidate.date() >= today.date():
            return candidate

    raise ValueError("Invalid collection date: {}".format(day_month))


class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.

    Usage: pass the house name/number in the house number parameter and the
    postcode in the postcode parameter.  If a named house or flat, enter this
    in the number field.  If multiple results return, the first is picked.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Look up bin collections for the address given in *kwargs*.

        Args:
            page: Unused by this council; the data is fetched directly.
            **kwargs: ``paon`` (house name/number) and ``postcode`` are read
                and combined into the address search string.

        Returns:
            ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with
            one entry per date found for each bin row.

        Raises:
            ValueError: if no address text was supplied, or a reply or date
                cannot be parsed.
            requests.RequestException: if either HTTP request fails.
            Exception: if the address is not found or the bin page does not
                have the expected structure.
        """
        address_parts = (kwargs.get("paon"), kwargs.get("postcode"))
        # Skip missing parts so the literal text "None" is never searched for.
        search_text = " ".join(str(part) for part in address_parts if part)
        if not search_text:
            raise ValueError("A house name/number and postcode are required")

        # This first URL checks against a string representing the user's
        # address and returns values used for a second lookup.
        stage1_url = _FIND_LOCATION_URL.format(urllib.parse.quote(search_text))
        stage1_response = requests.get(stage1_url, timeout=_REQUEST_TIMEOUT)
        stage1_response.raise_for_status()
        address_data = _parse_jsonp(stage1_response.text)

        locations = address_data.get("Locations") or []
        if address_data.get("TotalHits", 0) == 0 or not locations:
            raise Exception(
                "No address found for string {}. See Wiki".format(search_text)
            )

        # Multiple hits may be returned; the first one is used.
        location = locations[0]
        stage2_url = _GET_PAGE_URL.format(
            location["X"], location["Y"], location["Id"]
        )
        stage2_response = requests.get(stage2_url, timeout=_REQUEST_TIMEOUT)
        stage2_response.raise_for_status()
        bin_query = stage2_response.text

        # Test that what we got is good
        if "injectCss" not in bin_query:
            raise Exception("Error. Data has not been returned correctly. Please raise an issue on the GitHub page")

        # Keep only the HTML contained within the JavaScript payload.
        html_matches = _BIN_HTML_RE.findall(bin_query)
        if len(html_matches) != 1:
            # Anything other than exactly one match means the page changed.
            raise Exception("Incorrect number of matches found during phase 2 search. Please raise an issue on the Github page")

        soup = BeautifulSoup(_unescape_js_string(html_matches[0]), "html.parser")

        container = soup.find("li", {"class": "auroraListItem"})
        if container is None:
            raise Exception("Error. Data has not been returned correctly. Please raise an issue on the GitHub page")

        today = datetime.now()
        data = {"bins": []}

        for row in container.find_all("li"):
            label = row.find("span")
            if label is None:
                # No bin-type label, so this row cannot describe a collection.
                continue
            bin_type = label.text

            for day_month in _DAY_MONTH_RE.findall(row.text):
                collection_dt = _resolve_collection_date(day_month, today)
                data["bins"].append(
                    {
                        "type": bin_type,
                        # NOTE(review): date_format is not defined in this
                        # module; presumably it comes from the common star
                        # import -- confirm.
                        "collectionDate": collection_dt.strftime(date_format),
                    }
                )

        return data