From 6984183c4c0d71ccd1e2c485db5dac4a73783909 Mon Sep 17 00:00:00 2001 From: kingamajick Date: Thu, 28 Dec 2023 20:17:01 +0000 Subject: [PATCH] feat: Add Haringey Council. --- .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 9 +++- .../councils/HaringeyCouncil.py | 47 +++++++++++++++++++ wiki/Councils.md | 13 +++++ 4 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 uk_bin_collection/uk_bin_collection/councils/HaringeyCouncil.py diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index 18020726c3..e3c44466c0 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -51,6 +51,7 @@ Feature: Test each council output matches expected results | GlasgowCityCouncil | None | None | | GuildfordCouncil | None | None | | HaltonBoroughCouncil | http://selenium:4444 | local | + | HaringeyCouncil | None | None | | HarrogateBoroughCouncil | None | None | | HighPeakCouncil | http://selenium:4444 | local | | HuntingdonDistrictCouncil | None | None | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 48816f6874..64c98894bb 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -46,7 +46,7 @@ "web_driver": "http://selenium:4444", "postcode": "DA5 3AH", "uprn": "100020196143", - "house_number":"1 Dorchester Avenue, Bexley", + "house_number": "1 Dorchester Avenue, Bexley", "wiki_name": "Bexley Council", "wiki_note": "In order to use this parser, you will need to sign up to [Bexley's @Home app](https://www.bexley.gov.uk/services/rubbish-and-recycling/bexley-home-recycling-app/about-app) (available for [iOS](https://apps.apple.com/gb/app/home-collection-reminder/id1050703690) and 
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for Haringey Council.

    Fetches the council's waste-collection page for a single property
    (identified by UPRN) and extracts the next collection date of every
    service listed on that page.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return the next collection date for each service at a property.

        Args:
            page: Unused here; the page is requested inside this method
                from the property URL built with the UPRN.
            **kwargs: Must contain ``uprn``, the property reference that is
                appended to the Haringey property URL.

        Returns:
            ``{"bins": [{"type": <service name>, "collectionDate": <text>}]}``
            with one entry per service section that has both a name and a
            next-collection date. ``collectionDate`` is the stripped text
            node that contains a ``NN/NN/NNNN`` date (presumably DD/MM/YYYY).

        Raises:
            ConnectionAbortedError: If the council site does not answer
                with HTTP 200.
            requests.exceptions.Timeout: If the site does not respond
                within the request timeout.
        """
        # NOTE(review): `check_uprn`, `requests` and `re` are not imported by
        # name in this module; they are expected to come from the star import
        # of `common` - confirm if that import is ever narrowed.
        uprn = kwargs.get("uprn")
        check_uprn(uprn)

        # A timeout stops a stalled council server from blocking the caller
        # indefinitely.
        response = requests.post(
            f"https://wastecollections.haringey.gov.uk/property/{uprn}",
            timeout=30,
        )
        if response.status_code != 200:
            raise ConnectionAbortedError(
                "Issue encountered getting collection data."
            )

        soup = BeautifulSoup(response.text, features="html.parser")
        date_regex = re.compile(r"\d{2}/\d{2}/\d{4}")

        data = {"bins": []}
        for section in soup.find_all("div", {"class": "property-service-wrapper"}):
            service = section.find("h3", {"class": "service-name"})
            table_body = section.find("tbody")
            next_cell = (
                table_body.find("td", {"class": "next-service"})
                if table_body is not None
                else None
            )
            next_collection = (
                next_cell.find(string=date_regex) if next_cell is not None else None
            )

            # A section without a heading or without a dated next-service
            # cell has nothing to report; skip it rather than crash on None.
            if service is None or next_collection is None:
                continue

            # Drop the "Collect " and "Paid " words that some service names
            # carry, to make the naming more consistent.
            bin_type = service.text.replace("Collect ", "").replace("Paid ", "")
            data["bins"].append(
                {
                    "type": bin_type.strip(),
                    "collectionDate": next_collection.strip(),
                }
            )

        return data
https://wastecollections.haringey.gov.uk/property/{uprn}. + +--- + ### Harrogate Borough Council ```commandline python collect_data.py HarrogateBoroughCouncil https://secure.harrogate.gov.uk/inmyarea -s -u XXXXXXXX