From 5460fe2e6d13111e7c3bf4e49e696a36c603c220 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 15:39:01 +0000 Subject: [PATCH 1/7] feat: Adding North Hertfordshire District Council fix: #1028 --- uk_bin_collection/tests/input.json | 7 ++ .../NorthHertfordshireDistrictCouncil.py | 93 +++++++++++++++++++ wiki/Councils.md | 13 +++ 3 files changed, 113 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index fb5204eb3e..f388d80ba7 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1142,6 +1142,13 @@ "wiki_name": "North East Lincolnshire Council", "wiki_note": "Replace XXXXXXXX with your UPRN." }, + "NorthHertfordshireDistrictCouncil": { + "house_number": "2", + "postcode": "SG6 4BJ", + "url": "https://www.north-herts.gov.uk", + "wiki_name": "North Hertfordshire District Council", + "wiki_note": "Pass the house number and postcode in their respective parameters." + }, "NorthKestevenDistrictCouncil": { "url": "https://www.n-kesteven.org.uk/bins/display?uprn=100030869513", "wiki_command_url_override": "https://www.n-kesteven.org.uk/bins/display?uprn=XXXXXXXX", diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py new file mode 100644 index 0000000000..8517f8c825 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py @@ -0,0 +1,93 @@ +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_postcode = kwargs.get("postcode") + user_paon = kwargs.get("paon") + check_postcode(user_postcode) + check_paon(user_paon) + bindata = {"bins": []} + + URI = "https://uhtn-wrp.whitespacews.com/" + + session = requests.Session() + + # get link from first page as has some kind of unique hash + r = session.get( + URI, + ) + r.raise_for_status() + soup = BeautifulSoup(r.text, features="html.parser") + + alink = soup.find("a", text="Find my bin collection day") + + if alink is None: + raise Exception("Initial page did not load correctly") + + # greplace 'seq' query string to skip next step + nextpageurl = alink["href"].replace("seq=1", "seq=2") + + data = { + "address_name_number": user_paon, + "address_postcode": user_postcode, + } + + # get list of addresses + r = session.post(nextpageurl, data) + r.raise_for_status() + + soup = BeautifulSoup(r.text, features="html.parser") + + # get first address (if you don't enter enough argument values this won't find the right address) + alink = soup.find("div", id="property_list").find("a") + + if alink is None: + raise Exception("Address not found") + + nextpageurl = URI + alink["href"] + + # get collection page + r = session.get( + nextpageurl, + ) + r.raise_for_status() + soup = BeautifulSoup(r.text, features="html.parser") + + if soup.find("span", id="waste-hint"): + raise Exception("No scheduled services at this address") + + u1s = soup.find("section", id="scheduled-collections").find_all("u1") + + for u1 in u1s: + lis = u1.find_all("li", recursive=False) + + date = lis[1].text.replace("\n", "") + bin_type = lis[2].text.replace("\n", "") + + dict_data = { + "type": bin_type, + "collectionDate": datetime.strptime( + date, + "%d/%m/%Y", + ).strftime(date_format), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) + ) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index 4dddfe2247..f5d5679e72 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -156,6 +156,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [North Ayrshire Council](#north-ayrshire-council) - [North East Derbyshire District Council](#north-east-derbyshire-district-council) - [North East Lincolnshire Council](#north-east-lincolnshire-council) +- [North Hertfordshire District Council](#north-hertfordshire-district-council) - [North Kesteven District Council](#north-kesteven-district-council) - [North Lanarkshire Council](#north-lanarkshire-council) - [North Lincolnshire Council](#north-lincolnshire-council) @@ -2036,6 +2037,18 @@ Note: Replace XXXXXXXX with your UPRN. --- +### North Hertfordshire District Council +```commandline +python collect_data.py NorthHertfordshireDistrictCouncil https://www.north-herts.gov.uk -p "XXXX XXX" -n XX +``` +Additional parameters: +- `-p` - postcode +- `-n` - house number + +Note: Pass the house number and postcode in their respective parameters. + +--- + ### North Kesteven District Council ```commandline python collect_data.py NorthKestevenDistrictCouncil https://www.n-kesteven.org.uk/bins/display?uprn=XXXXXXXX From c1775eda3c551b45aaaccb56fe27fc888305676a Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 15:47:39 +0000 Subject: [PATCH 2/7] feat: Adding Cumberland Council (Allerdale District) fix: #1029 --- uk_bin_collection/tests/input.json | 7 ++ .../councils/CumberlandAllerdaleCouncil.py | 93 +++++++++++++++++++ wiki/Councils.md | 13 +++ 3 files changed, 113 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/CumberlandAllerdaleCouncil.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index f388d80ba7..02d60cd86b 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -468,6 +468,13 @@ "wiki_name": "Croydon Council", "wiki_note": "Pass the house number and postcode in their respective parameters." }, + "CumberlandAllerdaleCouncil": { + "house_number": "2", + "postcode": "CA13 0DE", + "url": "https://www.allerdale.gov.uk", + "wiki_name": "Cumberland Council - Allerdale District", + "wiki_note": "Pass the house number and postcode in their respective parameters." + }, "DacorumBoroughCouncil": { "house_number": "13", "postcode": "HP3 9JY", diff --git a/uk_bin_collection/uk_bin_collection/councils/CumberlandAllerdaleCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CumberlandAllerdaleCouncil.py new file mode 100644 index 0000000000..c50d61cc2b --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/CumberlandAllerdaleCouncil.py @@ -0,0 +1,93 @@ +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_postcode = kwargs.get("postcode") + user_paon = kwargs.get("paon") + check_postcode(user_postcode) + check_paon(user_paon) + bindata = {"bins": []} + + URI = "https://abc-wrp.whitespacews.com/" + + session = requests.Session() + + # get link from first page as has some kind of unique hash + r = session.get( + URI, + ) + r.raise_for_status() + soup = BeautifulSoup(r.text, features="html.parser") + + alink = soup.find("a", text="View My Collections") + + if alink is None: + raise Exception("Initial page did not load correctly") + + # greplace 'seq' query string to skip next step + nextpageurl = alink["href"].replace("seq=1", "seq=2") + + data = { + "address_name_number": user_paon, + "address_postcode": user_postcode, + } + + # get list of addresses + r = session.post(nextpageurl, data) + r.raise_for_status() + + soup = BeautifulSoup(r.text, features="html.parser") + + # get first address (if you don't enter enough argument values this won't find the right address) + alink = soup.find("div", id="property_list").find("a") + + if alink is None: + raise Exception("Address not found") + + nextpageurl = URI + alink["href"] + + # get collection page + r = session.get( + nextpageurl, + ) + r.raise_for_status() + soup = BeautifulSoup(r.text, features="html.parser") + + if soup.find("span", id="waste-hint"): + raise Exception("No scheduled services at this address") + + u1s = soup.find("section", id="scheduled-collections").find_all("u1") + + for u1 in u1s: + lis = u1.find_all("li", recursive=False) + + date = lis[1].text.replace("\n", "") + bin_type = lis[2].text.replace("\n", "") + + dict_data = { + "type": bin_type, + "collectionDate": datetime.strptime( + date, + "%d/%m/%Y", + ).strftime(date_format), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) + ) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index f5d5679e72..a51a53e4dc 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -70,6 +70,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Cotswold District Council](#cotswold-district-council) - [Crawley Borough Council](#crawley-borough-council) - [Croydon Council](#croydon-council) +- [Cumberland Council - Allerdale District](#cumberland-council---allerdale-district) - [Dacorum Borough Council](#dacorum-borough-council) - [Dartford Borough Council](#dartford-borough-council) - [Derby City Council](#derby-city-council) @@ -1001,6 +1002,18 @@ Note: Pass the house number and postcode in their respective parameters. --- +### Cumberland Council - Allerdale District +```commandline +python collect_data.py CumberlandAllerdaleCouncil https://www.allerdale.gov.uk -p "XXXX XXX" -n XX +``` +Additional parameters: +- `-p` - postcode +- `-n` - house number + +Note: Pass the house number and postcode in their respective parameters. + +--- + ### Dacorum Borough Council ```commandline python collect_data.py DacorumBoroughCouncil https://webapps.dacorum.gov.uk/bincollections/ -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ From 0254a94fdd95da80fe6a501f505fac50741c3390 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:02:37 +0000 Subject: [PATCH 3/7] fix: Mid Siffolk and Babergh Garden Collection Day fix: #1026 This will require the use of a DAY to be added to the UPRN field --- uk_bin_collection/tests/input.json | 6 ++++-- .../councils/BaberghDistrictCouncil.py | 4 +++- .../councils/MidSuffolkDistrictCouncil.py | 4 +++- wiki/Councils.md | 10 ++++++---- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 02d60cd86b..da57fa9d8c 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -88,9 +88,10 @@ "skip_get_url": true, "house_number": "Monday", "postcode": "Week 1", + "uprn": "Tuesday", "url": "https://www.babergh.gov.uk", "wiki_name": "Babergh District Council", - "wiki_note": "Use the House Number field to pass the DAY of the week for your collections. Monday/Tuesday/Wednesday/Thursday/Friday. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2]" + "wiki_note": "Use the House Number field to pass the DAY of the week for your NORMAL collections. [Monday/Tuesday/Wednesday/Thursday/Friday]. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2]. [OPTIONAL] Use the 'uprn' field to pass the DAY for your garden collection. [Monday/Tuesday/Wednesday/Thursday/Friday]" }, "BCPCouncil": { "skip_get_url": true, @@ -1054,9 +1055,10 @@ "skip_get_url": true, "house_number": "Monday", "postcode": "Week 2", + "uprn": "Monday", "url": "https://www.midsuffolk.gov.uk", "wiki_name": "Mid Suffolk District Council", - "wiki_note": "Use the House Number field to pass the DAY of the week for your collections. Monday/Tuesday/Wednesday/Thursday/Friday. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2]" + "wiki_note": "Use the House Number field to pass the DAY of the week for your NORMAL collections. [Monday/Tuesday/Wednesday/Thursday/Friday]. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2]. [OPTIONAL] Use the 'uprn' field to pass the DAY for your garden collection. [Monday/Tuesday/Wednesday/Thursday/Friday]" }, "MidSussexDistrictCouncil": { "house_number": "OAKLANDS, OAKLANDS ROAD RH16 1SS", diff --git a/uk_bin_collection/uk_bin_collection/councils/BaberghDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BaberghDistrictCouncil.py index cc178d9bfb..cd67829d44 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BaberghDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BaberghDistrictCouncil.py @@ -24,6 +24,7 @@ def parse_data(self, page: str, **kwargs) -> dict: collection_day = kwargs.get("paon") garden_collection_week = kwargs.get("postcode") + garden_collection_day = kwargs.get("uprn") bindata = {"bins": []} days_of_week = [ @@ -42,6 +43,7 @@ def parse_data(self, page: str, **kwargs) -> dict: recyclingstartDate = datetime(2024, 11, 11) offset_days = days_of_week.index(collection_day) + offset_days_garden = days_of_week.index(garden_collection_day) if garden_collection_week: garden_collection = garden_week.index(garden_collection_week) @@ -155,7 +157,7 @@ def parse_data(self, page: str, **kwargs) -> dict: collection_date = ( datetime.strptime(gardenDate, "%d/%m/%Y") - + timedelta(days=offset_days) + + timedelta(days=offset_days_garden) ).strftime("%d/%m/%Y") garden_holiday = next( diff --git a/uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py index a9e7d1458e..670003639a 100644 --- a/uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/MidSuffolkDistrictCouncil.py @@ -24,6 +24,7 @@ def parse_data(self, page: str, **kwargs) -> dict: collection_day = kwargs.get("paon") garden_collection_week = kwargs.get("postcode") + garden_collection_day = kwargs.get("uprn") bindata = {"bins": []} days_of_week = [ @@ -42,6 +43,7 @@ def parse_data(self, page: str, **kwargs) -> dict: recyclingstartDate = datetime(2024, 11, 4) offset_days = days_of_week.index(collection_day) + offset_days_garden = days_of_week.index(garden_collection_day) if garden_collection_week: garden_collection = garden_week.index(garden_collection_week) @@ -155,7 +157,7 @@ def parse_data(self, page: str, **kwargs) -> dict: collection_date = ( datetime.strptime(gardenDate, "%d/%m/%Y") - + timedelta(days=offset_days) + + timedelta(days=offset_days_garden) ).strftime("%d/%m/%Y") garden_holiday = next( diff --git a/wiki/Councils.md b/wiki/Councils.md index a51a53e4dc..c7cefee6fe 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -397,14 +397,15 @@ Note: To get the UPRN, please use [FindMyAddress](https://www.findmyaddress.co.u ### Babergh District Council ```commandline -python collect_data.py BaberghDistrictCouncil https://www.babergh.gov.uk -s -p "XXXX XXX" -n XX +python collect_data.py BaberghDistrictCouncil https://www.babergh.gov.uk -s -u XXXXXXXX -p "XXXX XXX" -n XX ``` Additional parameters: - `-s` - skip get URL +- `-u` - UPRN - `-p` - postcode - `-n` - house number -Note: Use the House Number field to pass the DAY of the week for your collections. Monday/Tuesday/Wednesday/Thursday/Friday. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2] +Note: Use the House Number field to pass the DAY of the week for your NORMAL collections. [Monday/Tuesday/Wednesday/Thursday/Friday]. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2]. [OPTIONAL] Use the 'uprn' field to pass the DAY for your garden collection. [Monday/Tuesday/Wednesday/Thursday/Friday] --- @@ -1895,14 +1896,15 @@ Note: Pass the house name/number wrapped in double quotes along with the postcod ### Mid Suffolk District Council ```commandline -python collect_data.py MidSuffolkDistrictCouncil https://www.midsuffolk.gov.uk -s -p "XXXX XXX" -n XX +python collect_data.py MidSuffolkDistrictCouncil https://www.midsuffolk.gov.uk -s -u XXXXXXXX -p "XXXX XXX" -n XX ``` Additional parameters: - `-s` - skip get URL +- `-u` - UPRN - `-p` - postcode - `-n` - house number -Note: Use the House Number field to pass the DAY of the week for your collections. Monday/Tuesday/Wednesday/Thursday/Friday. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2] +Note: Use the House Number field to pass the DAY of the week for your NORMAL collections. [Monday/Tuesday/Wednesday/Thursday/Friday]. [OPTIONAL] Use the 'postcode' field to pass the WEEK for your garden collection. [Week 1/Week 2]. [OPTIONAL] Use the 'uprn' field to pass the DAY for your garden collection. [Monday/Tuesday/Wednesday/Thursday/Friday] --- From 5c22feb5f6c32714f36f6749006fecb852ba8950 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:16:39 +0000 Subject: [PATCH 4/7] feat: Add Garden collection to EnvironmentFirst fix: #1025 --- .../uk_bin_collection/councils/EnvironmentFirst.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py b/uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py index 373f4ca912..cf0eb20f7c 100644 --- a/uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py +++ b/uk_bin_collection/uk_bin_collection/councils/EnvironmentFirst.py @@ -1,4 +1,5 @@ from bs4 import BeautifulSoup + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -45,4 +46,17 @@ def parse_data(self, page: str, **kwargs) -> dict: } data["bins"].append(dict_data) + if len(page_text) > 5: + garden_day = datetime.strptime( + remove_ordinal_indicator_from_date_string( + page_text[6].find_next("strong").text + ), + "%d %B %Y", + ).strftime(date_format) + dict_data = { + "type": "Garden", + "collectionDate": garden_day, + } + data["bins"].append(dict_data) + return data From da60317b18e0bcecad8292d3acbb08c887b2c755 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:50:57 +0000 Subject: [PATCH 5/7] feat: Adding London Borough of Havering fix: #769 --- uk_bin_collection/tests/input.json | 6 ++ .../councils/LondonBoroughHavering.py | 75 +++++++++++++++++++ wiki/Councils.md | 12 +++ 3 files changed, 93 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/LondonBoroughHavering.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index da57fa9d8c..fc5c8b5924 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -958,6 +958,12 @@ "wiki_name": "London Borough Harrow", "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." }, + "LondonBoroughHavering": { + "url": "https://www.havering.gov.uk", + "uprn": "100021380730", + "wiki_name": "London Borough Havering", + "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)." + }, "LondonBoroughHounslow": { "skip_get_url": true, "uprn": "100021577765", diff --git a/uk_bin_collection/uk_bin_collection/councils/LondonBoroughHavering.py b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughHavering.py new file mode 100644 index 0000000000..e0daae44b2 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughHavering.py @@ -0,0 +1,75 @@ +import time + +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = "https://lbhapiprod.azure-api.net" + endpoint = f"{URI}/whitespace/GetCollectionByUprnAndDate" + subscription_key = "2ea6a75f9ea34bb58d299a0c9f84e72e" + + # Get today's date in 'YYYY-MM-DD' format + collection_date = datetime.now().strftime("%Y-%m-%d") + + # Define the request headers + headers = { + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": subscription_key, + } + + # Define the request body + data = { + "getCollectionByUprnAndDate": { + "getCollectionByUprnAndDateInput": { + "uprn": user_uprn, + "nextCollectionFromDate": collection_date, + } + } + } + # Make the POST request + response = requests.post(endpoint, headers=headers, data=json.dumps(data)) + response.raise_for_status() # Raise an exception for HTTP errors + + # Parse the JSON response + response_data = response.json() + + collections = ( + response_data.get("getCollectionByUprnAndDateResponse", {}) + .get("getCollectionByUprnAndDateResult", {}) + .get("Collections", []) + ) + + for collection in collections: + bin_type = collection["service"] + collection_date = collection["date"] + + dict_data = { + "type": bin_type, + "collectionDate": datetime.strptime( + collection_date, + "%d/%m/%Y %H:%M:%S", + ).strftime(date_format), + } + bindata["bins"].append(dict_data) + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) + ) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index c7cefee6fe..1d7b29fd94 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -131,6 +131,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Liverpool City Council](#liverpool-city-council) - [London Borough Ealing](#london-borough-ealing) - [London Borough Harrow](#london-borough-harrow) +- [London Borough Havering](#london-borough-havering) - [London Borough Hounslow](#london-borough-hounslow) - [London Borough Lambeth](#london-borough-lambeth) - [London Borough Redbridge](#london-borough-redbridge) @@ -1741,6 +1742,17 @@ Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/searc --- +### London Borough Havering +```commandline +python collect_data.py LondonBoroughHavering https://www.havering.gov.uk -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search). + +--- + ### London Borough Hounslow ```commandline python collect_data.py LondonBoroughHounslow https://www.hounslow.gov.uk/homepage/86/recycling_and_waste_collection_day_finder -s -u XXXXXXXX From 035fea1a33be74a5865ff181cc4a7876b5761a79 Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:31:26 +0000 Subject: [PATCH 6/7] feat: Adding Newcastle Under Lyme Council fix: #778 --- uk_bin_collection/tests/input.json | 6 ++ .../councils/NewcastleUnderLymeCouncil.py | 66 +++++++++++++++++++ wiki/Councils.md | 12 ++++ 3 files changed, 84 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/NewcastleUnderLymeCouncil.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index fc5c8b5924..6bfaf09308 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1119,6 +1119,12 @@ "wiki_name": "Newcastle City Council", "wiki_note": "Replace XXXXXXXX with your UPRN." }, + "NewcastleUnderLymeCouncil": { + "url": "https://www.newcastle-staffs.gov.uk", + "uprn": "100031725433", + "wiki_name": "Newcastle Under Lyme Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN." + }, "NewhamCouncil": { "skip_get_url": true, "url": "https://bincollection.newham.gov.uk/Details/Index/000046029461", diff --git a/uk_bin_collection/uk_bin_collection/councils/NewcastleUnderLymeCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NewcastleUnderLymeCouncil.py new file mode 100644 index 0000000000..274d23985e --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/NewcastleUnderLymeCouncil.py @@ -0,0 +1,66 @@ +import requests +from bs4 import BeautifulSoup +from dateutil.relativedelta import relativedelta + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = f"https://www.newcastle-staffs.gov.uk/homepage/97/check-your-bin-day?uprn={user_uprn}" + + # Make the GET request + response = requests.get(URI) + response.raise_for_status() + soup = BeautifulSoup(response.text, features="html.parser") + soup.prettify() + + # Find the table + table = soup.find("table", {"class": "data-table"}) + + if table: + rows = table.find("tbody").find_all("tr") + for row in rows: + date = datetime.strptime( + ( + row.find_all("td")[0] + .get_text(strip=True) + .replace("Date:", "") + .strip() + ), + "%A %d %B", + ).replace(year=datetime.now().year) + if datetime.now().month > 10 and date.month < 3: + date = date + relativedelta(years=1) + bin_types = ( + row.find_all("td")[1] + .text.replace("Collection Type:", "") + .splitlines() + ) + for bin_type in bin_types: + bin_type = bin_type.strip() + if bin_type: + dict_data = { + "type": bin_type.strip(), + "collectionDate": date.strftime("%d/%m/%Y"), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index 1d7b29fd94..543909afd4 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -153,6 +153,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [New Forest Council](#new-forest-council) - [Newark and Sherwood District Council](#newark-and-sherwood-district-council) - [Newcastle City Council](#newcastle-city-council) +- [Newcastle Under Lyme Council](#newcastle-under-lyme-council) - [Newham Council](#newham-council) - [Newport City Council](#newport-city-council) - [North Ayrshire Council](#north-ayrshire-council) @@ -2004,6 +2005,17 @@ Note: Replace XXXXXXXX with your UPRN. --- +### Newcastle Under Lyme Council +```commandline +python collect_data.py NewcastleUnderLymeCouncil https://www.newcastle-staffs.gov.uk -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN. + +--- + ### Newham Council ```commandline python collect_data.py NewhamCouncil https://bincollection.newham.gov.uk/Details/Index/XXXXXXXXXXX -s From 3bdfc5dddd327c5d4f270e2bbde3ba3e09eca8ae Mon Sep 17 00:00:00 2001 From: m26dvd <31007572+m26dvd@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:44:46 +0000 Subject: [PATCH 7/7] feat: Adding Hartlepool Borough Council fix: #844 --- uk_bin_collection/tests/input.json | 6 ++ .../councils/HartlepoolBoroughCouncil.py | 83 +++++++++++++++++++ wiki/Councils.md | 12 +++ 3 files changed, 101 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 6bfaf09308..d1053ccbc8 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -818,6 +818,12 @@ "wiki_name": "Harrogate Borough Council", "wiki_note": "Pass the UPRN, which can be found at [this site](https://secure.harrogate.gov.uk/inmyarea). URL doesn't need to be passed." }, + "HartlepoolBoroughCouncil": { + "url": "https://www.hartlepool.gov.uk", + "uprn": "100110019551", + "wiki_name": "Hartlepool Borough Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN." + }, "HertsmereBoroughCouncil": { "house_number": "1", "postcode": "WD7 9HZ", diff --git a/uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py new file mode 100644 index 0000000000..9be7711d61 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/HartlepoolBoroughCouncil.py @@ -0,0 +1,83 @@ +import time + +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + SESSION_URL = "https://online.hartlepool.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fonline.hartlepool.gov.uk%252Fservice%252FRefuse_and_recycling___check_bin_day&hostname=online.hartlepool.gov.uk&withCredentials=true" + + API_URL = "https://online.hartlepool.gov.uk/apibroker/runLookup" + + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": "Mozilla/5.0", + "X-Requested-With": "XMLHttpRequest", + "Referer": "https://online.hartlepool.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=", + } + s = requests.session() + r = s.get(SESSION_URL) + r.raise_for_status() + session_data = r.json() + sid = session_data["auth-session"] + params = { + "id": "5ec67e019ffdd", + "repeat_against": "", + "noRetry": "true", + "getOnlyTokens": "undefined", + "log_id": "", + "app_name": "AF-Renderer::Self", + # unix_timestamp + "_": str(int(time.time() * 1000)), + "sid": sid, + } + + data = { + "formValues": { + "Section 1": { + "collectionLocationUPRN": { + "value": user_uprn, + }, + }, + }, + } + + r = s.post(API_URL, json=data, headers=headers, params=params) + r.raise_for_status() + + data = r.json() + rows_data = data["integration"]["transformed"]["rows_data"]["0"] + if not isinstance(rows_data, dict): + raise ValueError("Invalid data returned from API") + + soup = BeautifulSoup(rows_data["HTMLCollectionDatesText"], "html.parser") + + # Find all div elements containing the bin schedule + for div in soup.find_all("div"): + # Extract bin type and date from the span tag + text = div.find("span").text.strip() + bin_type, date = text.split(" ", 1) + dict_data = { + "type": bin_type, + "collectionDate": date, + } + bindata["bins"].append(dict_data) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index 543909afd4..855d3e06e9 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -113,6 +113,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Harborough District Council](#harborough-district-council) - [Haringey Council](#haringey-council) - [Harrogate Borough Council](#harrogate-borough-council) +- [Hartlepool Borough Council](#hartlepool-borough-council) - [Hertsmere Borough Council](#hertsmere-borough-council) - [Highland Council](#highland-council) - [High Peak Council](#high-peak-council) @@ -1524,6 +1525,17 @@ Note: Pass the UPRN, which can be found at [this site](https://secure.harrogate. --- +### Hartlepool Borough Council +```commandline +python collect_data.py HartlepoolBoroughCouncil https://www.hartlepool.gov.uk -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN. + +--- + ### Hertsmere Borough Council ```commandline python collect_data.py HertsmereBoroughCouncil https://www.hertsmere.gov.uk -s -p "XXXX XXX" -n XX -w http://HOST:PORT/