From d781bcb47d65d91872b9f78cd4e3c765478489a7 Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Wed, 27 Sep 2023 21:53:27 +0100 Subject: [PATCH] feat: Add support for Reigate and Banstead Borough Council --- .../ReigateAndBansteadBoroughCouncil.schema | 39 +++++++++++ .../features/validate_council_outputs.feature | 1 + uk_bin_collection/tests/input.json | 7 ++ .../ReigateAndBansteadBoroughCouncil.json | 52 ++++++++++++++ .../ReigateAndBansteadBoroughCouncil.py | 69 +++++++++++++++++++ 5 files changed, 168 insertions(+) create mode 100644 uk_bin_collection/tests/council_schemas/ReigateAndBansteadBoroughCouncil.schema create mode 100644 uk_bin_collection/tests/outputs/ReigateAndBansteadBoroughCouncil.json create mode 100644 uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py diff --git a/uk_bin_collection/tests/council_schemas/ReigateAndBansteadBoroughCouncil.schema b/uk_bin_collection/tests/council_schemas/ReigateAndBansteadBoroughCouncil.schema new file mode 100644 index 0000000000..f77b5ec089 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/ReigateAndBansteadBoroughCouncil.schema @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome5", + "definitions": { + "Welcome5": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome5" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string" + }, + "collectionDate": { + "type": "string" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + } + } +} diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index f7577e7e8e..17287d30b3 100644 --- 
a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -63,6 +63,7 @@ Feature: Test each council output matches expected results in /outputs | NorthTynesideCouncil | | NorthumberlandCouncil | | PrestonCityCouncil | + | ReigateAndBansteadBoroughCouncil | | RochdaleCouncil | | RushcliffeBoroughCouncil | | RushmoorCouncil | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 3a0987228d..21468e81f8 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -358,6 +358,13 @@ "url": "https://selfservice.preston.gov.uk/service/Forms/FindMyNearest.aspx?Service=bins", "wiki_name": "Preston City Council" }, + "ReigateAndBansteadBoroughCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "uprn": "68134867", + "url": "https://www.reigate-banstead.gov.uk/", + "wiki_name": "Reigate and Banstead Borough Council", + "wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)" + }, "RochdaleCouncil": { "SKIP_GET_URL": "SKIP_GET_URL", "postcode": "OL11 5BE", diff --git a/uk_bin_collection/tests/outputs/ReigateAndBansteadBoroughCouncil.json b/uk_bin_collection/tests/outputs/ReigateAndBansteadBoroughCouncil.json new file mode 100644 index 0000000000..8b25eb4371 --- /dev/null +++ b/uk_bin_collection/tests/outputs/ReigateAndBansteadBoroughCouncil.json @@ -0,0 +1,52 @@ +{ + "bins": [ + { + "type": "Food waste", + "collectionDate": "03/10/2023" + }, + { + "type": "Paper and cardboard", + "collectionDate": "03/10/2023" + }, + { + "type": "Food waste", + "collectionDate": "10/10/2023" + }, + { + "type": "Paper and cardboard", + "collectionDate": "10/10/2023" + }, + { + "type": "Mixed recycling", + "collectionDate": "10/10/2023" + }, + { + "type": "Refuse", + "collectionDate": "10/10/2023" + }, + { + "type": "Food waste", + "collectionDate": "17/10/2023" + }, + { + "type": "Paper and cardboard", + 
"collectionDate": "17/10/2023" + }, + { + "type": "Food waste", + "collectionDate": "24/10/2023" + }, + { + "type": "Paper and cardboard", + "collectionDate": "24/10/2023" + }, + { + "type": "Mixed recycling", + "collectionDate": "24/10/2023" + }, + { + "type": "Refuse", + "collectionDate": "24/10/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py new file mode 100644 index 0000000000..43a2756875 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/ReigateAndBansteadBoroughCouncil.py @@ -0,0 +1,69 @@ +import time +from bs4 import BeautifulSoup +from selenium import webdriver +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the base + class. They can also override some operations with a default + implementation. 
def parse_data(self, page: str, **kwargs) -> dict:
    """Scrape the Reigate and Banstead collections calendar for a UPRN.

    The calendar is rendered client-side inside an iframe, so the page is
    loaded in headless Chrome, the rendered HTML is captured, and the
    browser is always shut down before the HTML is parsed.

    Args:
        page: Not used by this implementation; the page is fetched with
            Selenium instead.
        **kwargs: Must contain ``uprn``; it is validated with
            ``check_uprn`` and left-padded with zeros to 12 characters.

    Returns:
        A dict of the form
        ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` sorted
        by collection date. Dates are formatted with ``date_format``.

    Raises:
        ValueError: If a date heading does not match the
            ``"%A %d %B %Y"`` layout (raised by ``datetime.strptime``).
        selenium.common.exceptions.TimeoutException: If the iframe or the
            calendar markup does not appear within 30 seconds.
    """
    user_uprn = kwargs.get("uprn")
    check_uprn(user_uprn)
    # Pad UPRN with 0's at the start for any that aren't 12 chars
    user_uprn = user_uprn.zfill(12)

    # Set up Selenium to run 'headless'
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-dev-shm-usage")
    options.add_experimental_option("excludeSwitches", ["enable-logging"])

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(
            "https://my.reigate-banstead.gov.uk/en/service/"
            f"Bins_and_recycling___collections_calendar?uprn={user_uprn}"
        )

        # Wait for iframe to load and switch to it
        WebDriverWait(driver, 30).until(
            EC.frame_to_be_available_and_switch_to_it(
                (By.ID, "fillform-frame-1")
            )
        )

        # Wait for the calendar markup to be rendered inside the form
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'span[data-name="html2"] > div')
            )
        )

        # Grab the HTML while the session is still alive
        page_source = driver.page_source
    finally:
        # Always release the browser, even if a wait timed out; otherwise
        # every call leaves a Chrome process running.
        driver.quit()

    soup = BeautifulSoup(page_source, features="html.parser")

    data = {"bins": []}
    section = soup.find("span", {"data-name": "html2"})
    for day in section.find_all("div"):
        heading = day.find("h3")
        items = day.find_all("li")
        # Only divs carrying both a date heading and list items describe
        # a collection day.
        if not (heading and items):
            continue

        collection_date = datetime.strptime(
            heading.get_text(strip=True), "%A %d %B %Y"
        ).strftime(date_format)

        for item in items:
            collection_type = item.get_text(strip=True)
            if collection_type:
                data["bins"].append(
                    {
                        "type": collection_type,
                        "collectionDate": collection_date,
                    }
                )

    data["bins"].sort(
        key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
    )

    return data