diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index ab1ba231fd..153a2479e4 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -327,6 +327,11 @@ Feature: Test each council output matches expected results | council | selenium_url | selenium_mode | | MiltonKeynesCityCouncil | None | None | + @MoleValleyDistrictCouncil + Examples: MoleValleyDistrictCouncil + | council | selenium_url | selenium_mode | + | MoleValleyDistrictCouncil | None | None | + @NeathPortTalbotCouncil Examples: NeathPortTalbotCouncil | council | selenium_url | selenium_mode | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 4b8b2877ab..6efb849376 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -467,6 +467,14 @@ "wiki_name": "Milton Keynes City Council", "wiki_note": "Pass the name of the estate with the UPRN parameter, wrapped in double quotes" }, + "MoleValleyDistrictCouncil": { + "postcode": "RH4 1SJ", + "skip_get_url": true, + "uprn": "200000171235", + "url": "https://molevalley.cloudmappin.com/mmv/", + "wiki_name": "Mole Valley District Council", + "wiki_note": "UPRN can only be parsed with a valid postcode." 
+ }, "NeathPortTalbotCouncil": { "postcode": "SA13 3BA", "skip_get_url": true,
diff --git a/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py
new file mode 100644
index 0000000000..32d692beb5
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/MoleValleyDistrictCouncil.py
@@ -0,0 +1,102 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+import re
+import requests
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Bin collection parser for Mole Valley District Council.
+
+    Looks up the properties for a postcode via the council's address
+    search endpoint and reads the collection dates from the HTML
+    fragment embedded in the JSON response.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Return the bin collections listed for a property.
+
+        :param page: unused; the data is fetched directly by postcode
+        :param kwargs: must contain "postcode"; may contain "uprn" to
+            select a specific address (otherwise the first result is used)
+        :return: {"bins": [{"type": ..., "collectionDate": "dd/mm/YYYY"}]}
+        :raises ValueError: if the request fails, or no data is found
+            for the postcode or UPRN provided
+        """
+        user_postcode = kwargs.get("postcode")
+        check_postcode(user_postcode)
+
+        root_url = "https://molevalley.cloudmappin.com/my-mv-address-search/search/{}/0".format(user_postcode)
+        # A timeout stops an unresponsive server from hanging the caller.
+        response = requests.get(root_url, timeout=30)
+
+        if not response.ok:
+            raise ValueError("Invalid server response code retrieving data.")
+
+        results = response.json().get("results")
+        if not results:
+            raise ValueError("No collection data found for postcode provided.")
+
+        properties_found = results[0].get("items")
+        if not properties_found:
+            raise ValueError("No collection data found for postcode provided.")
+
+        # If UPRN is provided, we can check a specific address.
+        uprn = kwargs.get("uprn")
+        if uprn:
+            check_uprn(uprn)
+            html_data = None
+            for item in properties_found:
+                # Compare against the response value normalised to a
+                # plain integer string.
+                if str(uprn) == str(int(item["info"][0][1]["value"])):
+                    html_data = item["info"][2][1]["value"]
+                    break
+            if html_data is None:
+                raise ValueError("No collection data found for UPRN provided.")
+        else:
+            # If UPRN not provided, just use the first result
+            html_data = properties_found[0]["info"][2][1]["value"]
+
+        soup = BeautifulSoup(html_data, features="html.parser")
+
+        data = {"bins": []}
+        all_collection_dates = []
+        regex_date = re.compile(r'.* ([\d]+\/[\d]+\/[\d]+)')
+        regex_additional_collection = re.compile(r'We also collect (.*) on (.*) -')
+
+        # Search for the 'Bins and Recycling' panel
+        for panel in soup.select('div[class*="panel"]'):
+            # A matching div may have no <h2>; skip it rather than
+            # fail on None.
+            heading = panel.h2
+            if heading is None or heading.text.strip() != "Bins and Recycling":
+                continue
+
+            # Gather the bin types and dates
+            for collection in panel.select('div > strong'):
+                details = collection.find_next('p')
+                if details is None:
+                    continue
+                m = regex_date.match(details.text.strip())
+                if m:
+                    collection_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
+                    data["bins"].append({"type": collection.text.strip(), "collectionDate": collection_date.strftime('%d/%m/%Y')})
+                    all_collection_dates.append(collection_date)
+
+            # Search for additional collections
+            for p in panel.select('p'):
+                m2 = regex_additional_collection.match(p.text.strip())
+                # "each collection day" can only be resolved to a date
+                # when at least one dated collection was found above.
+                if m2 and "each collection day" in m2.group(2) and all_collection_dates:
+                    collection_date = min(all_collection_dates)
+                    data["bins"].append({"type": m2.group(1), "collectionDate": collection_date.strftime('%d/%m/%Y')})
+
+            # Only the first 'Bins and Recycling' panel is needed
+            break
+
+        return data
diff --git a/wiki/Councils.md b/wiki/Councils.md index bd23366ceb..1dfade263d 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -74,6 +74,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Mid and East Antrim Borough Council](#mid-and-east-antrim-borough-council) - [Mid 
Sussex District Council](#mid-sussex-district-council) - [Milton Keynes City Council](#milton-keynes-city-council) +- [Mole Valley District Council](#mole-valley-district-council) - [Neath Port Talbot Council](#neath-port-talbot-council) - [Newark and Sherwood District Council](#newark-and-sherwood-district-council) - [Newcastle City Council](#newcastle-city-council) @@ -890,6 +891,19 @@ Note: Pass the name of the estate with the UPRN parameter, wrapped in double quo --- +### Mole Valley District Council +```commandline +python collect_data.py MoleValleyDistrictCouncil https://molevalley.cloudmappin.com/mmv/ -s -p "XXXX XXX" -u XXXXXXXX +``` +Additional parameters: +- `-s` - skip get URL +- `-u` - UPRN +- `-p` - postcode + +Note: In order to use this parser, you must provide a valid postcode and optionally a UPRN for your specific address. + +--- + ### Neath Port Talbot Council ```commandline python collect_data.py NeathPortTalbotCouncil https://www.npt.gov.uk -s -u XXXXXXXX -p "XXXX XXX" -w http://HOST:PORT/