From a14984cd52fc438a5cd8891c6ba2216f4d56e1a9 Mon Sep 17 00:00:00 2001 From: Oliver Cullimore Date: Sun, 29 Oct 2023 18:14:03 +0000 Subject: [PATCH] fix: Fix Chelmsford City Council --- .../ChelmsfordCityCouncil.schema | 10 +----- uk_bin_collection/tests/input.json | 2 +- .../tests/outputs/ChelmsfordCityCouncil.json | 34 +++++-------------- .../councils/ChelmsfordCityCouncil.py | 18 +++++++--- 4 files changed, 25 insertions(+), 39 deletions(-) diff --git a/uk_bin_collection/tests/council_schemas/ChelmsfordCityCouncil.schema b/uk_bin_collection/tests/council_schemas/ChelmsfordCityCouncil.schema index ee8247df88..6df5db59ee 100644 --- a/uk_bin_collection/tests/council_schemas/ChelmsfordCityCouncil.schema +++ b/uk_bin_collection/tests/council_schemas/ChelmsfordCityCouncil.schema @@ -23,7 +23,7 @@ "additionalProperties": false, "properties": { "type": { - "$ref": "#/definitions/Type" + "type": "string" }, "collectionDate": { "type": "string" @@ -34,14 +34,6 @@ "type" ], "title": "Bin" - }, - "Type": { - "type": "string", - "enum": [ - "Food waste, black bin, green box, card sack", - "Food waste, brown bin, paper sack, plastic and cartons bag" - ], - "title": "Type" } } } diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 8b44081a61..c44f2bed35 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -83,7 +83,7 @@ "wiki_note": "Replace XXXXXXXX with UPRN keeping \"cbc\" before it." }, "ChelmsfordCityCouncil": { - "url": "https://www.chelmsford.gov.uk/myhome/?entry[655e95fb-5f84-4d1b-ab65-878a2f3e3ce4]=5527623", + "url": "https://www.chelmsford.gov.uk/myhome/?entry[655e95fb-5f84-4d1b-ab65-878a2f3e3ce4]=14368859", "wiki_name": "Chelmsford City Council", "wiki_command_url_override": "https://www.chelmsford.gov.uk/myhome/XXXXXX", "wiki_note": "Follow the instructions [here](https://www.chelmsford.gov.uk/myhome/) until you get the page listing your \"Address\", \"Ward\" etc then copy the URL and replace the URL in the command." diff --git a/uk_bin_collection/tests/outputs/ChelmsfordCityCouncil.json b/uk_bin_collection/tests/outputs/ChelmsfordCityCouncil.json index 6a779d8f2c..3ba40ec281 100644 --- a/uk_bin_collection/tests/outputs/ChelmsfordCityCouncil.json +++ b/uk_bin_collection/tests/outputs/ChelmsfordCityCouncil.json @@ -1,56 +1,40 @@ { "bins": [ - { - "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "04/07/2023" - }, - { - "type": "Food waste, black bin, green box, card sack", - "collectionDate": "11/07/2023" - }, - { - "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "18/07/2023" - }, { "type": "Food waste, black bin, green box, card sack", - "collectionDate": "25/07/2023" + "collectionDate": "03/10/2023" }, { "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "01/08/2023" + "collectionDate": "10/10/2023" }, { "type": "Food waste, black bin, green box, card sack", - "collectionDate": "08/08/2023" + "collectionDate": "17/10/2023" }, { "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "15/08/2023" + "collectionDate": "24/10/2023" }, { "type": "Food waste, black bin, green box, card sack", - "collectionDate": "22/08/2023" + "collectionDate": "31/10/2023" }, { "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "29/08/2023" + "collectionDate": "07/11/2023" }, { "type": "Food waste, black bin, green box, card sack", - "collectionDate": "05/09/2023" + "collectionDate": "14/11/2023" }, { "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "12/09/2023" + "collectionDate": "21/11/2023" }, { "type": "Food waste, black bin, green box, card sack", - "collectionDate": "19/09/2023" - }, - { - "type": "Food waste, brown bin, paper sack, plastic and cartons bag", - "collectionDate": "26/09/2023" + "collectionDate": "28/11/2023" } ] } \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py index d838d7ceac..3e02eb4f3f 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ChelmsfordCityCouncil.py @@ -1,4 +1,7 @@ +import re +import requests from bs4 import BeautifulSoup + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import \ AbstractGetBinDataClass @@ -32,10 +35,12 @@ def parse_data(self, page: str, **kwargs) -> dict: # Loop the months for month in soup.find_all("div", {"class": "usercontent"}): - if month.find("h2"): - year = datetime.strptime( - month.find("h2").get_text(strip=True), "%B %Y" - ).strftime("%Y") + year = "" + if month.find("h2") and 'calendar' not in month.find("h2").get_text(strip=True): + year = datetime.strptime(month.find("h2").get_text(strip=True), "%B %Y").strftime("%Y") + elif month.find("h3"): + year = datetime.strptime(month.find("h3").get_text(strip=True), "%B %Y").strftime("%Y") + if year != "": for row in month.find_all("li"): results = re.search( "([A-Za-z]+ \\d\\d? [A-Za-z]+): (.+)", row.get_text(strip=True) @@ -49,4 +54,9 @@ def parse_data(self, page: str, **kwargs) -> dict: } data["bins"].append(dict_data) + # Sort collections + data["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + return data