From 7d925a0ada749a8d450639bfab21454590f3b6b9 Mon Sep 17 00:00:00 2001
From: Phil Harle
Date: Fri, 24 Apr 2020 14:41:00 +0100
Subject: [PATCH] Add Newcastle City Council

---
 outputs/NewcastleCityCouncil.json | 16 +++++++++
 scripts/NewcastleCityCouncil.py   | 59 +++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 outputs/NewcastleCityCouncil.json
 create mode 100644 scripts/NewcastleCityCouncil.py

diff --git a/outputs/NewcastleCityCouncil.json b/outputs/NewcastleCityCouncil.json
new file mode 100644
index 0000000000..da2a1e3124
--- /dev/null
+++ b/outputs/NewcastleCityCouncil.json
@@ -0,0 +1,16 @@
+{
+    "bins": [
+        {
+            "BinType": "Domestic Waste",
+            "NextCollectionDate": "2020-05-05"
+        },
+        {
+            "BinType": "Recycling",
+            "NextCollectionDate": "2020-04-28"
+        },
+        {
+            "BinType": "Garden Waste",
+            "NextCollectionDate": "2020-04-24"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/scripts/NewcastleCityCouncil.py b/scripts/NewcastleCityCouncil.py
new file mode 100644
index 0000000000..d24f6774f7
--- /dev/null
+++ b/scripts/NewcastleCityCouncil.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Print the next bin collection dates for a Newcastle City Council address.
+
+Fetches the council's bin-collection HTML fragment for a property (selected
+by the uprn query parameter) and prints a JSON document of the form
+{"bins": [{"BinType": ..., "NextCollectionDate": "YYYY-MM-DD"}, ...]}.
+"""
+from urllib.request import Request, urlopen
+import json
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
+headers = {'User-Agent': user_agent}
+
+# Replace the XXXXXXXXXXXX placeholder in the URL before running
+req = Request(
+    'https://community.newcastle.gov.uk/my-neighbourhood/ajax/getBinsNew.php?uprn=XXXXXXXXXXXX',
+    headers=headers,
+)
+
+# Close the HTTP response deterministically and fail rather than hang forever
+with urlopen(req, timeout=30) as response:
+    page = response.read().decode("utf8")
+
+soup = BeautifulSoup(page, features="html.parser")
+
+# Form a JSON wrapper
+data = {"bins": []}
+
+# Loop over the <strong> elements; each one heads the details of one bin
+for element in soup.find_all("strong"):
+    heading = str(element)
+
+    # Domestic Waste is formatted differently to other bins
+    if "Green Bin (Domestic Waste) details:" in heading:
+        collectionInfo = element.next_sibling.find('br').next_element
+    else:
+        collectionInfo = element.next_sibling.next_sibling.next_sibling.next_sibling
+
+    # The bin type is the text between the parentheses in the heading
+    binType = heading[heading.find("(")+1:heading.find(")")]
+
+    # Strip surrounding whitespace so strptime does not reject the date text
+    dateText = str(collectionInfo).replace('Next collection : ', '').strip()
+    collectionDate = str(datetime.strptime(dateText, '%d-%b-%Y').date())
+
+    dict_data = {
+        "BinType": binType,
+        "NextCollectionDate": collectionDate
+    }
+
+    # Add data to the main JSON wrapper
+    data["bins"].append(dict_data)
+
+# Make the JSON
+json_data = json.dumps(data, sort_keys=True, indent=4)
+
+print(json_data)