
Commit

Merge pull request #793 from thjont/belfast-city-council
Feat: Belfast City Council
robbrad authored Sep 2, 2024
2 parents e5c01dd + b872022 commit 419f70e
Showing 3 changed files with 118 additions and 0 deletions.
7 changes: 7 additions & 0 deletions uk_bin_collection/tests/input.json
@@ -68,6 +68,13 @@
"wiki_name": "Bedfordshire Council",
"wiki_note": "In order to use this parser, you must provide a valid postcode and a uprn retrieved from the councils website for your specific address"
},
"BelfastCityCouncil": {
"skip_get_url": true,
"url": "https://online.belfastcity.gov.uk/find-bin-collection-day/Default.aspx",
"wiki_name": "BelfastCityCouncil",
"postcode": "BT10 0GY",
"uprn": "185086469"
},
"BexleyCouncil": {
"house_number": "1 Dorchester Avenue, Bexley",
"postcode": "DA5 3AH",
100 changes: 100 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/BelfastCityCouncil.py
@@ -0,0 +1,100 @@
import logging
from datetime import datetime

import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


class CouncilClass(AbstractGetBinDataClass):
"""
Concrete classes have to implement all abstract operations of the
base class. They can also override some operations with a default
implementation.
"""

def get_session_variable(self, soup, id) -> str:
"""Extract ASP.NET variable from the HTML."""
element = soup.find("input", {"id": id})
if element:
return element.get("value")
else:
raise ValueError(f"Unable to find element with id: {id}")

    def parse_data(self, page: str, **kwargs) -> dict:
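        # The council site is an ASP.NET WebForms page, so the schedule is
        # fetched with three requests: a GET to pick up the hidden
        # __VIEWSTATE/__EVENTVALIDATION tokens, a POST of the postcode to
        # populate the address list, and a final POST selecting the address
        # (UPRN) to render the bins grid.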
        bin_data = {"bins": []}
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/119.0"
        }

        session = requests.Session()
        session.headers.update(headers)

        user_uprn = kwargs.get("uprn")
        user_postcode = kwargs.get("postcode")
        URL = "https://online.belfastcity.gov.uk/find-bin-collection-day/Default.aspx"

        # Initial GET to collect the ASP.NET state tokens for the postcode "Find address" postback
        response = session.get(URL)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        form_data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": self.get_session_variable(soup, "__VIEWSTATE"),
            "__VIEWSTATEGENERATOR": self.get_session_variable(soup, "__VIEWSTATEGENERATOR"),
            "__SCROLLPOSITIONX": "0",
            "__SCROLLPOSITIONY": "0",
            "__EVENTVALIDATION": self.get_session_variable(soup, "__EVENTVALIDATION"),
            "ctl00$MainContent$searchBy_radio": "P",
            "ctl00$MainContent$Street_textbox": "",
            "ctl00$MainContent$Postcode_textbox": user_postcode,
            "ctl00$MainContent$AddressLookup_button": "Find address",
        }
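        # ASP.NET rejects a postback whose __VIEWSTATE/__EVENTVALIDATION values
        # do not match the page that served them, so the tokens are re-read
        # from each response before the next POST.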

        # POST the postcode, then rebuild the state tokens for the "Select address" postback
        response = session.post(URL, data=form_data)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        form_data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": self.get_session_variable(soup, "__VIEWSTATE"),
            "__VIEWSTATEGENERATOR": self.get_session_variable(soup, "__VIEWSTATEGENERATOR"),
            "__SCROLLPOSITIONX": "0",
            "__SCROLLPOSITIONY": "0",
            "__EVENTVALIDATION": self.get_session_variable(soup, "__EVENTVALIDATION"),
            "ctl00$MainContent$searchBy_radio": "P",
            "ctl00$MainContent$Street_textbox": "",
            "ctl00$MainContent$Postcode_textbox": user_postcode,
            "ctl00$MainContent$lstAddresses": user_uprn,
            "ctl00$MainContent$SelectAddress_button": "Select address",
        }

        # Final POST to retrieve the bin collection schedule
        response = session.post(URL, data=form_data)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Find the bins grid and parse each collection row
        table = soup.find("div", {"id": "binsGrid"})
        if table:
            rows = table.find_all("tr")
            for row in rows:
                columns = row.find_all("td")
                if len(columns) >= 4:
                    collection_type = columns[0].get_text(strip=True)
                    collection_date_raw = columns[3].get_text(strip=True)
                    # Single-digit day numbers are padded with an extra space,
                    # so strip all spaces to keep the format consistent
                    collection_date = datetime.strptime(
                        collection_date_raw.replace(" ", ""), "%a%b%d%Y"
                    )
                    bin_entry = {
                        "type": collection_type,
                        "collectionDate": collection_date.strftime(date_format),
                    }
                    bin_data["bins"].append(bin_entry)
        return bin_data
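For a quick check outside the CLI, the new parser can also be driven directly from Python. This is a minimal sketch, assuming the package is importable and that `CouncilClass` can be instantiated on its own; it reuses the test postcode and UPRN from `input.json` above and passes an empty `page`, since `parse_data` performs its own HTTP requests.

```python
from uk_bin_collection.uk_bin_collection.councils.BelfastCityCouncil import CouncilClass

# Test values from the BelfastCityCouncil entry in tests/input.json
parser = CouncilClass()
data = parser.parse_data("", postcode="BT10 0GY", uprn="185086469")

for bin_entry in data["bins"]:
    print(bin_entry["type"], bin_entry["collectionDate"])
```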
11 changes: 11 additions & 0 deletions wiki/Councils.md
@@ -17,6 +17,7 @@ This document is still a work in progress, don't worry if your council isn't listed
- [Basingstoke Council](#basingstoke-council)
- [Bath and North East Somerset Council](#bath-and-north-east-somerset-council)
- [Bedfordshire Council](#bedfordshire-council)
- [BelfastCityCouncil](#belfastcitycouncil)
- [Bexley Council](#bexley-council)
- [Blackburn Council](#blackburn-council)
- [Bolton Council](#bolton-council)
@@ -239,6 +240,16 @@

---

### BelfastCityCouncil
```commandline
python collect_data.py BelfastCityCouncil https://online.belfastcity.gov.uk/find-bin-collection-day/Default.aspx -u XXXXXXXX -p "XXXX XXX"
```
- `-u` - UPRN
- `-p` - postcode

Note: The UPRN can be found with your browser's developer tools: search for your postcode on the council's site, select your address, and look for the `ctl00$MainContent$lstAddresses` field in the form data of the resulting request.
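
Alternatively, a short script can list the available addresses and their UPRNs. The sketch below is illustrative only: it repeats the parser's "Find address" postback and assumes the rendered dropdown carries the same `ctl00$MainContent$lstAddresses` name as the posted form field; `hidden()` is a hypothetical local helper.

```python
import requests
from bs4 import BeautifulSoup

URL = "https://online.belfastcity.gov.uk/find-bin-collection-day/Default.aspx"
POSTCODE = "BT10 0GY"  # example postcode from the test data


def hidden(soup, field_id):
    """Read a hidden ASP.NET state field by its element id."""
    return soup.find("input", {"id": field_id})["value"]


session = requests.Session()
soup = BeautifulSoup(session.get(URL).text, "html.parser")

# Same "Find address" postback the parser performs
form_data = {
    "__EVENTTARGET": "",
    "__EVENTARGUMENT": "",
    "__VIEWSTATE": hidden(soup, "__VIEWSTATE"),
    "__VIEWSTATEGENERATOR": hidden(soup, "__VIEWSTATEGENERATOR"),
    "__EVENTVALIDATION": hidden(soup, "__EVENTVALIDATION"),
    "ctl00$MainContent$searchBy_radio": "P",
    "ctl00$MainContent$Street_textbox": "",
    "ctl00$MainContent$Postcode_textbox": POSTCODE,
    "ctl00$MainContent$AddressLookup_button": "Find address",
}
soup = BeautifulSoup(session.post(URL, data=form_data).text, "html.parser")

# Option values of the address dropdown are the UPRNs (element name assumed)
dropdown = soup.find("select", {"name": "ctl00$MainContent$lstAddresses"})
for option in dropdown.find_all("option") if dropdown else []:
    print(option.get("value"), option.get_text(strip=True))
```
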
---

### Bexley Council
```commandline
python collect_data.py BexleyCouncil https://mybexley.bexley.gov.uk/service/When_is_my_collection_day -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/
