Skip to content

Commit

Permalink
Merge pull request #341 from OliverCullimore/339-wakefield
Browse files Browse the repository at this point in the history
Fix Wakefield City Council
  • Loading branch information
robbrad authored Sep 19, 2023
2 parents d7006a2 + 64fbc89 commit a937fb4
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 93 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/Welcome8",
"$ref": "#/definitions/Welcome5",
"definitions": {
"Welcome8": {
"Welcome5": {
"type": "object",
"additionalProperties": false,
"properties": {
Expand All @@ -16,14 +16,14 @@
"required": [
"bins"
],
"title": "Welcome8"
"title": "Welcome5"
},
"Bin": {
"type": "object",
"additionalProperties": false,
"properties": {
"type": {
"type": "string"
"$ref": "#/definitions/Type"
},
"collectionDate": {
"type": "string"
Expand All @@ -34,6 +34,15 @@
"type"
],
"title": "Bin"
},
"Type": {
"type": "string",
"enum": [
"Mixed recycling",
"Household waste",
"Garden waste recycling"
],
"title": "Type"
}
}
}
7 changes: 3 additions & 4 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -481,11 +481,10 @@
},
"WakefieldCityCouncil": {
"SKIP_GET_URL": "SKIP_GET_URL",
"uprn": "63035490",
"url": "https://www.wakefield.gov.uk/site/Where-I-Live-Results?uprn=63035490",
"url": "https://www.wakefield.gov.uk/where-i-live/?uprn=63035490&a=115%20Elizabeth%20Drive%20Castleford%20WF10%203RR&usrn=41801243&e=445418&n=426091&p=WF10%203RR",
"wiki_name": "Wakefield City Council",
"wiki_command_url_override": "https://www.wakefield.gov.uk/site/Where-I-Live-Results?uprn=XXXXXXXX",
"wiki_note": "Replace XXXXXXXX with UPRN and also pass in -u parameter"
"wiki_command_url_override": "https://www.wakefield.gov.uk/where-i-live/?uprn=XXXXXXXXXXX&a=XXXXXXXXXXX&usrn=XXXXXXXXXXX&e=XXXXXXXXXXX&n=XXXXXXXXXXX&p=XXXXXXXXXXX",
"wiki_note": "Follow the instructions [here](https://www.wakefield.gov.uk/where-i-live/) until you get the page that includes a \"Bin Collections\" section then copy the URL and replace the URL in the command."
},
"WarwickDistrictCouncil": {
"url": "https://estates7.warwickdc.gov.uk/PropertyPortal/Property/Recycling/100070263793",
Expand Down
80 changes: 78 additions & 2 deletions uk_bin_collection/tests/outputs/WakefieldCityCouncil.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,88 @@
{
"bins": [
{
"type": "Mixed recycling",
"collectionDate": "20/09/2023"
},
{
"type": "Household waste",
"collectionDate": "27/09/2023"
},
{
"type": "Garden waste recycling",
"collectionDate": "27/09/2023"
},
{
"type": "Mixed recycling",
"collectionDate": "04/10/2023"
},
{
"type": "Household waste",
"collectionDate": "11/10/2023"
},
{
"type": "Garden waste recycling",
"collectionDate": "11/10/2023"
},
{
"type": "Mixed recycling",
"collectionDate": "18/10/2023"
},
{
"type": "Household waste",
"collectionDate": "25/10/2023"
},
{
"type": "Garden waste recycling",
"collectionDate": "25/10/2023"
},
{
"type": "Mixed recycling",
"collectionDate": "01/11/2023"
},
{
"type": "Household waste",
"collectionDate": "08/11/2023"
},
{
"type": "Garden waste recycling",
"collectionDate": "08/11/2023"
},
{
"type": "Mixed recycling",
"collectionDate": "15/11/2023"
},
{
"type": "Household waste",
"collectionDate": "22/11/2023"
},
{
"type": "Garden waste recycling",
"collectionDate": "22/11/2023"
},
{
"type": "Mixed recycling",
"collectionDate": "29/11/2023"
},
{
"type": "Household waste",
"collectionDate": "06/12/2023"
},
{
"type": "Mixed recycling",
"collectionDate": "13/12/2023"
},
{
"type": "Household waste",
"collectionDate": "20/12/2023"
},
{
"type": "Household waste",
"collectionDate": "10/4/2020"
"collectionDate": "03/01/2024"
},
{
"type": "Mixed recycling",
"collectionDate": "3/4/2020"
"collectionDate": "10/01/2024"
}
]
}
134 changes: 52 additions & 82 deletions uk_bin_collection/uk_bin_collection/councils/WakefieldCityCouncil.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# This script pulls (in one hit) the data
# from Warwick District Council Bins Data
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand All @@ -16,89 +14,61 @@ class CouncilClass(AbstractGetBinDataClass):
"""

def parse_data(self, page: str, **kwargs) -> dict:
# UPRN passed in as an argument
user_uprn = kwargs.get("uprn")
check_uprn(user_uprn)
# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# cookies = {
# 'visid_incap_2049675': 'xZCc/tFgSzaFmZD7XkN3koJGuGMAAAAAQUIPAAAAAAB7QGC8d+Jmlk0i3y06Zer6',
# 'WSS_FullScreenMode': 'false',
# 'incap_ses_1184_2049675': 'a2ZQQ9lCM3wa4+23mWpuEHnAuGMAAAAAfl4ebLXAvItl6dCfbMEWoQ==',
# }
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
}
# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver.get(kwargs.get("url"))

params = {
"uprn": user_uprn,
}

requests.packages.urllib3.disable_warnings()
s = requests.Session() # gets cookies and keeps them

wakefield_session = s.get("https://www.wakefield.gov.uk/", headers=headers)
# Make a GET for the data with correct params and cookies
response = s.get(
"https://www.wakefield.gov.uk/site/Where-I-Live-Results",
params=params,
headers=headers,
verify=False,
)

# Have BS4 process the page
soup = BeautifulSoup(response.text, features="html.parser")
# Make a BS4 object
soup = BeautifulSoup(driver.page_source, features="html.parser")
soup.prettify()
data = {"bins": []}

# Build a list of (type, date) tuples for collections. Add the first entries for the bin types since they're
# separate elements on the page. All dates are parsed from text to datetime
collections = [
(
"Household waste",
datetime.strptime(
soup.select(
"#ctl00_PlaceHolderMain_Waste_output > div:nth-child(4) > "
"div:nth-child(3) > div:nth-child(2)"
)[0].text,
"%d/%m/%Y",
),
),
(
"Mixed recycling",
datetime.strptime(
soup.select(
"#ctl00_PlaceHolderMain_Waste_output > div:nth-child(6) > "
"div:nth-child(3) > div:nth-child(2)"
)[0].text,
"%d/%m/%Y",
),
),
]

# Process the hidden future collection dates by adding them to the tuple
household_future_table = soup.find(
"table", {"class": "mb10 wilWasteContent RESIDUAL (D)FutureData"}
).find_all("td")
for x in household_future_table:
collections.append(
("Household waste", datetime.strptime(x.text, "%d/%m/%Y"))
)
recycling_future_table = soup.find(
"table", {"class": "mb10 wilWasteContent RECYCLING (D)FutureData"}
).find_all("td")
for x in recycling_future_table:
collections.append(
("Mixed recycling", datetime.strptime(x.text, "%d/%m/%Y"))
)

# Order the data by datetime, then add to and return it as a dictionary
ordered_data = sorted(collections, key=lambda x: x[1])
data = {"bins": []}
for item in ordered_data:
dict_data = {
"type": item[0],
"collectionDate": item[1].strftime(date_format),
}
data["bins"].append(dict_data)
sections = soup.find_all("div", {"class": "wil_c-content-section_heading"})
for s in sections:
if s.get_text(strip=True).lower() == "bin collections":
rows = s.find_next_sibling("div", {"class": "c-content-section_body"}).find_all(
"div", {"class": "u-mb-8"}
)
for row in rows:
title = row.find("div", {"class": "u-mb-4"})
collections = row.find_all("div", {"class": "u-mb-2"})
if title and collections:
for c in collections:
if c.get_text(strip=True).lower().startswith('next collection'):
# add next collection
next_collection_date = datetime.strptime(
c.get_text(strip=True).replace('Next collection - ', ''),
"%A, %d %B %Y",
).strftime(date_format)
dict_data = {
"type": title.get_text(strip=True).capitalize(),
"collectionDate": next_collection_date,
}
data["bins"].append(dict_data)
# add future collections without duplicating next collection
future_collections = row.find("ul", {"class": "u-mt-4"}).find_all("li")
for c in future_collections:
future_collection_date = datetime.strptime(
c.get_text(strip=True),
"%A, %d %B %Y",
).strftime(date_format)
if future_collection_date != next_collection_date:
dict_data = {
"type": title.get_text(strip=True).capitalize(),
"collectionDate": future_collection_date,
}
data["bins"].append(dict_data)

data["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return data
2 changes: 1 addition & 1 deletion uk_bin_collection/uk_bin_collection/get_bin_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def template_method(self, address_url: str, **kwargs) -> None: # pragma: no cov
json_output = self.output_json(bin_data_dict)
else:
bin_data_dict = self.parse_data(
"", postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn
"", postcode=this_postcode, paon=this_paon, uprn=this_uprn, usrn=this_usrn, url=this_url
)
json_output = self.output_json(bin_data_dict)

Expand Down

0 comments on commit a937fb4

Please sign in to comment.