Skip to content

Commit

Permalink
fix: #141 Leeds speed up
Browse files Browse the repository at this point in the history
  • Loading branch information
robbrad committed Dec 16, 2023
1 parent d127a9f commit 56742da
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Feature: Test each council output matches expected results
| HuntingdonDistrictCouncil | None | None |
| KingstonUponThamesCouncil | None | None |
| LancasterCityCouncil | None | None |
| LeedsCityCouncil | None | None |
| LeedsCityCouncil | http://selenium:4444 | local |
| LisburnCastlereaghCityCouncil | None | None |
| LiverpoolCityCouncil | None | None |
| LondonBoroughHounslow | None | None |
Expand Down
5 changes: 3 additions & 2 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@
},
"LeedsCityCouncil": {
"house_number": "1",
"uprn": "72506983",
"postcode": "LS6 2SE",
"skip_get_url": true,
"url": "https://www.leeds.gov.uk/residents/bins-and-recycling/check-your-bin-day",
Expand Down Expand Up @@ -565,9 +566,9 @@
"wiki_name": "Rushcliffe Borough Council"
},
"RushmoorCouncil": {
"url": "https://www.rushmoor.gov.uk/recycling-rubbish-and-environment/bins-and-recycling/download-or-print-your-bin-collection-calendar/?uprn=XXXXXXXXXX&weeks=16",
"url": "https://www.rushmoor.gov.uk/Umbraco/Api/BinLookUpWorkAround/Get?selectedAddress=100060545034",
"wiki_name": "Rushmoor Council",
"wiki_note": "Replace XXXXXXXXXX in URL with your own UPRN."
"wiki_note": "Replace selectedAddress number in URL with your own UPRN."
},
"SalfordCityCouncil": {
"skip_get_url": true,
Expand Down
136 changes: 71 additions & 65 deletions uk_bin_collection/uk_bin_collection/councils/LeedsCityCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

from bs4 import BeautifulSoup
from datetime import datetime
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

import pandas as pd
import urllib.request

Expand All @@ -17,78 +24,77 @@ def parse_data(self, page: str, **kwargs) -> dict:
"""
Parse council provided CSVs to get the latest bin collections for address
"""
# URLs to data sources
address_csv_url = "https://opendata.leeds.gov.uk/downloads/bins/dm_premises.csv"
collections_csv_url = "https://opendata.leeds.gov.uk/downloads/bins/dm_jobs.csv"

user_uprn = kwargs.get("uprn")
user_postcode = kwargs.get("postcode")
user_paon = kwargs.get("paon")

web_driver = kwargs.get("web_driver")
check_uprn(user_uprn)
check_postcode(user_postcode)
check_paon(user_paon)
# Create Selenium webdriver
page = f"https://www.leeds.gov.uk/residents/bins-and-recycling/check-your-bin-day"

driver = create_webdriver(web_driver)
driver.get(page)

# If you bang in the house number (or property name) and postcode in the box it should find your property

#iframe_presense = WebDriverWait(driver, 30).until(
# EC.presence_of_element_located((By.ID, "fillform-frame-1"))
#)

#driver.switch_to.frame(iframe_presense)
wait = WebDriverWait(driver, 60)

postcode_box = wait.until(
EC.element_to_be_clickable((By.ID, 'ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_txtPostCode'))
)
postcode_box.send_keys(user_postcode)

postcode_btn = wait.until(
EC.element_to_be_clickable((By.ID, 'ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_btnSearchAddress'))
)

postcode_btn.click()
address_dropdown = wait.until(
EC.element_to_be_clickable((By.ID, 'ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_ddlAddressList'))
)
dropdownSelect = Select(address_dropdown)
dropdownSelect.select_by_value(str(user_uprn))
results = wait.until(
EC.presence_of_element_located((By.ID, "ctl00_ctl48_g_eea1a8ba_4306_488e_96f2_97f22038e29f_ctl00_BinResultsDetails"))
)


data = {"bins": []} # dictionary for data
prop_id = 0 # LCC use city wide URPNs in this dataset
result_row = None # store the property as a row

# Get address csv and give it headers (pandas bypasses downloading the file)
# print("Getting address data...")
with urllib.request.urlopen(address_csv_url) as response:
addr = pd.read_csv(
response,
names=[
"PropertyId",
"PropertyName",
"PropertyNo",
"Street",
"Town",
"City",
"Postcode",
],
sep=",",
)

# Get collections csv and give it headers
# print("Getting collection data...")
with urllib.request.urlopen(collections_csv_url) as response:
coll = pd.read_csv(
response, names=["PropertyId", "BinType", "CollectionDate"], sep=","
)

# Find the property id from the address data
# ("Finding property reference...")
for row in addr.itertuples():
if (
str(row.Postcode).replace(" ", "").lower()
== user_postcode.replace(" ", "").lower()
):
if row.PropertyNo == user_paon:
prop_id = row.PropertyId
# print(f"Reference: {str(prop_id)}")
continue

# For every match on the property id in the collections data, add the bin type and date to list
# Note: time is 7am as that's when LCC ask bins to be out by
job_list = []
# print(f"Finding collections for property reference: {user_paon} {result_row.Street} "
# f"{result_row.Postcode}...")
for row in coll.itertuples():
if row.PropertyId == prop_id:
job_list.append([row.BinType, datetime.strptime(row.CollectionDate, "%d/%m/%y").strftime(date_format)])

# If jobs exist, sort list by date order. Load list into dictionary to return
# print("Processing collections...")
if len(job_list) > 0:
job_list.sort(key=lambda x: datetime.strptime(x[1], date_format))
for i in range(len(job_list)):
job_date = datetime.strptime(job_list[i][1], date_format)
if datetime.now() < job_date:
soup = BeautifulSoup(driver.page_source, 'html.parser')


bin_types = soup.find_all('ul', class_='binCollectionTimesList')

for bin_collection_dates in bin_types:
bin_collection_list = bin_collection_dates.find_all('li', class_='')

if bin_collection_list:
collection_dates = [
date.text.strip()
for date in bin_collection_list
]

# Convert the collection dates to the desired format
formatted_dates = [
datetime.strptime(date, "%A %d %b %Y").strftime(date_format)
for date in collection_dates
]

# Extract the type of bin from the header
bin_type = bin_collection_dates.find_previous('h3').text.split()[0]

# Adding data to the 'bins' dictionary for each date
for date in formatted_dates:
dict_data = {
"type": job_list[i][0],
"collectionDate": job_list[i][1],
"type": bin_type,
"collectionDate": date
}
data["bins"].append(dict_data)
else:
print("No bin collections found for property!")

return data

0 comments on commit 56742da

Please sign in to comment.