Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: #693 Cheshire West & Chester Council Sensor Bug #724

Merged
merged 2 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,9 @@
"wiki_note": "Both the UPRN and a one-line address are passed in the URL, which needs to be wrapped in double quotes. The one-line address is made up of the house number, street name and postcode.\nUse the form [here](https://online.cheshireeast.gov.uk/mycollectionday/) to find them, then take the first line and post code and replace all spaces with `%20`."
},
"CheshireWestAndChesterCouncil": {
"house_number": "21",
"postcode": "CW8 1DX",
"house_number": "Hill View House",
"postcode": "CH3 9ER",
"uprn": "100012346655",
"skip_get_url": true,
"url": "https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day",
"web_driver": "http://selenium:4444",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
import time

import logging
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
"""
Concrete classes have to implement all abstract operations of the
base class. They can also override some operations with a default
implementation.
"""

def parse_data(self, page: str, **kwargs) -> dict:
driver = None
try:
data = {"bins": []}
collections = []

user_uprn = kwargs.get("uprn")
user_paon = kwargs.get("paon")
user_postcode = kwargs.get("postcode")
web_driver = kwargs.get("web_driver")
Expand All @@ -33,93 +27,74 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Create Selenium webdriver
driver = create_webdriver(web_driver, headless)
driver.get(
"https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day"
)

time.sleep(5)

cookie_close_button = WebDriverWait(driver, timeout=15).until(
EC.presence_of_element_located((By.ID, "ccc-close"))
)
cookie_close_button.click()

find_collection_button = WebDriverWait(driver, timeout=10).until(
EC.presence_of_element_located(
(By.LINK_TEXT, "Find your collection day")
)
)
find_collection_button.click()

banner_close_button = WebDriverWait(driver, timeout=30).until(
EC.presence_of_element_located((By.ID, "close-cookie-message"))
)
banner_close_button.click()

time.sleep(5)

frame = driver.find_element(
By.XPATH, "/html/body/div[4]/section/div/div[2]/div[2]/div/iframe"
)
driver.switch_to.frame(frame)

# Wait for the postcode field to appear then populate it
inputElement_postcode = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.NAME, "postcode_search"))
)
inputElement_postcode.send_keys(user_postcode)

address_box_text = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "label_Choose_Address"))
)
address_box_text.click()
time.sleep(2)

address_selection_menu = Select(
driver.find_element(By.ID, "Choose_Address")
)
for idx, addr_option in enumerate(address_selection_menu.options):
option_name = addr_option.text[0 : len(user_paon)]
if option_name == user_paon:
selected_address = addr_option
break
address_selection_menu.select_by_visible_text(selected_address.text)

WebDriverWait(driver, 30).until(
EC.presence_of_element_located(
(By.XPATH, '//*[@id="bin-schedule-content"]/div/h3')
)
)
if headless:
driver.set_window_size(1920, 1080)

driver.get("https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day")
wait = WebDriverWait(driver, 60)

def click_element(by, value):
element = wait.until(EC.element_to_be_clickable((by, value)))
driver.execute_script("arguments[0].scrollIntoView();", element)
element.click()

logging.info("Accepting cookies")
click_element(By.ID, "ccc-close")

logging.info("Finding collection day")
click_element(By.LINK_TEXT, "Find your collection day")

logging.info("Switching to iframe")
iframe_presence = wait.until(EC.presence_of_element_located((By.ID, "fillform-frame-1")))
driver.switch_to.frame(iframe_presence)

logging.info("Entering postcode")
input_element_postcode = wait.until(EC.presence_of_element_located((By.XPATH, '//input[@id="postcode_search"]')))
input_element_postcode.send_keys(user_postcode)

pcsearch_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='postcode_search']")))
click_element(By.XPATH, "//input[@id='postcode_search']")

logging.info("Selecting address")
dropdown = wait.until(EC.element_to_be_clickable((By.ID, "Choose_Address")))
dropdown_options = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "lookup-option")))
drop_down_values = Select(dropdown)
option_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]')))
driver.execute_script("arguments[0].scrollIntoView();", option_element)
drop_down_values.select_by_value(str(user_uprn))

logging.info("Waiting for bin schedule")
wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'bin-schedule-content-bin-card')))

logging.info("Extracting bin collection data")
soup = BeautifulSoup(driver.page_source, features="html.parser")
soup.prettify()
bin_cards = soup.find_all("div", {"class": "bin-schedule-content-bin-card"})
collections = []

# Get collections
bin_cards = soup.find_all("div", {"class": "bin-schedule-content-info"})
for card in bin_cards:
bin_name = card.contents[0].text.strip() + " bin"
bin_date = datetime.strptime(
card.contents[1].text.split(":")[1].strip(), "%A, %d %B %Y"
)
bin_info = card.find("div", {"class": "bin-schedule-content-info"})
bin_name = bin_info.find_all("p")[0].text.strip() + " bin"
bin_date_str = bin_info.find_all("p")[1].text.split(":")[1].strip()
bin_date = datetime.strptime(bin_date_str, "%A, %B %d, %Y")
collections.append((bin_name, bin_date))

ordered_data = sorted(collections, key=lambda x: x[1])

for item in ordered_data:
dict_data = {
"type": item[0].capitalize(),
"collectionDate": item[1].strftime(date_format),
}
data["bins"].append(dict_data)

logging.info("Data extraction complete")
return data

except Exception as e:
# Here you can log the exception if needed
print(f"An error occurred: {e}")
# Optionally, re-raise the exception if you want it to propagate
logging.error(f"An error occurred: {e}")
raise

finally:
# This block ensures that the driver is closed regardless of an exception
if driver:
driver.quit()

return data
Loading