Skip to content

Commit

Permalink
fix: St Helens Borough Council
Browse files Browse the repository at this point in the history
fix: #753
  • Loading branch information
m26dvd committed Nov 1, 2024
1 parent b38004c commit 7c5ce18
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 47 deletions.
7 changes: 5 additions & 2 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -1216,10 +1216,13 @@
"wiki_name": "St Albans City and District Council"
},
"StHelensBC": {
"house_number": "15",
"postcode": "L34 2GA",
"skip_get_url": true,
"uprn": "39081672",
"url": "https://www.sthelens.gov.uk/",
"wiki_name": "St Helens Borough Council"
"web_driver": "http://selenium:4444",
"wiki_name": "St Helens Borough Council",
"wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes"
},
"StaffordBoroughCouncil": {
"uprn": "100032203010",
Expand Down
149 changes: 107 additions & 42 deletions uk_bin_collection/uk_bin_collection/councils/StHelensBC.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
Expand All @@ -8,50 +12,111 @@
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin-collection scraper for St Helens Borough Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    # Public page hosting the "check your collection dates" form.
    _COLLECTION_DATES_URL = (
        "https://www.sthelens.gov.uk/article/3473/Check-your-collection-dates"
    )

    @staticmethod
    def _xpath_literal(value: str) -> str:
        """
        Quote ``value`` as an XPath 1.0 string literal.

        XPath 1.0 has no escape character, so a value containing both quote
        styles has to be assembled with ``concat()``. Without this, a house
        name such as ``St John's`` would produce an invalid expression.
        """
        if "'" not in value:
            return f"'{value}'"
        if '"' not in value:
            return f'"{value}"'
        pieces = value.split("'")
        return "concat(" + ", \"'\", ".join(f"'{piece}'" for piece in pieces) + ")"

    @staticmethod
    def _parse_collections(soup) -> list:
        """
        Convert the rows of the rendered collections table into bin entries.

        Rows containing a ``th`` are treated as month headers; every other
        row with at least four cells is a collection whose first cell is the
        day and whose fourth cell lists the waste types joined by " & ".
        """
        bins = []
        current_month = ""
        for row in soup.find_all("tr"):
            month_header = row.find("th")
            if month_header is not None:
                current_month = month_header.get_text(strip=True)
                continue

            columns = row.find_all("td")
            if len(columns) < 4:
                continue

            day = columns[0].get_text(strip=True)
            # The day cell and the month header together are parsed as
            # "<day> <month name> <year>".
            collection_date = datetime.strptime(
                f"{day} {current_month}", "%d %B %Y"
            ).strftime("%d/%m/%Y")

            for bin_type in columns[3].get_text(strip=True).split(" & "):
                if not bin_type:
                    # An empty cell would otherwise yield a nameless bin.
                    continue
                bins.append({"type": bin_type, "collectionDate": collection_date})
        return bins

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Drive the council's address-lookup form and return the collections.

        Args:
            page: Not read; the data is fetched through Selenium instead.
            **kwargs: Reads ``paon`` (house name/number), ``postcode``,
                ``web_driver`` (remote Selenium URL) and ``headless``.

        Returns:
            ``{"bins": [{"type": str, "collectionDate": "dd/mm/YYYY"}, ...]}``

        Raises:
            Exception: Any failure from input validation, Selenium or date
                parsing is printed and then re-raised unchanged.
        """
        data = {"bins": []}
        driver = None
        try:
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(user_paon)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(self._COLLECTION_DATES_URL)

            # NOTE: clicking the cookie-banner accept button (element id
            # "ccc-notify-accept") was disabled in the original code and is
            # intentionally still not performed here.

            # Wait for the postcode field to appear then populate it
            postcode_input = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_POSTCODE")
                )
            )
            postcode_input.send_keys(user_postcode)

            # Click search button
            find_address = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_FINDADDRESS_NEXT")
                )
            )
            find_address.click()

            # Open the address drop-down
            WebDriverWait(driver, timeout=30).until(
                EC.element_to_be_clickable(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS_chosen")
                )
            ).click()

            # Pick the first entry whose text starts with the house
            # name/number; the value is quoted safely for XPath.
            paon_literal = self._xpath_literal(str(user_paon))
            address_xpath = (
                "//ul[@id='RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS-chosen-search-results']"
                f"/li[starts-with(text(), {paon_literal})]"
            )
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, address_xpath))
            ).click()

            WebDriverWait(driver, timeout=30).until(
                EC.element_to_be_clickable(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESSNEXT_NEXT")
                )
            ).click()

            # Wait for the collections table to appear
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "RESIDENTCOLLECTIONDATES__FIELDS_OUTER")
                )
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")
            data["bins"].extend(self._parse_collections(soup))

        except Exception as e:
            # Report the failure, then let it propagate to the caller.
            print(f"An error occurred: {e}")
            raise
        finally:
            # Ensure the browser session is closed whether or not we failed.
            if driver:
                driver.quit()
        return data
10 changes: 7 additions & 3 deletions wiki/Councils.md
Original file line number Diff line number Diff line change
Expand Up @@ -1587,7 +1587,7 @@ Additional parameters:

### Newark and Sherwood District Council
```commandline
python collect_data.py NewarkAndSherwoodDC http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX
python collect_data.py NewarkAndSherwoodDC http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1
```

Note: Replace XXXXXXXX with UPRN.
Expand Down Expand Up @@ -2155,11 +2155,15 @@ Additional parameters:

### St Helens Borough Council
```commandline
python collect_data.py StHelensBC https://www.sthelens.gov.uk/ -s -u XXXXXXXX
python collect_data.py StHelensBC https://www.sthelens.gov.uk/ -s -p "XXXX XXX" -n XX -w http://HOST:PORT/
```
Additional parameters:
- `-s` - skip get URL
- `-u` - UPRN
- `-p` - postcode
- `-n` - house number
- `-w` - remote Selenium web driver URL (required for Home Assistant)

Note: Pass the house name or number via the `-n` (house number) parameter, wrapped in double quotes.

---

Expand Down

0 comments on commit 7c5ce18

Please sign in to comment.