Skip to content

Commit

Permalink
Merge pull request #499 from robbrad/fix_broken_councils
Browse files Browse the repository at this point in the history
191_fixingbroken_councils
  • Loading branch information
OliverCullimore authored Dec 24, 2023
2 parents 1f56a11 + 46a5551 commit 286085f
Show file tree
Hide file tree
Showing 8 changed files with 343 additions and 256 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@ Feature: Test each council output matches expected results
| BathAndNorthEastSomersetCouncil | None | None |
| BCPCouncil | None | None |
| BedfordshireCouncil | None | None |
| BexleyCouncil | http://selenium:4444 | local |
| BlackburnCouncil | http://selenium:4444 | local |
| BoltonCouncil | http://selenium:4444 | local |
| BristolCityCouncil | None | None |
| BromleyBoroughCouncil | http://selenium:4444 | local |
| BroxtoweBoroughCouncil | http://selenium:4444 | local |
| BuckinghamshireCouncil | http://selenium:4444 | local |
| BuryCouncil | None | None |
| CalderdaleCouncil | http://selenium:4444 | local |
| CannockChaseDistrictCouncil | None | None |
| CardiffCouncil | None | None |
| CastlepointDistrictCouncil | http://selenium:4444 | local |
| CharnwoodBoroughCouncil | None | None |
| ChelmsfordCityCouncil | None | None |
| CheshireEastCouncil | None | None |
Expand Down
23 changes: 15 additions & 8 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@
},
"BexleyCouncil": {
"skip_get_url": true,
"uprn": "[email protected]",
"url": "https://www.bexley.gov.uk/",
"url": "https://mybexley.bexley.gov.uk/service/When_is_my_collection_day",
"web_driver": "http://selenium:4444",
"postcode": "DA5 3AH",
"uprn": "100020196143",
"house_number":"1 Dorchester Avenue, Bexley",
"wiki_name": "Bexley Council",
"wiki_note": "In order to use this parser, you will need to sign up to [Bexley's @Home app](https://www.bexley.gov.uk/services/rubbish-and-recycling/bexley-home-recycling-app/about-app) (available for [iOS](https://apps.apple.com/gb/app/home-collection-reminder/id1050703690) and [Android](https://play.google.com/store/apps/details?id=com.contender.athome.android)).\nComplete the setup by entering your email and setting your address with postcode and address line.\nOnce you can see the calendar, you _should_ be good to run the parser.\nJust pass the email you used in quotes in the UPRN parameter.\n"
},
Expand All @@ -58,10 +61,11 @@
"BoltonCouncil": {
"skip_get_url": true,
"postcode": "BL1 5PQ",
        "uprn": "100010886936",
"url": "https://carehomes.bolton.gov.uk/bins.aspx",
"wiki_name": "Bolton Council",
        "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required single field that was UPRN and full address, now requires UPRN and postcode as separate fields.",
"web_driver": "http://selenium:4444"
},
"BristolCityCouncil": {
"skip_get_url": true,
Expand All @@ -73,7 +77,8 @@
"url": "https://recyclingservices.bromley.gov.uk/waste/6087017",
"wiki_command_url_override": "https://recyclingservices.bromley.gov.uk/waste/XXXXXXX",
"wiki_name": "Bromley Borough Council",
        "wiki_note": "Follow the instructions [here](https://recyclingservices.bromley.gov.uk/waste) until the \"Your bin days\" page then copy the URL and replace the URL in the command.",
"web_driver": "http://selenium:4444"
},
"BroxtoweBoroughCouncil": {
"postcode": "NG16 2LY",
Expand Down Expand Up @@ -104,8 +109,9 @@
"postcode": "OL14 7EX",
"skip_get_url": true,
"uprn": "010035034598",
        "url": "https://www.calderdale.gov.uk/environment/waste/household-collections/collectiondayfinder.jsp",
        "wiki_name": "Calderdale Council",
"web_driver": "http://selenium:4444"
},
"CannockChaseDistrictCouncil": {
"postcode": "WS15 1JA",
Expand All @@ -125,7 +131,8 @@
"skip_get_url": true,
"uprn": "4525",
"url": "https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=wastecalendar",
        "wiki_name": "Castlepoint District Council",
"web_driver": "http://selenium:4444"
},
"CharnwoodBoroughCouncil": {
"url": "https://my.charnwood.gov.uk/location?put=cbc10070067259&rememberme=0&redirect=%2F",
Expand Down
20 changes: 12 additions & 8 deletions uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,18 @@ def parse_data(self, page: str, **kwargs) -> dict:

for collection_type, collection_class in COLLECTION_KINDS.items():
for date in soup.select(f"div#{collection_class} li"):
bins.append({
"type": collection_type,
"collectionDate": datetime.strptime(
# Friday, 21 July 2023
date.get_text(strip=True),
'%A, %d %B %Y'
).strftime(date_format)
})

date_pattern = r'\d{1,2}\s\w+\s\d{4}' # Regex pattern to extract date
match = re.search(date_pattern, date.get_text(strip=True))

if match:
extracted_date = match.group()
formatted_date = datetime.strptime(extracted_date, '%d %B %Y').strftime(date_format)

bins.append({
"type": collection_type,
"collectionDate": formatted_date
})

return {
"bins": bins
Expand Down
149 changes: 102 additions & 47 deletions uk_bin_collection/uk_bin_collection/councils/BexleyCouncil.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
import json
from bs4 import BeautifulSoup
from datetime import datetime
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys

import requests
import time
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber


class CouncilClass(AbstractGetBinDataClass):
Expand All @@ -14,50 +22,97 @@ class CouncilClass(AbstractGetBinDataClass):
"""

def parse_data(self, page: str, **kwargs) -> dict:
# User email from @Home app as UPRN
user_email = kwargs.get("uprn")
headers = {
"X-country": "gb",
"X-email": user_email,
"Connection": "Keep-Alive",
}

# Sniffed from the app
requests.packages.urllib3.disable_warnings()
response = requests.get(
"https://services.athomeapp.net/ServiceData/GetUserRoundJson",
headers=headers,
)

# 200 is OK. Sometimes it times out and gives this, but I'm not parsing HTTP codes
if response.status_code != 200:
raise ValueError(
"Error parsing API. Please check your email is correct and registered on the @Home app."
)

# Load in the json and only get the bins
json_data = json.loads(response.text)["userrounds"]
data = {"bins": []}
collections = []

# For each bin, run through the list of dates and add them to a collection
for item in json_data:
bin_type = item["containername"]
for sched in item["nextcollectiondates"]:
bin_collection = datetime.strptime(
sched["datestring"], "%d %m %Y %H:%M"
)
if bin_collection.date() >= datetime.now().date():
collections.append((bin_type, bin_collection))
page = "https://mybexley.bexley.gov.uk/service/When_is_my_collection_day"

# Order the collection of bins and dates by date order, then add to dict
ordered_data = sorted(collections, key=lambda x: x[1])
data = {"bins": []}
for item in ordered_data:
dict_data = {
"type": item[0],
"collectionDate": item[1].strftime(date_format),
}
data["bins"].append(dict_data)

user_uprn = kwargs.get("uprn")
user_paon = kwargs.get("paon")
user_postcode = kwargs.get("postcode")
web_driver = kwargs.get("web_driver")

# Create Selenium webdriver
driver = create_webdriver(web_driver)
driver.get(page)

# If you bang in the house number (or property name) and postcode in the box it should find your property

iframe_presense = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "fillform-frame-1"))
)


driver.switch_to.frame(iframe_presense)
wait = WebDriverWait(driver, 60)
start_btn = wait.until(
EC.element_to_be_clickable((By.XPATH, "//button/span[contains(text(), 'Next')]"))
)

start_btn.click()

inputElement_postcodesearch = wait.until(
EC.element_to_be_clickable((By.NAME, "postcode_search"))
)
inputElement_postcodesearch.send_keys(user_postcode)

find_address_btn = wait.until(
EC.element_to_be_clickable((By.XPATH, '//*[@id="search"]'))
)
find_address_btn.click()

dropdown_options = wait.until(
EC.presence_of_element_located((By.XPATH, '//*[@id="select2-chosen-1"]'))
)
time.sleep(2)
dropdown_options.click()
time.sleep(1)
dropdown_input = wait.until(
EC.presence_of_element_located((By.XPATH, '//*[@id="s2id_autogen1_search"]'))
)
time.sleep(1)
dropdown_input.send_keys(user_paon)
dropdown_input.send_keys(Keys.ENTER)

results_found = wait.until(
EC.presence_of_element_located((By.CLASS_NAME, "found-content"))
)
finish_btn = wait.until(
EC.element_to_be_clickable((By.XPATH, "//button/span[contains(text(), 'Next')]"))
)
finish_btn.click()
final_page = wait.until(
EC.presence_of_element_located((By.CLASS_NAME, "waste-header-container"))
)


soup = BeautifulSoup(driver.page_source, features="html.parser")

bin_fields = soup.find_all("div", class_="waste-panel-container")
# Define your XPath

for bin in bin_fields:

# Extract h3 text from the current element
h3_text = bin.find('h3', class_='container-name').get_text(strip=True) if bin.find('h3', class_='container-name') else None

date_text = bin.find('p', class_='container-status').get_text(strip=True) if bin.find('p', class_='container-status') else None

if h3_text and date_text:
# Parse the date using the appropriate format
parsed_date = datetime.strptime(date_text, "%A %d %B")

# Assuming the current year is used for the collection date
current_year = datetime.now().year

# If the parsed date is in the past, assume it's for the next year
if parsed_date < datetime.now():
current_year += 1

data["bins"].append(
{
"type": h3_text,
"collectionDate": parsed_date.replace(year=current_year).strftime("%d/%m/%Y")
}
)

return data
Loading

0 comments on commit 286085f

Please sign in to comment.