Merge pull request #1088 from dp247/202412-fixes
December 2024 fixes
robbrad authored Jan 4, 2025
2 parents 9cb7ade + 6d9af00 commit 7595bd0
Showing 11 changed files with 120 additions and 97 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/behave_pull_request.yml
@@ -35,7 +35,6 @@ jobs:
with:
files: |
uk_bin_collection/uk_bin_collection/councils/**.py
- name: Set Council Tests Environment Variable
id: set-council-tests
run: |
@@ -50,7 +49,6 @@
fi
done
echo "council_tests=$COUNCIL_TESTS" >> $GITHUB_OUTPUT
outputs:
council_tests: ${{ steps.set-council-tests.outputs.council_tests }}

@@ -111,7 +109,6 @@ jobs:
repo=${{ github.event.pull_request.head.repo.full_name || 'robbrad/UKBinCollectionData' }}
branch=${{ github.event.pull_request.head.ref || 'master' }}
make parity-check repo=$repo branch=$branch
integration-tests:
name: Run Integration Tests
needs: setup
2 changes: 1 addition & 1 deletion README.md
@@ -92,7 +92,7 @@ If you miss this on the first setup you can reconfigure it.
"color": "blue"
}
}
```
---

## Standalone Usage
4 changes: 2 additions & 2 deletions uk_bin_collection/tests/input.json
@@ -1993,8 +1993,8 @@
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"WestLindseyDistrictCouncil": {
"house_number": "PRIVATE ACCOMMODATION",
"postcode": "LN8 2AR",
"house_number": "35",
"postcode": "LN8 3AX",
"skip_get_url": true,
"url": "https://www.west-lindsey.gov.uk/",
"wiki_name": "West Lindsey District Council",
@@ -21,7 +21,6 @@ class CouncilClass(AbstractGetBinDataClass):
"""

def parse_data(self, page: str, **kwargs) -> dict:
# Make a BS4 object
driver = None
try:
bin_data_dict = {"bins": []}
@@ -76,12 +75,13 @@ def parse_data(self, page: str, **kwargs) -> dict:
# Get the current year
current_year = datetime.now().year

# Append the year to the date
date_with_year = date_object.replace(year=current_year)

# Check if the parsed date is in the past compared to the current date
if date_with_year < datetime.now():
# If the parsed date is in the past, assume it's for the next year
current_year += 1
# Append the year to the date
date_with_year = date_object.replace(year=current_year)

# Format the date with the year
date_with_year_formatted = date_with_year.strftime(
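The year-handling logic above reflects a pattern these scrapers share: council pages print collection dates without a year, strptime defaults the year to 1900, and the parser has to pin a real one on, rolling dates that have already passed into next year. A minimal standalone sketch of the pattern, comparing the year-qualified date rather than the raw parse (the helper name and sample dates are illustrative, not from this commit):

```python
from datetime import datetime
from typing import Optional

def infer_year(date_object: datetime, now: Optional[datetime] = None) -> datetime:
    """Attach a year to a date parsed without one (strptime defaults to 1900)."""
    now = now or datetime.now()
    # Assume the current year first...
    date_with_year = date_object.replace(year=now.year)
    # ...then roll into next year if that date has already gone by.
    if date_with_year < now:
        date_with_year = date_object.replace(year=now.year + 1)
    return date_with_year

# e.g. seen on 30 December 2024, "2 January" resolves to 2 January 2025
print(infer_year(datetime.strptime("2 January", "%d %B"), datetime(2024, 12, 30)))
```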
117 changes: 64 additions & 53 deletions uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py
@@ -1,12 +1,17 @@
import time
from datetime import datetime
from typing import Optional

from bs4 import BeautifulSoup
from selenium.common import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.drivers.chrome import ChromeDriver

from uk_bin_collection.uk_bin_collection.common import create_webdriver
from uk_bin_collection.uk_bin_collection.common import date_format
@@ -55,78 +60,84 @@ def _parse_data(self, page: str, **kwargs) -> dict:
- Extract info from the 'alt' attribute of the images on that page
"""
bins = []
data = {"bins": []}
collections = []

user_paon = kwargs["paon"]
user_postcode = kwargs["postcode"]

self._driver = driver = create_webdriver(
web_driver=kwargs["web_driver"],
headless=kwargs.get("headless", True),
session_name=__name__,
)
driver.implicitly_wait(1)

driver.get(
"https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins/default.aspx"
"https://my.kirklees.gov.uk/service/Bins_and_recycling___Manage_your_bins"
)

wait_for_element(
driver, By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoPremises"
)
time.sleep(5)

# Switch to iframe
iframe = driver.find_element(By.CSS_SELECTOR, "#fillform-frame-1")
driver.switch_to.frame(iframe)

house_input = driver.find_element(
By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoPremises"
)
wait_for_element(
driver, By.ID, "mandatory_Postcode", timeout=10
)
house_input.send_keys(user_paon)

postcode_input = driver.find_element(
By.ID, "cphPageBody_cphContent_thisGeoSearch_txtGeoSearch"
By.ID, "Postcode"
)
postcode_input.send_keys(user_postcode)

# submit address search
driver.find_element(By.ID, "butGeoSearch").send_keys(Keys.RETURN)
wait_for_element(driver, By.ID, "List")
time.sleep(2)

WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(
By.XPATH,
"//select[@name='List']//option[contains(., '"
+ user_paon
+ "')]",
)
)
).click()

wait_for_element(
driver,
By.ID,
"cphPageBody_cphContent_wtcDomestic240__lnkAccordionAnchor",
# submitting can be slow
timeout=30,
)
time.sleep(10)

# Open the panel
driver.find_element(
By.ID, "cphPageBody_cphContent_wtcDomestic240__lnkAccordionAnchor"
).click()
# For whatever reason, the page sometimes automatically goes to the next step
next_button = driver.find_element(By.XPATH, '/html/body/div/div/section/form/div/nav/div[2]/button')
if next_button.is_displayed():
next_button.click()

# Domestic waste calendar
wait_for_element(
driver, By.ID, "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"
)
calendar_link = driver.find_element(
By.ID, "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"
)
driver.execute_script("arguments[0].click();", calendar_link)

# <img alt="Recycling collection date 14 March 2024"
# <img alt="Domestic collection date 21 March 2024
date_strings = driver.find_elements(
By.CSS_SELECTOR, 'img[alt*="collection date"]'
)
time.sleep(5)

for date in date_strings:
bin_type, _, _, day, month, year = date.get_attribute("alt").split()
collection_date = datetime.strptime(
f"{day} {month} {year}", "%d %B %Y"
).strftime(date_format)

bins.append(
{
"type": bin_type,
"collectionDate": collection_date,
}
)
soup = BeautifulSoup(self._driver.page_source, features="html.parser")
soup.prettify()

radio_button_text = soup.find_all("label", {"class": "radio-label"})
for label in radio_button_text:
parsed_text = label.text.split("x ")
row = parsed_text[1].lower().split("collection date: ")
bin_type = row[0].split("(")[0].strip()
date_text = row[1].strip().replace(")", "")
if date_text == "today":
bin_date = datetime.now()
else:
bin_date = datetime.strptime(date_text, "%A %d %B %Y")
collections.append((bin_type, bin_date))

ordered_data = sorted(collections, key=lambda x: x[1])
for item in ordered_data:
dict_data = {
"type": item[0].replace("standard ", "").capitalize(),
"collectionDate": item[1].strftime(date_format),
}
data["bins"].append(dict_data)

return {"bins": bins}
return data
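The rewritten Kirklees flow no longer clicks through to a bin calendar; it reads the collection details straight out of the radio-button labels on the my.kirklees.gov.uk form. A self-contained sketch of that label parsing and the sort-then-format step (the sample label strings are hypothetical, reverse-engineered from the split calls above):

```python
from datetime import datetime

date_format = "%d/%m/%Y"  # the project's shared output format

# Hypothetical labels shaped the way the splits above expect:
labels = [
    "1 x Standard domestic (collection date: Friday 10 January 2025)",
    "1 x Standard recycling (collection date: today)",
]

collections = []
for text in labels:
    row = text.split("x ")[1].lower().split("collection date: ")
    bin_type = row[0].split("(")[0].strip()      # e.g. "standard domestic"
    date_text = row[1].strip().replace(")", "")  # e.g. "friday 10 january 2025"
    if date_text == "today":
        bin_date = datetime.now()
    else:
        bin_date = datetime.strptime(date_text, "%A %d %B %Y")
    collections.append((bin_type, bin_date))

# Earliest collection first, dropping the "standard " prefix for display.
for bin_type, bin_date in sorted(collections, key=lambda x: x[1]):
    print(bin_type.replace("standard ", "").capitalize(), bin_date.strftime(date_format))
```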
25 changes: 15 additions & 10 deletions uk_bin_collection/uk_bin_collection/councils/NorwichCityCouncil.py
@@ -48,15 +48,13 @@ def parse_data(self, page: str, **kwargs) -> dict:
alternateCheck = False

strong = soup.find_all("strong")
collections = []

if alternateCheck:
bin_types = strong[2].text.strip().replace(".", "").split(" and ")
for bin in bin_types:
dict_data = {
"type": bin,
"collectionDate": strong[1].text.strip(),
}
bindata["bins"].append(dict_data)
collections.append((bin.capitalize(), datetime.strptime(strong[1].text.strip(), date_format)))

else:
p_tag = soup.find_all("p")
i = 1
@@ -65,11 +63,18 @@
p.text.split("Your ")[1].split(" is collected")[0].split(" and ")
)
for bin in bin_types:
dict_data = {
"type": bin,
"collectionDate": strong[i].text.strip(),
}
bindata["bins"].append(dict_data)
collections.append((bin.capitalize(), datetime.strptime(strong[i].text.strip(), date_format)))
i += 2

if len(strong) > 4:
collections.append(("Garden", datetime.strptime(strong[4].text.strip(), date_format)))

ordered_data = sorted(collections, key=lambda x: x[1])
for item in ordered_data:
dict_data = {
"type": item[0] + " bin",
"collectionDate": item[1].strftime(date_format),
}
bindata["bins"].append(dict_data)

return bindata
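Both Norwich branches now collect (bin type, datetime) tuples and defer formatting until after a single sort, instead of appending strings in page order. A compact sketch of the else-branch walk, where each <p> names the bins and every second <strong> carries the matching date (the page fragments are hypothetical):

```python
from datetime import datetime

date_format = "%d/%m/%Y"
bindata = {"bins": []}

paragraphs = [
    "Your rubbish and food waste is collected weekly",
    "Your recycling is collected fortnightly",
]
strongs = ["", "06/01/2025", "", "09/01/2025"]  # dates sit at odd indices

collections = []
i = 1
for p in paragraphs:
    bin_types = p.split("Your ")[1].split(" is collected")[0].split(" and ")
    for bin in bin_types:  # `bin` mirrors the loop variable used above
        collections.append((bin.capitalize(), datetime.strptime(strongs[i], date_format)))
    i += 2

for item in sorted(collections, key=lambda x: x[1]):
    bindata["bins"].append(
        {"type": item[0] + " bin", "collectionDate": item[1].strftime(date_format)}
    )
print(bindata)
```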
@@ -13,11 +13,6 @@ class CouncilClass(AbstractGetBinDataClass):

def parse_data(self, page: str, **kwargs) -> dict:
data = {"bins": []}
bin_types = {
"240L RUBBISH BIN": "Black bin",
"240L GARDEN BIN": "Green bin",
"180L RECYCLING BIN": "Blue lid bin",
}
collections = []

user_postcode = kwargs.get("postcode")
@@ -73,7 +68,8 @@ def parse_data(self, page: str, **kwargs) -> dict:

for row in table_rows:
row_text = row.text.strip().split("\n")
bin_type = bin_types.get(row_text[0])
bin_text = row_text[0].split(" ")
bin_type = ' '.join(bin_text[1:]).capitalize()
collections.append(
(bin_type, datetime.strptime(row_text[1], "%A %d %b %Y"))
)
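The hard-coded lookup table is gone: the display name is now derived from the row text itself, so a new container size no longer needs a code change. Note the result is the bin's own description ("Garden bin") rather than the old colour label ("Green bin"). For example, with a row text shaped like the old dictionary keys:

```python
row_text = "240L GARDEN BIN"
bin_text = row_text.split(" ")
bin_type = " ".join(bin_text[1:]).capitalize()
print(bin_type)  # -> "Garden bin"
```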
@@ -6,8 +6,18 @@
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# import the wonderful Beautiful Soup and the URL grabber

def parse_collection_date(date_string) -> datetime:
now = datetime.now()
if date_string == "is due today":
return now

parsed_date = datetime.strptime(date_string, "%A, %d %B").replace(year=now.year)

if now.month == 12 and parsed_date.month < 12:
parsed_date = parsed_date.replace(year=(now.year + 1))

return parsed_date
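
A quick sanity check of the helper's December rollover (a hypothetical run, assuming datetime.now() falls on 30 December 2024):

```python
for raw in ("is due today", "Thursday, 02 January", "Tuesday, 31 December"):
    print(raw, "->", parse_collection_date(raw).date())
# "02 January" rolls into 2025; "31 December" stays in 2024.
```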

class CouncilClass(AbstractGetBinDataClass):
"""
20 changes: 12 additions & 8 deletions uk_bin_collection/uk_bin_collection/councils/WalsallCouncil.py
@@ -28,29 +28,33 @@ def parse_data(self, page: str, **kwargs) -> dict:
response = requests.get(URI, headers=headers)

soup = BeautifulSoup(response.text, "html.parser")
# Extract links to collection shedule pages and iterate through the pages
schedule_links = soup.findAll("a", {"class": "nav-link"}, href=True)
# Extract links to collection schedule pages and iterate through the pages
schedule_links = soup.findAll("td")

for item in schedule_links:
if "roundname" in item["href"]:
if "roundname" in item.contents[1]["href"]:
# get bin colour
bincolour = item["href"].split("=")[-1].split("%")[0].upper()
binURL = "https://cag.walsall.gov.uk" + item["href"]
r = requests.get(binURL, headers=headers)
bin_colour = item.contents[1]["href"].split("=")[-1].split("%")[0].upper()
bin_url = "https://cag.walsall.gov.uk" + item.contents[1]["href"]
r = requests.get(bin_url, headers=headers)
if r.status_code != 200:
print(f"Collection details for {bin_colour.lower()} bin could not be retrieved.")
break
soup = BeautifulSoup(r.text, "html.parser")
table = soup.findAll("tr")
for tr in table:
td = tr.findAll("td")
if td:
dict_data = {
"type": bincolour,
"type": bin_colour.capitalize() + " bin",
"collectionDate": datetime.strptime(
td[1].text.strip(), "%d/%m/%Y"
).strftime("%d/%m/%Y"),
}
bindata["bins"].append(dict_data)

bindata["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return bindata
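The bin colour still comes from the roundname query parameter on each schedule link; what changed is where the <a> tags are found (inside <td> cells) and the friendlier type label. A one-liner sketch of the extraction, against a hypothetical href:

```python
# Hypothetical link target shaped like the hrefs the loop above follows:
href = "/page/printable-round-calendar?roundname=GREEN%20Round%202"

bin_colour = href.split("=")[-1].split("%")[0].upper()  # "GREEN"
print(bin_colour.capitalize() + " bin")                 # "Green bin"
```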
@@ -99,7 +99,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
).replace(year=datetime.now().year)

food_div = soup.find(
"div", {"id": "FINDYOURBINDAYS_RECYCLINGDATE_OUTERDIV"}
"div", {"id": "FINDYOURBINDAYS_FOODWASTEDATE_OUTERDIV"}
)
food_date = food_div.find_all("div")[2]
if food_date.text == "Today":