Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Council Pack 20 #1056

Merged
merged 11 commits into from
Dec 4, 2024
14 changes: 13 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,12 @@
"wiki_name": "High Peak Council",
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver."
},
"HinckleyandBosworthBoroughCouncil": {
"url": "https://www.hinckley-bosworth.gov.uk",
"uprn": "100030533512",
"wiki_name": "Hinckley and Bosworth Borough Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"HounslowCouncil": {
"house_number": "17A LAMPTON PARK ROAD, HOUNSLOW",
"postcode": "TW3 4HS",
Expand Down Expand Up @@ -1072,7 +1078,7 @@
"wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"MertonCouncil": {
"url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25851371",
"url": "https://myneighbourhood.merton.gov.uk/wasteservices/WasteServices.aspx?ID=25936129",
"wiki_command_url_override": "https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServices.aspx?ID=XXXXXXXX",
"wiki_name": "Merton Council",
"wiki_note": "Follow the instructions [here](https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServicesSearch.aspx) until you get the \"Your recycling and rubbish collection days\" page, then copy the URL and replace the URL in the command."
Expand Down Expand Up @@ -1132,6 +1138,12 @@
"wiki_name": "Mole Valley District Council",
"wiki_note": "UPRN can only be parsed with a valid postcode."
},
"MonmouthshireCountyCouncil": {
"url": "https://maps.monmouthshire.gov.uk",
"uprn": "100100266220",
"wiki_name": "Monmouthshire County Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"MorayCouncil": {
"uprn": "28841",
"url": "https://bindayfinder.moray.gov.uk/",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ def parse_data(self, page: str, **kwargs) -> dict:
for container in soup.find_all(class_="box-item"):

# Get the next collection dates from the <p> tag containing <strong>
dates_tag = (
container.find("p", string=lambda text: "Next" in text)
.find_next("p")
.find("strong")
)
try:
dates_tag = (
container.find("p", string=lambda text: "Next" in text)
.find_next("p")
.find("strong")
)
except:
continue
collection_dates = (
dates_tag.text.strip().split(", and then ")
if dates_tag
Expand Down
12 changes: 10 additions & 2 deletions uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,19 @@ def parse_data(self, page: str, **kwargs) -> dict:
for collection in bin_collections:
if collection is not None:
bin_type = collection[0].get("BinType")
current_collection_date = collection[0].get("CollectionDate")
if current_collection_date is None:
continue
current_collection_date = datetime.strptime(
collection[0].get("CollectionDate"), "%Y-%m-%d"
current_collection_date, "%Y-%m-%d"
)
next_collection_date = collection[0].get(
"NextScheduledCollectionDate"
)
if next_collection_date is None:
continue
next_collection_date = datetime.strptime(
collection[0].get("NextScheduledCollectionDate"), "%Y-%m-%d"
next_collection_date, "%Y-%m-%d"
)

# Work out the most recent collection date to display
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Find the next collection date
date_tag = container.find(class_="font11 text-center")
if date_tag:
collection_date = date_tag.text.strip()
else:
if date_tag.text.strip() == "":
continue
else:
collection_date = date_tag.text.strip()

dict_data = {
"type": bin_type,
Expand Down
17 changes: 17 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ def parse_data(self, page: str, **kwargs) -> dict:
"../Images/Bins/ashBin.gif": "Ash bin",
}

fieldset = soup.find("fieldset")
ps = fieldset.find_all("p")
for p in ps:
collection = p.text.strip().replace("Your next ", "").split(".")[0]
bin_type = collection.split(" day is")[0]
collection_date = datetime.strptime(
remove_ordinal_indicator_from_date_string(collection).split("day is ")[
1
],
"%A %d %B %Y",
)
dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
}
data["bins"].append(dict_data)

# Find the page body with all the calendars
body = soup.find("div", {"id": "Application_ctl00"})
calendars = body.find_all_next("table", {"title": "Calendar"})
Expand Down
15 changes: 10 additions & 5 deletions uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
)

# Select address from dropdown and wait
inputElement_ad = Select(
driver.find_element(By.ID, "FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS")
)

inputElement_ad.select_by_visible_text(user_paon)
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(
By.XPATH,
"//select[@id='FINDBINDAYSHIGHPEAK_ADDRESSSELECT_ADDRESS']//option[contains(., '"
+ user_paon
+ "')]",
)
)
).click()

WebDriverWait(driver, 10).until(
EC.presence_of_element_located(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse upcoming bin collections for Hinckley and Bosworth.

        Args:
            page: Unused; data is fetched directly from the council website.
            **kwargs: Must contain ``uprn`` identifying the property.

        Returns:
            A dict with a ``bins`` list of ``{"type", "collectionDate"}``
            entries, sorted by collection date (``dd/mm/YYYY``).

        Raises:
            requests.HTTPError: If the council site returns an error status.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="

        # Timeout prevents an unbounded hang if the council site stalls;
        # raise_for_status surfaces HTTP errors instead of silently parsing
        # an error page and returning no collections.
        response = requests.get(URI, timeout=30)
        response.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(response.content, "html.parser")

        # Each collection is rendered as a div containing an <h3> date
        # heading plus one <img class="collection"> per bin collected then.
        bin_schedule = []
        collection_divs = soup.find_all(
            "div", class_=["first_date_bins", "last_date_bins"]
        )

        for div in collection_divs:
            # Extract the date, e.g. "14 December:" -> "14 December"
            date = div.find("h3", class_="collectiondate").text.strip().replace(":", "")

            # Extract bin types from the image alt text
            bins = [img["alt"] for img in div.find_all("img", class_="collection")]

            bin_schedule.append({"date": date, "bins": bins})

        current_year = datetime.now().year
        current_month = datetime.now().month

        for entry in bin_schedule:
            bin_types = entry["bins"]
            # The site omits the year, so strptime defaults it to 1900;
            # the correct year is assigned below.
            date = datetime.strptime(entry["date"], "%d %B")

            # Year rollover: an early-year date seen late in the year
            # (e.g. a January collection shown in December) is next year.
            if (current_month > 9) and (date.month < 4):
                date = date.replace(year=(current_year + 1))
            else:
                date = date.replace(year=current_year)

            for bin_type in bin_types:
                dict_data = {
                    "type": bin_type,
                    "collectionDate": date.strftime("%d/%m/%Y"),
                }
                bindata["bins"].append(dict_data)

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
        )

        return bindata
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse upcoming bin collections for Monmouthshire.

        Args:
            page: Unused; data is fetched directly from the council website.
            **kwargs: Must contain ``uprn`` identifying the property.

        Returns:
            A dict with a ``bins`` list of ``{"type", "collectionDate"}``
            entries, sorted by collection date (``dd/mm/YYYY``).

        Raises:
            requests.HTTPError: If the council site returns an error status.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = (
            f"https://maps.monmouthshire.gov.uk/?action=SetAddress&UniqueId={user_uprn}"
        )

        # Timeout prevents an unbounded hang if the council site stalls;
        # raise_for_status surfaces HTTP errors instead of silently parsing
        # an error page and returning no collections.
        response = requests.get(URI, timeout=30)
        response.raise_for_status()

        # Parse the HTML
        soup = BeautifulSoup(response.content, "html.parser")

        waste_collections_div = soup.find("div", {"aria-label": "Waste Collections"})

        # Find all bin collection panels
        bin_panels = waste_collections_div.find_all("div", class_="atPanelContent")

        current_year = datetime.now().year
        current_month = datetime.now().month

        for panel in bin_panels:
            # Extract bin name (e.g., "Household rubbish bag")
            bin_name = panel.find("h4").text.strip().replace("\r", "").replace("\n", "")

            # Extract collection date (e.g., "Monday 9th December");
            # skip panels without a next-collection announcement.
            date_tag = panel.find("p")
            if date_tag and "Your next collection date is" in date_tag.text:
                collection_date = date_tag.find("strong").text.strip()
            else:
                continue

            # The site omits the year, so strptime defaults it to 1900;
            # the correct year is assigned below.
            collection_date = datetime.strptime(
                remove_ordinal_indicator_from_date_string(collection_date), "%A %d %B"
            )

            # Year rollover: an early-year date seen late in the year
            # (e.g. a January collection shown in December) is next year.
            if (current_month > 9) and (collection_date.month < 4):
                collection_date = collection_date.replace(year=(current_year + 1))
            else:
                collection_date = collection_date.replace(year=current_year)

            dict_data = {
                "type": bin_name,
                "collectionDate": collection_date.strftime("%d/%m/%Y"),
            }
            bindata["bins"].append(dict_data)

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
        )

        return bindata
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# This script pulls (in one hit) the data
# from Warwick District Council Bins Data

from datetime import datetime

from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


Expand All @@ -20,15 +24,30 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}

for element in soup.find_all("strong"):
bin_type = element.next_element
bin_type = bin_type.lstrip()
collectionDateElement = element.next_sibling.next_element.next_element
collectionDate = collectionDateElement.getText()
dict_data = {
"type": bin_type,
"collectionDate": collectionDate,
}
data["bins"].append(dict_data)
# Find all bin panels
bin_panels = soup.find_all("div", class_="col-sm-4 col-lg-3")

# Iterate through each panel to extract information
for panel in bin_panels:
bin_type = panel.find("img")["alt"].strip()

waste_dates = panel.find(
"div", class_="col-xs-12 text-center waste-dates margin-bottom-15"
)

for p in waste_dates.find_all("p")[1:]:
date = p.text.strip()
if " " in date:
date = date.split(" ")[1]

dict_data = {
"type": bin_type,
"collectionDate": date,
}
data["bins"].append(dict_data)

data["bins"].sort(
key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
)

return data
24 changes: 24 additions & 0 deletions wiki/Councils.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ This document is still a work in progress, don't worry if your council isn't lis
- [Hertsmere Borough Council](#hertsmere-borough-council)
- [Highland Council](#highland-council)
- [High Peak Council](#high-peak-council)
- [Hinckley and Bosworth Borough Council](#hinckley-and-bosworth-borough-council)
- [Hounslow Council](#hounslow-council)
- [Hull City Council](#hull-city-council)
- [Huntingdon District Council](#huntingdon-district-council)
Expand Down Expand Up @@ -155,6 +156,7 @@ This document is still a work in progress, don't worry if your council isn't lis
- [Mid Sussex District Council](#mid-sussex-district-council)
- [Milton Keynes City Council](#milton-keynes-city-council)
- [Mole Valley District Council](#mole-valley-district-council)
- [Monmouthshire County Council](#monmouthshire-county-council)
- [Moray Council](#moray-council)
- [Neath Port Talbot Council](#neath-port-talbot-council)
- [New Forest Council](#new-forest-council)
Expand Down Expand Up @@ -1622,6 +1624,17 @@ Note: Pass the name of the street with the house number parameter, wrapped in do

---

### Hinckley and Bosworth Borough Council
```commandline
python collect_data.py HinckleyandBosworthBoroughCouncil https://www.hinckley-bosworth.gov.uk -u XXXXXXXX
```
Additional parameters:
- `-u` - UPRN

Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.

---

### Hounslow Council
```commandline
python collect_data.py HounslowCouncil https://www.hounslow.gov.uk/info/20272/recycling_and_waste_collection_day_finder -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/
Expand Down Expand Up @@ -2043,6 +2056,17 @@ Note: UPRN can only be parsed with a valid postcode.

---

### Monmouthshire County Council
```commandline
python collect_data.py MonmouthshireCountyCouncil https://maps.monmouthshire.gov.uk -u XXXXXXXX
```
Additional parameters:
- `-u` - UPRN

Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.

---

### Moray Council
```commandline
python collect_data.py MorayCouncil https://bindayfinder.moray.gov.uk/ -u XXXXXXXX
Expand Down
Loading