Skip to content

Commit

Permalink
Merge pull request #683 from jimmygulp/BradfordMDC
Browse files Browse the repository at this point in the history
Added Bradford MDC - closes #442
  • Loading branch information
robbrad authored Apr 27, 2024
2 parents 0088ae4 + cbb1881 commit 46b77dd
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ Feature: Test each council output matches expected results
| council | selenium_url | selenium_mode |
| BoltonCouncil | http://selenium:4444 | local |

@BradfordMDC
Examples: BradfordMDC
| council | selenium_url | selenium_mode |
| BradfordMDC | None | None |

@BrightonandHoveCityCouncil
Examples: BrightonandHoveCityCouncil
| council | selenium_url | selenium_mode |
Expand Down
10 changes: 9 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@
"wiki_name": "Bolton Council",
"wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required single field that was UPRN and full address, now requires UPRN and postcode as separate fields."
},
"BradfordMDC": {
"custom_component_show_url_field": false,
"skip_get_url": true,
"uprn": "100052235823",
"url": "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
"wiki_name": "Bradford MDC",
"wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Post code isn't parsed by this script, but you can pass it in double quotes."
},
"BrightonandHoveCityCouncil": {
"house_number": "44 Carden Avenue, Brighton, BN1 8NE",
"postcode": "BN1 8NE",
Expand Down Expand Up @@ -1055,4 +1063,4 @@
"url": "https://waste-api.york.gov.uk/api/Collections/GetBinCollectionDataForUprn/",
"wiki_name": "York Council"
}
}
}
103 changes: 103 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import dumper
import requests
from bs4 import BeautifulSoup
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Bradford MDC.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse the next bin collections for a Bradford property.

        Args:
            page: Unused here; this method issues its own HTTP request
                instead of parsing a pre-fetched page.
            **kwargs: Must contain ``uprn``, the property reference that is
                validated with ``check_uprn`` and sent to the council site.

        Returns:
            A dict of the form ``{"bins": [...]}`` where every entry has
            the keys ``type``, ``collectionDate`` (formatted with
            ``date_format``) and ``colour``; entries are sorted by
            collection date.

        Raises:
            ValueError: If a collection container holds neither of the
                known bin elements, or if no collection date can be found
                in a bin's text.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        # UPRN is passed in via a cookie. Set cookies/params and GET the page
        cookies = {
            "COLLECTIONDATES": f"{user_uprn}",
        }
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-GB,en;q=0.7",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Referer": "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
            "Sec-GPC": "1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
        }
        # NOTE(review): these query values look like they were captured from
        # a browser session ("ebz" appears to embed a millisecond timestamp);
        # confirm the site keeps accepting them.
        params = {
            "ebp": "30",
            "ebd": "0",
            "ebz": "1_1713270660323",
        }
        requests.packages.urllib3.disable_warnings()
        response = requests.get(
            "https://onlineforms.bradford.gov.uk/ufs/collectiondates.eb",
            params=params,
            headers=headers,
            cookies=cookies,
            # Without a timeout a stalled server would block this call forever.
            timeout=30,
        )

        soup = BeautifulSoup(response.text, features="html.parser")

        # (CSS class of the element holding the date text, bin type, colour),
        # checked in this order for every collection container.
        bin_kinds = (
            ("CTID-62bNngCB-_", "General Waste", "Green"),
            ("CTID-LHo9iO0y-_", "Recycling Waste", "Grey"),
        )

        data = {"bins": []}

        # BradfordMDC site has lots of embedded tables, find the table titled
        # 'Your next general/recycling collections are:'
        for container in soup.find_all(attrs={"class": "CTID-FHGh1Q77-_"}):
            for css_class, bin_type, bin_colour in bin_kinds:
                date_element = container.find(attrs={"class": css_class})
                if date_element is not None:
                    break
            else:
                # Neither known bin element is present in this container.
                raise ValueError(
                    f"No bin info found in {container.get_text(strip=True)!r}"
                )

            # Collection date info is alongside the bin type, so the element
            # text holds the whole line (label followed by the date).
            collection_date = self._parse_collection_date(date_element.get_text())
            bin_date = get_next_occurrence_from_day_month(collection_date).strftime(
                date_format
            )

            # Build data dict for each entry
            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": bin_date,
                    "colour": bin_colour,
                }
            )

        data["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
        )

        return data

    @staticmethod
    def _parse_collection_date(bin_date_text: str):
        """Return the first collection date in a bin's text as a ``datetime``.

        The date is expected in the form ``Tue Apr 30 2024``
        (``%a %b %d %Y``).

        Raises:
            ValueError: If no date of that form is present in the text.
        """
        import re  # local import keeps this block self-contained

        date_layout = "%a %b %d %Y"

        # Fast path: the date normally sits at a fixed offset after the label.
        # The recycling label is a character shorter, hence the lstrip.
        fixed_slice = bin_date_text[29:50].lstrip(" ")
        try:
            return datetime.strptime(fixed_slice, date_layout)
        except ValueError:
            pass

        # Fallback for when the label length shifts: look for the same
        # "Tue Apr 30 2024" shape anywhere in the text.
        found = re.search(
            r"\b[A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{4}\b", bin_date_text
        )
        if found is None:
            raise ValueError(f"No collection date found in {bin_date_text!r}")
        return datetime.strptime(found.group(0), date_layout)

0 comments on commit 46b77dd

Please sign in to comment.