Skip to content

Commit

Permalink
Merge pull request #698 from dannya/master
Browse files Browse the repository at this point in the history
  • Loading branch information
robbrad authored May 7, 2024
2 parents 35f0ebc + 558384d commit 60dda27
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ Feature: Test each council output matches expected results
| council | selenium_url | selenium_mode |
| BexleyCouncil | http://selenium:4444 | local |

@BirminghamCityCouncil
Examples: BirminghamCityCouncil
| council | selenium_url | selenium_mode |
| BirminghamCityCouncil | None | None |

@BlackburnCouncil
Examples: BlackburnCouncil
| council | selenium_url | selenium_mode |
Expand Down
7 changes: 6 additions & 1 deletion uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@
"wiki_name": "Bexley Council",
"wiki_note": "In order to use this parser, you will need to sign up to [Bexley's @Home app](https://www.bexley.gov.uk/services/rubbish-and-recycling/bexley-home-recycling-app/about-app) (available for [iOS](https://apps.apple.com/gb/app/home-collection-reminder/id1050703690) and [Android](https://play.google.com/store/apps/details?id=com.contender.athome.android)).\nComplete the setup by entering your email and setting your address with postcode and address line.\nOnce you can see the calendar, you _should_ be good to run the parser.\nJust pass the email you used in quotes in the UPRN parameter.\n"
},
"BirminghamCityCouncil": {
"postcode": "B5 7XE",
"uprn": "100070445256",
"url": "https://www.birmingham.gov.uk/xfp/form/619",
"wiki_name": "Birmingham City Council"
},
"BlackburnCouncil": {
"skip_get_url": true,
"uprn": "100010733027",
Expand Down Expand Up @@ -984,7 +990,6 @@
},
"WelhatCouncil": {
"postcode": "AL8 6HQ",
"skip_get_url": true,
"uprn": "100080982825",
"url": "https://www.welhat.gov.uk/xfp/form/214",
"wiki_name": "Welhat Council"
Expand Down
117 changes: 117 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/BirminghamCityCouncil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from dateutil.relativedelta import relativedelta
from bs4 import BeautifulSoup
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


def get_token(page) -> str:
    """
    Extract the hidden "__token" form field needed to submit the xfp form.

    :param page: Response object whose ``.text`` contains the form HTML
    :return: Value of the hidden ``__token`` input
    """
    soup = BeautifulSoup(page.text, features="html.parser")
    # NOTE: the original called soup.prettify() and discarded the result;
    # prettify() only returns a formatted string and does not affect the
    # parse tree, so the dead call is removed.
    token = soup.find("input", {"name": "__token"}).get("value")
    return token


class CouncilClass(AbstractGetBinDataClass):
    """
    Bin-collection scraper for Birmingham City Council.

    ``get_data`` fetches the council's xfp form page (to obtain a CSRF
    ``__token``); ``parse_data`` posts the user's postcode and UPRN and
    scrapes the returned collection table.
    """

    def get_data(self, url) -> str:
        """Fetch the form page used to harvest the ``__token``.

        Keyword arguments:
        url -- the url to get the data from
        Returns the ``requests.Response`` for the page.
        Raises any ``requests.exceptions.RequestException`` subclass after
        logging it.
        """
        # Set a user agent so we look like a browser ;-)
        user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/108.0.0.0 Safari/537.36"
        )
        headers = {"User-Agent": user_agent}
        requests.packages.urllib3.disable_warnings()

        # Make the Request - change the URL - find out your property number
        try:
            session = requests.Session()
            session.headers.update(headers)
            full_page = session.get(url)
            # Without raise_for_status() requests never raises HTTPError,
            # which left the handler below unreachable.
            full_page.raise_for_status()
            return full_page
        except requests.exceptions.HTTPError as errh:
            _LOGGER.error(f"Http Error: {errh}")
            raise
        except requests.exceptions.ConnectionError as errc:
            _LOGGER.error(f"Error Connecting: {errc}")
            raise
        except requests.exceptions.Timeout as errt:
            _LOGGER.error(f"Timeout Error: {errt}")
            raise
        except requests.exceptions.RequestException as err:
            _LOGGER.error(f"Oops: Something Else {err}")
            raise

    def parse_data(self, page: str, **kwargs) -> dict:
        """Submit the form and parse the collection table.

        Keyword arguments:
        page -- the response from get_data (supplies the __token)
        kwargs -- must contain "uprn" and "postcode"
        Returns {"bins": [{"type": ..., "collectionDate": ...}, ...]}.
        Raises ValueError when the response contains no results table.
        """
        uprn = kwargs.get("uprn")
        postcode = kwargs.get("postcode")
        check_uprn(uprn)
        check_postcode(postcode)

        values = {
            "__token": get_token(page),
            "page": "491",
            "locale": "en_GB",
            # Opaque field names come from the council's xfp form markup.
            "q1f8ccce1d1e2f58649b4069712be6879a839233f_0_0": postcode,
            "q1f8ccce1d1e2f58649b4069712be6879a839233f_1_0": uprn,
            "next": "Next",
        }
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
        requests.packages.urllib3.disable_warnings()
        response = requests.post(
            "https://www.birmingham.gov.uk/xfp/form/619",
            headers=headers,
            data=values,
        )

        soup = BeautifulSoup(response.text, features="html.parser")

        # A missing table (bad postcode/UPRN, or a council page change)
        # previously surfaced as an opaque AttributeError on None.
        table = soup.find("table")
        if table is None:
            raise ValueError(
                "No collection table found in response - check postcode and UPRN"
            )
        rows = table.find_all("tr")

        # Form a JSON wrapper
        data = {"bins": []}

        # Loops the Rows
        for row in rows:
            cells = row.find_all("td")
            if cells:
                bin_type = cells[0].get_text(strip=True)
                collection_next = cells[1].get_text(strip=True)

                # Date is rendered in parentheses, e.g. "Friday (10 May)";
                # raw strings avoid invalid-escape SyntaxWarnings.
                collection_date = re.findall(r"\(.*?\)", collection_next)

                if len(collection_date) != 1:
                    continue

                collection_date_obj = parse(
                    re.sub(r"[()]", "", collection_date[0])
                ).date()

                # since we only have the next collection day, if the parsed date is in the past,
                # assume the day is instead next month
                if collection_date_obj < datetime.now().date():
                    collection_date_obj += relativedelta(months=1)

                # Make each Bin element in the JSON
                dict_data = {
                    "type": bin_type,
                    "collectionDate": collection_date_obj.strftime(date_format),
                }

                # Add data to the main JSON Wrapper
                data["bins"].append(dict_data)

        return data

0 comments on commit 60dda27

Please sign in to comment.