Skip to content

Commit

Permalink
Merge pull request #321 from MystifiedMeat/HarrogateBoroughCouncil
Browse files Browse the repository at this point in the history
Adding support for Harrogate Borough Council
  • Loading branch information
OliverCullimore authored Sep 10, 2023
2 parents e469a0c + fb4481c commit cae36cc
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/Welcome7",
"definitions": {
"Welcome7": {
"type": "object",
"additionalProperties": false,
"properties": {
"bins": {
"type": "array",
"items": {
"$ref": "#/definitions/Bin"
}
}
},
"required": [
"bins"
],
"title": "Welcome7"
},
"Bin": {
"type": "object",
"additionalProperties": false,
"properties": {
"type": {
"type": "string"
},
"collectionDate": {
"type": "string"
}
},
"required": [
"collectionDate",
"type"
],
"title": "Bin"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Feature: Test each council output matches expected results in /outputs
| FenlandDistrictCouncil |
| GlasgowCityCouncil |
| GuildfordCouncil |
| HarrogateBoroughCouncil |
| HighPeakCouncil |
| HuntingdonDistrictCouncil |
| KingstonUponThamesCouncil |
Expand Down
7 changes: 7 additions & 0 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@
"wiki_name": "Guildford Council",
"wiki_note": "If the bin day is 'today' then the collectionDate will only show today's date if before 7AM, else the date will be in 'previousCollectionDate'. To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"HarrogateBoroughCouncil": {
"SKIP_GET_URL": "SKIP_GET_URL",
"uprn": "100050414307",
"url": "https://secure.harrogate.gov.uk/inmyarea",
"wiki_name": "Harrogate Borough Council",
"wiki_note": "Pass the UPRN which can be found at https://secure.harrogate.gov.uk/inmyarea URL doesn't need to be passed."
},
"HighPeakCouncil": {
"SKIP_GET_URL": "SKIP_GET_URL",
"house_number": "9 Ellison Street, Glossop",
Expand Down
36 changes: 36 additions & 0 deletions uk_bin_collection/tests/outputs/HarrogateBoroughCouncil.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"bins": [
{
"type": "Recycling",
"collectionDate": "25/08/2023"
},
{
"type": "Refuse",
"collectionDate": "02/09/2023"
},
{
"type": "Recycling",
"collectionDate": "08/09/2023"
},
{
"type": "Refuse",
"collectionDate": "15/09/2023"
},
{
"type": "Recycling",
"collectionDate": "22/09/2023"
},
{
"type": "Refuse",
"collectionDate": "29/09/2023"
},
{
"type": "Recycling",
"collectionDate": "06/10/2023"
},
{
"type": "Refuse",
"collectionDate": "13/10/2023"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from bs4 import BeautifulSoup
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse bin collection dates for a Harrogate property.

        The council site serves collection data directly by UPRN, so the
        generic ``page`` argument is ignored and a fresh request is made.

        :param page: unused — data is fetched directly by UPRN instead.
        :param kwargs: must contain ``uprn`` (the property's UPRN string).
        :return: dict with a ``bins`` list of
            ``{"type": ..., "collectionDate": ...}`` entries sorted by date.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        headers = {
            "accept-language": "en-GB,en;q=0.9",
            "cache-control": "no-cache",
        }

        url = f"https://secure.harrogate.gov.uk/inmyarea/Property/?uprn={user_uprn}"

        # The site's TLS setup triggers urllib3 warnings; silence them as the
        # original implementation did.
        requests.packages.urllib3.disable_warnings()
        response = requests.post(url, headers=headers)

        soup = BeautifulSoup(response.text, features="html.parser")

        collections = []

        # The second "hbcRounds" table on the page holds the bin collections.
        table = soup.find_all("table", {"class": "hbcRounds"})[1]

        # Each row: <th> is the bin type, <td> holds the date once decorative
        # <span> elements are stripped out.
        for row in table.find_all("tr"):
            bin_type = row.find("th").text
            td = row.find("td")
            for span in td.find_all("span"):
                span.extract()
            collection_date = datetime.strptime(td.text.strip(), "%a %d %b %Y")
            collections.append((bin_type, collection_date))

        # Emit entries in chronological order.
        data = {"bins": []}
        for bin_type, collection_date in sorted(collections, key=lambda x: x[1]):
            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": collection_date.strftime(date_format),
                }
            )

        return data

0 comments on commit cae36cc

Please sign in to comment.