Skip to content

Commit

Permalink
Merge pull request #498 from skelt0/feat-West-Lindsey-District-Council
Browse files Browse the repository at this point in the history
  • Loading branch information
robbrad authored Dec 19, 2023
2 parents 23c2f83 + 5637974 commit 61a8da3
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ Feature: Test each council output matches expected results
| WaverleyBoroughCouncil | None | None |
| WealdenDistrictCouncil | None | None |
| WelhatCouncil | None | None |
| WestLindseyDistrictCouncil | None | None |
| WestLothianCouncil | http://selenium:4444 | local |
| WestSuffolkCouncil | http://selenium:4444 | local |
| WiganBoroughCouncil | None | None |
Expand Down
12 changes: 10 additions & 2 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@
"uprn": "200002981143",
"url": "https://www.fenland.gov.uk/article/13114/",
"wiki_name": "Fenland District Council"
},
},
"ForestOfDeanDistrictCouncil": {
"skip_get_url": true,
"house_number": "ELMOGAL, PARKEND ROAD, BREAM, LYDNEY",
Expand Down Expand Up @@ -504,7 +504,7 @@
"url": "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx",
"web_driver": "http://selenium:4444",
"wiki_name": "Northumberland Council"
},
},
"OldhamCouncil": {
"url": "https://portal.oldham.gov.uk/bincollectiondates/details?uprn=422000033556",
"wiki_name": "Oldham Council",
Expand Down Expand Up @@ -755,6 +755,14 @@
"url": "https://www.welhat.gov.uk/xfp/form/214",
"wiki_name": "Welhat Council"
},
"WestLindseyDistrictCouncil": {
"house_number": "PRIVATE ACCOMODATION",
"postcode": "LN8 2AR",
"skip_get_url": true,
"url": "https://www.west-lindsey.gov.uk/",
"wiki_name": "West Lindsey District Council",
"wiki_note": "Pass the house name/number in the house number parameter, and postcode in the postcode parameter, both wrapped in double quotes. If a named house or flat, enter this in the number field. If multiple results return, we'll pick the first. You can test it [here](https://www.west-lindsey.gov.uk/bins-waste-recycling/find-your-bin-collection-day)"
},
"WestLothianCouncil": {
"house_number": "1 GOSCHEN PLACE",
"postcode": "EH52 5JE",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import requests, re, urllib.parse

from datetime import datetime, timedelta
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass

class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection lookup for West Lindsey District Council.

    The lookup is a two-stage query against the council's mapping service:
    the first request resolves a free-text address to an id and X/Y
    coordinates, the second request returns a JavaScript snippet whose
    payload is the HTML listing the collections for that location.

    ``json`` and ``date_format`` are not imported explicitly in this module;
    they are expected to be provided by the ``common`` star import.
    """

    # Seconds to wait for each HTTP request before giving up, so that an
    # unresponsive service cannot hang the caller indefinitely.
    _REQUEST_TIMEOUT = 30

    # HTML assigned to the "DR1" element inside the returned JavaScript.
    _BIN_HTML_PATTERN = re.compile(
        r'document\.getElementById\("DR1"\)\.innerHTML="(.+)";'
    )

    # Collection dates are shown as DD/MM with no year.
    _DAY_MONTH_PATTERN = re.compile(r"\d+\/\d+")

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the bin collections for an address.

        :param page: Unused; the data is fetched by this method itself.
        :param kwargs: Must supply ``postcode`` and ``paon`` (house
            name/number), which are joined into the address search string.
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates formatted using ``date_format``.
        :raises Exception: If no address matches, or either response does
            not have the expected structure.
        :raises ValueError: If a DD/MM value on the page is not a real date.
        :raises requests.RequestException: If a request fails, times out or
            returns an HTTP error status.
        """
        data = {"bins": []}

        user_postcode = kwargs.get("postcode")
        user_number = kwargs.get("paon")

        user_address = "{} {}".format(user_number, user_postcode)
        user_address = urllib.parse.quote(user_address)

        # This first URL checks against a string representing the user's
        # address and returns values used for a second lookup.
        stage1_url = "https://wlnk.statmap.co.uk/map/Cluster.svc/findLocation?callback=getAddressesCallback1702938375023&script=%5CCluster%5CCluster.AuroraScript%24&address={}".format(
            user_address
        )

        stage1_response = requests.get(stage1_url, timeout=self._REQUEST_TIMEOUT)
        stage1_response.raise_for_status()

        # The response is JSONP; unwrap the callback and parse the JSON.
        address_data = json.loads(self._strip_jsonp(stage1_response.text))

        if address_data["TotalHits"] == 0:
            raise Exception(
                "No address found for string {}. See Wiki".format(user_address)
            )
        # If multiple hits are returned we pick the first one. We could raise
        # an exception here instead if this causes problems.

        # Pull out the address data needed for the next step
        location = address_data["Locations"][0]
        address_id = location["Id"]
        address_x = location["X"]
        address_y = location["Y"]

        # The backslashes of the script name are written percent-encoded
        # (%5C) rather than as "\C", which is an invalid string escape.
        stage2_url = "https://wlnk.statmap.co.uk/map/Cluster.svc/getpage?script=%5CCluster%5CCluster.AuroraScript$&taskId=bins&format=js&updateOnly=true&query=x%3D{}%3By%3D{}%3Bid%3D{}".format(
            address_x, address_y, address_id
        )

        stage2_response = requests.get(stage2_url, timeout=self._REQUEST_TIMEOUT)
        stage2_response.raise_for_status()
        bin_query = stage2_response.text

        # Test that what we got is good
        if "injectCss" not in bin_query:
            raise Exception(
                "Error. Data has not been returned correctly. Please raise an issue on the GitHub page"
            )

        # Return only the HTML contained within the Javascript function payload.
        bin_html = self._BIN_HTML_PATTERN.findall(bin_query)

        if len(bin_html) != 1:
            # Raised if the regular expression above finds anything other
            # than the one expected match.
            raise Exception(
                "Incorrect number of matches found during phase 2 search. Please raise an issue on the Github page"
            )

        # Turn the backslash escapes of the JavaScript string literal back
        # into real characters.
        # NOTE(review): this round-trips through Latin-1, so any raw
        # non-ASCII character in the payload would be mangled - confirm the
        # service only emits ASCII plus \u escapes.
        bin_html = bin_html[0].encode().decode("unicode-escape")

        soup = BeautifulSoup(bin_html, "html.parser")

        collection_list = soup.find("li", {"class": "auroraListItem"})
        if collection_list is None:
            raise Exception(
                "No collection list found in the returned data. Please raise an issue on the GitHub page"
            )

        # Evaluate "today" once so every date is compared to the same day.
        today = datetime.now().date()

        for row in collection_list.find_all("li"):
            # Get bin type
            label = row.find("span")
            if label is None:
                # Without a label there is no bin type to report.
                continue
            bin_type = label.text

            # Get bin dates
            for bin_date in self._DAY_MONTH_PATTERN.findall(row.text):
                bin_dt = self._resolve_collection_date(bin_date, today)
                data["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": bin_dt.strftime(date_format),
                    }
                )

        return data

    @staticmethod
    def _strip_jsonp(payload: str) -> str:
        """Return the text between the first "(" and the last ")" of a JSONP response."""
        start = payload.find("(")
        end = payload.rfind(")")
        if start == -1 or end <= start:
            raise Exception(
                "Unexpected address lookup response. Please raise an issue on the GitHub page"
            )
        return payload[start + 1 : end]

    @staticmethod
    def _resolve_collection_date(day_month: str, today):
        """
        Turn a "DD/MM" string into the first matching date on or after *today*.

        The webpage only gives DD/MM, so the year has to be inferred: this
        year if the date is today or still to come, otherwise a later year.
        The date is built directly against each candidate year because
        parsing "29/02" without a year fails (the default year 1900 is not a
        leap year).

        :param day_month: Date text in DD/MM form.
        :param today: A ``datetime.date`` used as the reference day.
        :return: A ``datetime.date``.
        :raises ValueError: If the text is not a real calendar date.
        """
        day_text, month_text = day_month.split("/")
        day, month = int(day_text), int(month_text)

        # Leap days can be up to eight years apart (e.g. 2096 -> 2104), so
        # look that far ahead before giving up.
        for year in range(today.year, today.year + 9):
            try:
                candidate = today.replace(year=year, month=month, day=day)
            except ValueError:
                continue
            if candidate >= today:
                return candidate

        raise ValueError("Invalid collection date: {}".format(day_month))

0 comments on commit 61a8da3

Please sign in to comment.