
fix: Various council fixes #361

Merged: 12 commits, Oct 15, 2023
1,739 changes: 910 additions & 829 deletions poetry.lock


1 change: 1 addition & 0 deletions pyproject.toml
@@ -54,6 +54,7 @@ python = ">=3.10"
 requests = "*"
 selenium = "*"
 lxml = "*"
+urllib3 = "^2.0.6"

 [tool.commitizen]
 major_version_zero = true
2 changes: 1 addition & 1 deletion uk_bin_collection/tests/input.json
@@ -105,7 +105,7 @@
     "CrawleyBoroughCouncil": {
         "SKIP_GET_URL": "SKIP_GET_URL",
         "uprn": "100061785321",
-        "usrn": "9701076",
+        "house_number": "9701076",
         "url": "https://my.crawley.gov.uk/",
         "wiki_name": "Crawley Borough Council",
         "wiki_note": "Crawley needs to be passed both a UPRN and a USRN to work. Find these on [FindMyAddress](https://www.findmyaddress.co.uk/search) or [FindMyStreet](https://www.findmystreet.co.uk/map)."
43 changes: 31 additions & 12 deletions uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py
@@ -15,23 +15,42 @@


 class CouncilClass(AbstractGetBinDataClass):

     def get_data(self, address_url):
         # Unused, we need the uprn!
         return None

     def parse_data(self, page: str, **kwargs) -> dict:
+        requests.packages.urllib3.disable_warnings()
+
         user_uprn = kwargs.get("uprn")
         check_uprn(user_uprn)

-        request_headers = {
-            "cookie": f"WhenAreMyBinsCollected={user_uprn}"
+        cookies = {
+            'cookie_control_popup': 'A',
+            'WhenAreMyBinsCollected': f'{user_uprn}',
         }
-        requests.packages.urllib3.disable_warnings()
-        response = requests.get(
-            "https://www.basingstoke.gov.uk/bincollections",
-            headers=request_headers,
-        )

+        headers = {
+            'Accept': '*/*',
+            'Accept-Language': 'en-GB,en;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            'Origin': 'https://www.basingstoke.gov.uk',
+            'Pragma': 'no-cache',
+            'Referer': 'https://www.basingstoke.gov.uk/rte.aspx?id=1270',
+            'Sec-Fetch-Dest': 'empty',
+            'Sec-Fetch-Mode': 'cors',
+            'Sec-Fetch-Site': 'same-origin',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.188 Safari/537.36',
+            'X-MicrosoftAjax': 'Delta=true',
+            'X-Requested-With': 'XMLHttpRequest',
+        }
+
+        params = {
+            'id': '1270',
+        }
+
+        data = f'rteelem%24ctl03%24ctl00=rteelem%24ctl03%24ctl01%7Crteelem%24ctl03%24gapAddress%24ctl05&rteelem%24ctl03%24gapAddress%24lstStage2_SearchResults=UPRN%3A{user_uprn}&__EVENTTARGET=rteelem%24ctl03%24gapAddress%24ctl05&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKLTQ2NzE5Mjc0NQ9kFgJmD2QWAgICD2QWAgIBD2QWAgIHD2QWAmYPZBYEAgEPDxYEHhNBc3NvY2lhdGVkQ29udHJvbElEBSJnYXBBZGRyZXNzOmxzdFN0YWdlMl9TZWFyY2hSZXN1bHRzHgRUZXh0BQ5TZWxlY3QgYWRkcmVzc2RkAgMPDxYEHiBHYXBFeHRlcm5hbFByb21wdExhYmVsVGV4dFN0YWdlMQUSU2VhcmNoIGZvciBhZGRyZXNzHiBHYXBFeHRlcm5hbFByb21wdExhYmVsVGV4dFN0YWdlMgUOU2VsZWN0IGFkZHJlc3NkFgJmD2QWBGYPDxYCHgdWaXNpYmxlaGQWAmYPDxYCHwEFCFJHMjIgNlRIZGQCAQ8PFgIfBGdkFgJmDxBkEBUMNDEzOCBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTQwIFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNDIgU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRINDE0NCBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTQ2IFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNDggU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRINDE1MCBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTUyIFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNTQgU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRINDE1NiBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTU4IFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNjAgU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRIFQwRVVBSTjoxMDAwNjAyNDM5MjcRVVBSTjoxMDAwNjAyNDM5MjkRVVBSTjoxMDAwNjAyNDM5MzERVVBSTjoxMDAwNjAyNDM5MzMRVVBSTjoxMDAwNjAyNDM5MzURVVBSTjoxMDAwNjAyNDM5MzYRVVBSTjoxMDAwNjAyNDM5MzcRVVBSTjoxMDAwNjAyNDM5MzgRVVBSTjoxMDAwNjAyNDM5MzkRVVBSTjoxMDAwNjAyNDM5NDARVVBSTjoxMDAwNjAyNDM5NDERVVBSTjoxMDAwNjAyNDM5NDIUKwMMZ2dnZ2dnZ2dnZ2dnZGRkpXCIF40J9nPqukmdVM4NgNZFZyw%3D&__VIEWSTATEGENERATOR=99691FF6&__EVENTVALIDATION=%2FwEdABCb2eofM0yrOZt2P3lnE8LBzdIwLRuYuP7lVS1GO2hXAAf%2FiyMIUYr%2BX38W%2FCsEufkYF%2FJqBocIUvPBZShq0SWLlDuEZpde9d1EPv1cdNAxtv0a5P%2BAzvWcKULA75C%2FHDNl8al%2FKtVDH8iZIW8%2BPWamtUNjyfZaTGu1VxFRW7%2BrIZHFk8PySEuoYzdlb%2Fw0NMLP8MZHHy%2BSyI7El1raMGfVGyh7Lv3Ohzid1s46Z3mtovjgyLnG9kXo%2FMyI4mgBTTdOYHrncJX8sN52g9M2NHMrNJrGEa%2BGwkZVSfqAxtisKhbq%2Bzxiu%2BV7mP9nRlRrnJ0yunAhZS1%2FkWU9mq7vbq4HclDPJK5tGeZ7jNpUx3wTgU%2Btyxc%3D&__ASYNCPOST=true&'
+
+        response = requests.post('https://www.basingstoke.gov.uk/rte.aspx', params=params, cookies=cookies,
+                                 headers=headers, data=data, verify=False)
+
+        if response.status_code != 200:
+            raise SystemError("Error retrieving data! Please try again or raise an issue on GitHub!")
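A note on the request body above: it replays an ASP.NET WebForms async postback, where `__EVENTTARGET` names the control that fired and `__VIEWSTATE`/`__EVENTVALIDATION` are server-issued tokens captured from one live session. Hard-coded tokens work until the council redeploys the page, so a more defensive variant would scrape fresh hidden fields before posting. A minimal sketch of that pattern (`build_postback_fields` is a hypothetical helper, not part of this PR):

```python
import requests
from bs4 import BeautifulSoup

FORM_URL = "https://www.basingstoke.gov.uk/rte.aspx?id=1270"


def build_postback_fields(session: requests.Session) -> dict:
    """Fetch the form page and pull out the ASP.NET hidden fields so the
    POST body never depends on a hard-coded __VIEWSTATE blob."""
    page = session.get(FORM_URL)
    soup = BeautifulSoup(page.text, "html.parser")
    fields = {}
    for name in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
        tag = soup.find("input", {"name": name})
        if tag is not None:
            fields[name] = tag.get("value", "")
    return fields
```

The returned dict can then be merged into the form data before the POST, replacing the captured literals.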
@@ -4,7 +4,20 @@
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
     AbstractGetBinDataClass
+import ssl
+import urllib3
+
+class CustomHttpAdapter(requests.adapters.HTTPAdapter):
+    """Transport adapter that allows us to use a custom ssl_context."""
+
+    def __init__(self, ssl_context=None, **kwargs):
+        self.ssl_context = ssl_context
+        super().__init__(**kwargs)
+
+    def init_poolmanager(self, connections, maxsize, block=False):
+        self.poolmanager = urllib3.poolmanager.PoolManager(
+            num_pools=connections, maxsize=maxsize,
+            block=block, ssl_context=self.ssl_context)

 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -18,25 +31,29 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)

         headers = {
-            "accept": "application/json, text/javascript, */*; q=0.01",
-            "accept-encoding": "gzip, deflate, br",
-            "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
-            "connection": "keep-alive",
-            "content-type": "application/json",
-            "host": "www.bathnes.gov.uk",
-            "referer": "https://www.bathnes.gov.uk/webforms/waste/collectionday/",
-            "sec-fetch-dest": "empty",
-            "sec-fetch-mode": "cors",
-            "sec-fetch-site": "same-origin",
-            "sec-gpc": "1",
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
-            "x-requested-with": "XMLHttpRequest",
+            'Accept': 'application/json, text/javascript, */*; q=0.01',
+            'Accept-Language': 'en-GB,en;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/json; charset=utf-8',
+            'Pragma': 'no-cache',
+            'Referer': 'https://www.bathnes.gov.uk/webforms/waste/collectionday/',
+            'Sec-Fetch-Dest': 'empty',
+            'Sec-Fetch-Mode': 'cors',
+            'Sec-Fetch-Site': 'same-origin',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.188 Safari/537.36',
+            'X-Requested-With': 'XMLHttpRequest',
         }

+        session = requests.Session()
+        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+        ctx.options |= 0x4
+        session.mount('https://', CustomHttpAdapter(ctx))
+
         requests.packages.urllib3.disable_warnings()
-        response = requests.get(
+        response = session.get(
             f"https://www.bathnes.gov.uk/webapi/api/BinsAPI/v2/getbartecroute/{user_uprn}/true",
-            headers=headers,
+            headers=headers
         )
         if response.text == "":
             raise ValueError("Error parsing data. Please check the provided UPRN. "
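The `CustomHttpAdapter` introduced here (and duplicated in BlackburnCouncil.py below) works around OpenSSL 3 refusing to connect to servers that lack RFC 5746 secure-renegotiation support, which these council hosts appear to. The magic `0x4` is OpenSSL's `SSL_OP_LEGACY_SERVER_CONNECT` flag; Python only exposes the named constant `ssl.OP_LEGACY_SERVER_CONNECT` from 3.12, so on the 3.10/3.11 interpreters this project supports, the literal is the practical option. A self-contained sketch of the same idea with the intent spelled out:

```python
import ssl

import requests
import urllib3


class CustomHttpAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that lets requests use a caller-supplied SSLContext."""

    def __init__(self, ssl_context=None, **kwargs):
        self.ssl_context = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = urllib3.poolmanager.PoolManager(
            num_pools=connections, maxsize=maxsize,
            block=block, ssl_context=self.ssl_context)


def legacy_ssl_session() -> requests.Session:
    """Build a Session that tolerates servers without RFC 5746 support."""
    ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
    # The named constant exists from Python 3.12; 0x4 is the same flag value.
    ctx.options |= getattr(ssl, "OP_LEGACY_SERVER_CONNECT", 0x4)
    session = requests.Session()
    session.mount("https://", CustomHttpAdapter(ctx))
    return session
```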
21 changes: 20 additions & 1 deletion uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
@@ -6,6 +6,20 @@
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
     AbstractGetBinDataClass
+import ssl
+import urllib3
+
+class CustomHttpAdapter(requests.adapters.HTTPAdapter):
+    """Transport adapter that allows us to use a custom ssl_context."""
+
+    def __init__(self, ssl_context=None, **kwargs):
+        self.ssl_context = ssl_context
+        super().__init__(**kwargs)
+
+    def init_poolmanager(self, connections, maxsize, block=False):
+        self.poolmanager = urllib3.poolmanager.PoolManager(
+            num_pools=connections, maxsize=maxsize,
+            block=block, ssl_context=self.ssl_context)


 class CouncilClass(AbstractGetBinDataClass):
@@ -39,7 +53,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
         )
         response_headers = parse_header(response_header_str)
         requests.packages.urllib3.disable_warnings()
-        response = requests.get(url, headers=response_headers, verify=False)
+        session = requests.Session()
+        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+        ctx.options |= 0x4
+        session.mount('https://', CustomHttpAdapter(ctx))
+
+        response = session.get(url, headers=response_headers)

         # Return JSON from response and loop through collections
         json_result = json.loads(response.content)
@@ -1,5 +1,6 @@
 # This script pulls (in one hit) the data from Bromley Council Bins Data
-import dateutil.parser
+import datetime
+from dateutil.relativedelta import relativedelta
 from bs4 import BeautifulSoup
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
@@ -20,27 +21,38 @@ def parse_data(self, page: str, **kwargs) -> dict:
         soup.prettify()

         bin_data_dict = {"bins": []}
+        collections = []

-        # Search for the specific bin in the table using BS4
-        rows = soup.find("div", class_=("waste__collections")).find_all(
-            "h3",
-            class_=("waste-service-name",),
-        )
-
-        # Loops the Rows
-        for row in rows:
-            bin_type = row.get_text().strip()
-            collectionDate = row.find_all_next(
-                "dd", {"class": "govuk-summary-list__value"}
-            )
-            # Make each Bin element in the JSON, but only if we have a date available
-            if collectionDate:
-                date = dateutil.parser.parse(collectionDate[1].text.strip())
-                dict_data = {
-                    "type": bin_type,
-                    "collectionDate": date.strftime(date_format),
-                }
-                # Add data to the main JSON Wrapper
-                bin_data_dict["bins"].append(dict_data)
+        # Search for the specific bins in the table using BS4
+        bin_types = soup.find_all("h3", class_="govuk-heading-m waste-service-name")
+        collection_info = soup.find_all("dl", {"class": "govuk-summary-list"})
+
+        # Raise error if data is not loaded at time of scrape (30% chance it is)
+        if len(bin_types) == 0:
+            raise ConnectionError("Error fetching council data: data absent when page was scraped.")
+
+        # Parse the data
+        for idx, value in enumerate(collection_info):
+            bin_type = bin_types[idx].text.strip()
+            collection_date = value.contents[3].contents[3].text.strip()
+            next_collection = datetime.strptime(remove_ordinal_indicator_from_date_string(collection_date.replace(',', '')), "%A %d %B")
+            curr_date = datetime.now().date()
+            next_collection = next_collection.replace(year=curr_date.year)
+            if curr_date.month == 12 and next_collection.month == 1:
+                next_collection = next_collection + relativedelta(years=1)
+            collections.append((bin_type, next_collection))
+
+        # Sort the text and list elements by date
+        ordered_data = sorted(collections, key=lambda x: x[1])
+
+        # Put the elements into the dictionary
+        for item in ordered_data:
+            dict_data = {
+                "type": item[0],
+                "collectionDate": item[1].strftime(date_format),
+            }
+            bin_data_dict["bins"].append(dict_data)

         return bin_data_dict
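Both this parser and the Crawley change below resolve dates that the council pages print without a year: `strptime` with `"%A %d %B"` defaults the year to 1900, so the code stamps on the current year and rolls a January date forward when scraping in December. A standalone sketch of that helper (the function name is illustrative, not from this repo):

```python
from datetime import datetime

from dateutil.relativedelta import relativedelta


def next_occurrence(date_str: str, today: datetime | None = None) -> datetime:
    """Resolve a year-less date such as 'Friday 15 December' to its next
    occurrence, assuming collections are never more than a year away."""
    today = today or datetime.now()
    parsed = datetime.strptime(date_str, "%A %d %B")  # year defaults to 1900
    candidate = parsed.replace(year=today.year)
    # A January date scraped in December belongs to next year.
    if today.month == 12 and candidate.month == 1:
        candidate += relativedelta(years=1)
    return candidate
```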
@@ -1,4 +1,6 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
     AbstractGetBinDataClass
@@ -15,7 +17,7 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         # Make a BS4 object
         uprn = kwargs.get("uprn")
-        usrn = kwargs.get("usrn")
+        usrn = kwargs.get("paon")
         check_uprn(uprn)
         check_usrn(usrn)

@@ -41,11 +43,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
         bin_index = 0
         for tag in collection_tag:
             for item in tag.next_elements:
-                if str(item).startswith('<div class="date text-right text-grey">'):
-                    collection_date = datetime.strptime(item.text, "%A %d %B").strftime(date_format)
+                if str(item).startswith('<div class="date text-right text-grey">') and str(item) != "":
+                    collection_date = datetime.strptime(item.text, "%A %d %B")
+                    next_collection = collection_date.replace(year=datetime.now().year)
+                    if datetime.now().month == 12 and next_collection.month == 1:
+                        next_collection = next_collection + relativedelta(years=1)
+
                     dict_data = {
                         "type": titles[bin_index].strip(),
-                        "collectionDate": collection_date,
+                        "collectionDate": next_collection.strftime(date_format),
                     }
                     data["bins"].append(dict_data)
                     bin_index += 1
@@ -17,9 +17,6 @@ def parse_data(self, page: str, **kwargs) -> dict:
         user_uprn = kwargs.get("uprn")
         check_uprn(user_uprn)

-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
-
         data = {"bins": []}

         headers = {
@@ -26,7 +26,7 @@ def parse_data(self, page: str, **kwargs) -> dict:

         # Make a request to the API
         requests.packages.urllib3.disable_warnings()
-        response = requests.post(api_url, data=form_data)
+        response = requests.post(api_url, data=form_data, verify=False)

         # Make a BS4 object
         soup = BeautifulSoup(response.text, features="html.parser")
@@ -27,7 +27,7 @@ def parse_data(self, page: str, **kwargs) -> dict:

         # Parse URL and read if connection successful
         requests.packages.urllib3.disable_warnings()
-        response = requests.get(council_url)
+        response = requests.get(council_url, verify=False)
         if response.status_code == 200:
             soup = BeautifulSoup(response.text, features="html.parser")
             soup.prettify()
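The last two hunks pair `verify=False` with `requests.packages.urllib3.disable_warnings()`, which mutes every urllib3 warning. If certificate validation genuinely has to be skipped for these hosts, a narrower alternative is to silence only the warning class that `verify=False` triggers; a sketch:

```python
import requests
import urllib3

# Mute only InsecureRequestWarning, leaving other urllib3 warnings visible.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# badssl.com test host with a self-signed certificate, used here for illustration.
response = requests.get("https://self-signed.badssl.com/", verify=False)
print(response.status_code)
```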
13 changes: 13 additions & 0 deletions wiki/Councils.md
@@ -66,6 +66,7 @@ This document is still a work in progress, don't worry if your council isn't lis
 - [Northumberland Council](#northumberland-council)
 - [Preston City Council](#preston-city-council)
 - [Reigate and Banstead Borough Council](#reigate-and-banstead-borough-council)
+- [Rhondda Cynon Taff Council](#rhondda-cynon-taff-council)
 - [Rochdale Council](#rochdale-council)
 - [Rushcliffe Borough Council](#rushcliffe-borough-council)
 - [Rushmoor Council](#rushmoor-council)
@@ -692,6 +693,18 @@ Note: To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.

 ---

+### Rhondda Cynon Taff Council
+```commandline
+python collect_data.py RhonddaCynonTaffCouncil https://www.rctcbc.gov.uk/EN/Resident/RecyclingandWaste/RecyclingandWasteCollectionDays.aspx -s -u XXXXXXXX
+```
+Additional parameters:
+- `-s` - skip get URL
+- `-u` - UPRN
+
+Note: To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)
+
+---
+
 ### Rochdale Council
 ```commandline
 python collect_data.py RochdaleCouncil https://webforms.rochdale.gov.uk/BinCalendar -s -u XXXXXXXX -p "XXXX XXX"