
fix: Various council fixes #361

Merged: 12 commits, Oct 15, 2023
1,739 changes: 910 additions & 829 deletions poetry.lock


1 change: 1 addition & 0 deletions pyproject.toml
@@ -54,6 +54,7 @@ python = ">=3.10"
 requests = "*"
 selenium = "*"
 lxml = "*"
+urllib3 = "^2.0.6"

 [tool.commitizen]
 major_version_zero = true
2 changes: 1 addition & 1 deletion uk_bin_collection/tests/input.json
@@ -105,7 +105,7 @@
     "CrawleyBoroughCouncil": {
         "SKIP_GET_URL": "SKIP_GET_URL",
         "uprn": "100061785321",
-        "usrn": "9701076",
+        "house_number": "9701076",
         "url": "https://my.crawley.gov.uk/",
         "wiki_name": "Crawley Borough Council",
         "wiki_note": "Crawley needs to be passed both a UPRN and a USRN to work. Find these on [FindMyAddress](https://www.findmyaddress.co.uk/search) or [FindMyStreet](https://www.findmystreet.co.uk/map)."
43 changes: 31 additions & 12 deletions uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py
@@ -15,23 +15,42 @@


 class CouncilClass(AbstractGetBinDataClass):

     def get_data(self, address_url):
         # Unused, we need the uprn!
         return None

     def parse_data(self, page: str, **kwargs) -> dict:
+        requests.packages.urllib3.disable_warnings()
+
         user_uprn = kwargs.get("uprn")
         check_uprn(user_uprn)

-        request_headers = {
-            "cookie": f"WhenAreMyBinsCollected={user_uprn}"
+        cookies = {
+            'cookie_control_popup': 'A',
+            'WhenAreMyBinsCollected': f'{user_uprn}',
         }
-        requests.packages.urllib3.disable_warnings()
-        response = requests.get(
-            "https://www.basingstoke.gov.uk/bincollections",
-            headers=request_headers,
-        )

+        headers = {
+            'Accept': '*/*',
+            'Accept-Language': 'en-GB,en;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            'Origin': 'https://www.basingstoke.gov.uk',
+            'Pragma': 'no-cache',
+            'Referer': 'https://www.basingstoke.gov.uk/rte.aspx?id=1270',
+            'Sec-Fetch-Dest': 'empty',
+            'Sec-Fetch-Mode': 'cors',
+            'Sec-Fetch-Site': 'same-origin',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.188 Safari/537.36',
+            'X-MicrosoftAjax': 'Delta=true',
+            'X-Requested-With': 'XMLHttpRequest',
+        }
+
+        params = {
+            'id': '1270',
+        }
+
+        data = f'rteelem%24ctl03%24ctl00=rteelem%24ctl03%24ctl01%7Crteelem%24ctl03%24gapAddress%24ctl05&rteelem%24ctl03%24gapAddress%24lstStage2_SearchResults=UPRN%3A{user_uprn}&__EVENTTARGET=rteelem%24ctl03%24gapAddress%24ctl05&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKLTQ2NzE5Mjc0NQ9kFgJmD2QWAgICD2QWAgIBD2QWAgIHD2QWAmYPZBYEAgEPDxYEHhNBc3NvY2lhdGVkQ29udHJvbElEBSJnYXBBZGRyZXNzOmxzdFN0YWdlMl9TZWFyY2hSZXN1bHRzHgRUZXh0BQ5TZWxlY3QgYWRkcmVzc2RkAgMPDxYEHiBHYXBFeHRlcm5hbFByb21wdExhYmVsVGV4dFN0YWdlMQUSU2VhcmNoIGZvciBhZGRyZXNzHiBHYXBFeHRlcm5hbFByb21wdExhYmVsVGV4dFN0YWdlMgUOU2VsZWN0IGFkZHJlc3NkFgJmD2QWBGYPDxYCHgdWaXNpYmxlaGQWAmYPDxYCHwEFCFJHMjIgNlRIZGQCAQ8PFgIfBGdkFgJmDxBkEBUMNDEzOCBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTQwIFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNDIgU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRINDE0NCBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTQ2IFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNDggU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRINDE1MCBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTUyIFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNTQgU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRINDE1NiBTdCBQZXRlcnMgUm9hZCwgQmFzaW5nc3Rva2UsIEhhbXBzaGlyZSwgUkcyMiA2VEg0MTU4IFN0IFBldGVycyBSb2FkLCBCYXNpbmdzdG9rZSwgSGFtcHNoaXJlLCBSRzIyIDZUSDQxNjAgU3QgUGV0ZXJzIFJvYWQsIEJhc2luZ3N0b2tlLCBIYW1wc2hpcmUsIFJHMjIgNlRIFQwRVVBSTjoxMDAwNjAyNDM5MjcRVVBSTjoxMDAwNjAyNDM5MjkRVVBSTjoxMDAwNjAyNDM5MzERVVBSTjoxMDAwNjAyNDM5MzMRVVBSTjoxMDAwNjAyNDM5MzURVVBSTjoxMDAwNjAyNDM5MzYRVVBSTjoxMDAwNjAyNDM5MzcRVVBSTjoxMDAwNjAyNDM5MzgRVVBSTjoxMDAwNjAyNDM5MzkRVVBSTjoxMDAwNjAyNDM5NDARVVBSTjoxMDAwNjAyNDM5NDERVVBSTjoxMDAwNjAyNDM5NDIUKwMMZ2dnZ2dnZ2dnZ2dnZGRkpXCIF40J9nPqukmdVM4NgNZFZyw%3D&__VIEWSTATEGENERATOR=99691FF6&__EVENTVALIDATION=%2FwEdABCb2eofM0yrOZt2P3lnE8LBzdIwLRuYuP7lVS1GO2hXAAf%2FiyMIUYr%2BX38W%2FCsEufkYF%2FJqBocIUvPBZShq0SWLlDuEZpde9d1EPv1cdNAxtv0a5P%2BAzvWcKULA75C%2FHDNl8al%2FKtVDH8iZIW8%2BPWamtUNjyfZaTGu1VxFRW7%2BrIZHFk8PySEuoYzdlb%2Fw0NMLP8MZHHy%2BSyI7El1raMGfVGyh7Lv3Ohzid1s46Z3mtovjgyLnG9kXo%2FMyI4mgBTTdOYHrncJX8sN52g9M2NHMrNJrGEa%2BGwkZVSfqAxtisKhbq%2Bzxiu%2BV7mP9nRlRrnJ0yunAhZS1%2FkWU9mq7vbq4HclDPJK5tGeZ7jNpUx3wTgU%2Btyxc%3D&__ASYNCPOST=true&'
+
+        response = requests.post('https://www.basingstoke.gov.uk/rte.aspx', params=params, cookies=cookies,
+                                 headers=headers, data=data, verify=False)
+
+        if response.status_code != 200:
+            raise SystemError("Error retrieving data! Please try again or raise an issue on GitHub!")
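A note on the request body above: it replays an ASP.NET WebForms async postback, where `__EVENTTARGET` names the control that fired and `__VIEWSTATE`/`__EVENTVALIDATION` are server-issued tokens captured from one live session. Hard-coded tokens work until the council redeploys the page, so a more defensive variant would scrape fresh hidden fields before posting. A minimal sketch of that pattern (`build_postback_fields` is a hypothetical helper, not part of this PR):

```python
import requests
from bs4 import BeautifulSoup

FORM_URL = "https://www.basingstoke.gov.uk/rte.aspx?id=1270"


def build_postback_fields(session: requests.Session) -> dict:
    """Fetch the form page and pull out the ASP.NET hidden fields so the
    POST body never depends on a hard-coded __VIEWSTATE blob."""
    page = session.get(FORM_URL)
    soup = BeautifulSoup(page.text, "html.parser")
    fields = {}
    for name in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
        tag = soup.find("input", {"name": name})
        if tag is not None:
            fields[name] = tag.get("value", "")
    return fields
```

The returned dict can then be merged into the form data before the POST, replacing the captured literals.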
@@ -4,7 +4,20 @@
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
     AbstractGetBinDataClass
+import ssl
+import urllib3
+
+class CustomHttpAdapter(requests.adapters.HTTPAdapter):
+    """Transport adapter that allows us to use a custom ssl_context."""
+
+    def __init__(self, ssl_context=None, **kwargs):
+        self.ssl_context = ssl_context
+        super().__init__(**kwargs)
+
+    def init_poolmanager(self, connections, maxsize, block=False):
+        self.poolmanager = urllib3.poolmanager.PoolManager(
+            num_pools=connections, maxsize=maxsize,
+            block=block, ssl_context=self.ssl_context)

 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -18,25 +31,29 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)

         headers = {
-            "accept": "application/json, text/javascript, */*; q=0.01",
-            "accept-encoding": "gzip, deflate, br",
-            "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
-            "connection": "keep-alive",
-            "content-type": "application/json",
-            "host": "www.bathnes.gov.uk",
-            "referer": "https://www.bathnes.gov.uk/webforms/waste/collectionday/",
-            "sec-fetch-dest": "empty",
-            "sec-fetch-mode": "cors",
-            "sec-fetch-site": "same-origin",
-            "sec-gpc": "1",
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
-            "x-requested-with": "XMLHttpRequest",
+            'Accept': 'application/json, text/javascript, */*; q=0.01',
+            'Accept-Language': 'en-GB,en;q=0.9',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/json; charset=utf-8',
+            'Pragma': 'no-cache',
+            'Referer': 'https://www.bathnes.gov.uk/webforms/waste/collectionday/',
+            'Sec-Fetch-Dest': 'empty',
+            'Sec-Fetch-Mode': 'cors',
+            'Sec-Fetch-Site': 'same-origin',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.188 Safari/537.36',
+            'X-Requested-With': 'XMLHttpRequest',
         }

+        session = requests.Session()
+        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+        ctx.options |= 0x4
+        session.mount('https://', CustomHttpAdapter(ctx))
+
         requests.packages.urllib3.disable_warnings()
-        response = requests.get(
+        response = session.get(
             f"https://www.bathnes.gov.uk/webapi/api/BinsAPI/v2/getbartecroute/{user_uprn}/true",
-            headers=headers,
+            headers=headers
         )
         if response.text == "":
             raise ValueError("Error parsing data. Please check the provided UPRN. "
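The `CustomHttpAdapter` introduced here (and duplicated in BlackburnCouncil.py below) works around OpenSSL 3 refusing to connect to servers that lack RFC 5746 secure-renegotiation support, which these council hosts appear to. The magic `0x4` is OpenSSL's `SSL_OP_LEGACY_SERVER_CONNECT` flag; Python only exposes the named constant `ssl.OP_LEGACY_SERVER_CONNECT` from 3.12, so on the 3.10/3.11 interpreters this project supports, the literal is the practical option. A self-contained sketch of the same idea with the intent spelled out:

```python
import ssl

import requests
import urllib3


class CustomHttpAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that lets requests use a caller-supplied SSLContext."""

    def __init__(self, ssl_context=None, **kwargs):
        self.ssl_context = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = urllib3.poolmanager.PoolManager(
            num_pools=connections, maxsize=maxsize,
            block=block, ssl_context=self.ssl_context)


def legacy_ssl_session() -> requests.Session:
    """Build a Session that tolerates servers without RFC 5746 support."""
    ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
    # The named constant exists from Python 3.12; 0x4 is the same flag value.
    ctx.options |= getattr(ssl, "OP_LEGACY_SERVER_CONNECT", 0x4)
    session = requests.Session()
    session.mount("https://", CustomHttpAdapter(ctx))
    return session
```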
21 changes: 20 additions & 1 deletion uk_bin_collection/uk_bin_collection/councils/BlackburnCouncil.py
@@ -6,6 +6,20 @@
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
     AbstractGetBinDataClass
+import ssl
+import urllib3
+
+class CustomHttpAdapter(requests.adapters.HTTPAdapter):
+    """Transport adapter that allows us to use a custom ssl_context."""
+
+    def __init__(self, ssl_context=None, **kwargs):
+        self.ssl_context = ssl_context
+        super().__init__(**kwargs)
+
+    def init_poolmanager(self, connections, maxsize, block=False):
+        self.poolmanager = urllib3.poolmanager.PoolManager(
+            num_pools=connections, maxsize=maxsize,
+            block=block, ssl_context=self.ssl_context)


 class CouncilClass(AbstractGetBinDataClass):
@@ -39,7 +53,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
         )
         response_headers = parse_header(response_header_str)
         requests.packages.urllib3.disable_warnings()
-        response = requests.get(url, headers=response_headers, verify=False)
+        session = requests.Session()
+        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+        ctx.options |= 0x4
+        session.mount('https://', CustomHttpAdapter(ctx))
+
+        response = session.get(url, headers=response_headers)

         # Return JSON from response and loop through collections
         json_result = json.loads(response.content)
@@ -1,5 +1,6 @@
 # This script pulls (in one hit) the data from Bromley Council Bins Data
-import dateutil.parser
+import datetime
+from dateutil.relativedelta import relativedelta
 from bs4 import BeautifulSoup
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
@@ -20,27 +21,38 @@ def parse_data(self, page: str, **kwargs) -> dict:
         soup.prettify()

         bin_data_dict = {"bins": []}
+        collections = []

-        # Search for the specific bin in the table using BS4
-        rows = soup.find("div", class_=("waste__collections")).find_all(
-            "h3",
-            class_=("waste-service-name",),
-        )
-
-        # Loops the Rows
-        for row in rows:
-            bin_type = row.get_text().strip()
-            collectionDate = row.find_all_next(
-                "dd", {"class": "govuk-summary-list__value"}
-            )
-            # Make each Bin element in the JSON, but only if we have a date available
-            if collectionDate:
-                date = dateutil.parser.parse(collectionDate[1].text.strip())
-                dict_data = {
-                    "type": bin_type,
-                    "collectionDate": date.strftime(date_format),
-                }
-                # Add data to the main JSON Wrapper
-                bin_data_dict["bins"].append(dict_data)
+        # Search for the specific bins in the table using BS4
+        bin_types = soup.find_all("h3", class_="govuk-heading-m waste-service-name")
+        collection_info = soup.find_all("dl", {"class": "govuk-summary-list"})
+
+        # Raise error if data is not loaded at time of scrape (30% chance it is)
+        if len(bin_types) == 0:
+            raise ConnectionError("Error fetching council data: data absent when page was scraped.")
+
+        # Parse the data
+        for idx, value in enumerate(collection_info):
+            bin_type = bin_types[idx].text.strip()
+            collection_date = value.contents[3].contents[3].text.strip()
+            next_collection = datetime.strptime(remove_ordinal_indicator_from_date_string(collection_date.replace(',', '')), "%A %d %B")
+            curr_date = datetime.now().date()
+            next_collection = next_collection.replace(year=curr_date.year)
+            if curr_date.month == 12 and next_collection.month == 1:
+                next_collection = next_collection + relativedelta(years=1)
+            collections.append((bin_type, next_collection))
+
+        # Sort the text and list elements by date
+        ordered_data = sorted(collections, key=lambda x: x[1])
+
+        # Put the elements into the dictionary
+        for item in ordered_data:
+            dict_data = {
+                "type": item[0],
+                "collectionDate": item[1].strftime(date_format),
+            }
+            bin_data_dict["bins"].append(dict_data)

         return bin_data_dict
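Both this parser and the Crawley change below resolve dates that the council pages print without a year: `strptime` with `"%A %d %B"` defaults the year to 1900, so the code stamps on the current year and rolls a January date forward when scraping in December. A standalone sketch of that helper (the function name is illustrative, not from this repo):

```python
from datetime import datetime

from dateutil.relativedelta import relativedelta


def next_occurrence(date_str: str, today: datetime | None = None) -> datetime:
    """Resolve a year-less date such as 'Friday 15 December' to its next
    occurrence, assuming collections are never more than a year away."""
    today = today or datetime.now()
    parsed = datetime.strptime(date_str, "%A %d %B")  # year defaults to 1900
    candidate = parsed.replace(year=today.year)
    # A January date scraped in December belongs to next year.
    if today.month == 12 and candidate.month == 1:
        candidate += relativedelta(years=1)
    return candidate
```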
@@ -1,4 +1,6 @@
 from bs4 import BeautifulSoup
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import \
     AbstractGetBinDataClass
@@ -15,7 +17,7 @@ class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         # Make a BS4 object
         uprn = kwargs.get("uprn")
-        usrn = kwargs.get("usrn")
+        usrn = kwargs.get("paon")
         check_uprn(uprn)
         check_usrn(usrn)

@@ -41,11 +43,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
         bin_index = 0
         for tag in collection_tag:
             for item in tag.next_elements:
-                if str(item).startswith('<div class="date text-right text-grey">'):
-                    collection_date = datetime.strptime(item.text, "%A %d %B").strftime(date_format)
+                if str(item).startswith('<div class="date text-right text-grey">') and str(item) != "":
+                    collection_date = datetime.strptime(item.text, "%A %d %B")
+                    next_collection = collection_date.replace(year=datetime.now().year)
+                    if datetime.now().month == 12 and next_collection.month == 1:
+                        next_collection = next_collection + relativedelta(years=1)
+
                     dict_data = {
                         "type": titles[bin_index].strip(),
-                        "collectionDate": collection_date,
+                        "collectionDate": next_collection.strftime(date_format),
                     }
                     data["bins"].append(dict_data)
                     bin_index += 1
@@ -17,9 +17,6 @@ def parse_data(self, page: str, **kwargs) -> dict:
         user_uprn = kwargs.get("uprn")
         check_uprn(user_uprn)

-        soup = BeautifulSoup(page.text, features="html.parser")
-        soup.prettify()
-
         data = {"bins": []}

         headers = {
@@ -26,7 +26,7 @@ def parse_data(self, page: str, **kwargs) -> dict:

         # Make a request to the API
         requests.packages.urllib3.disable_warnings()
-        response = requests.post(api_url, data=form_data)
+        response = requests.post(api_url, data=form_data, verify=False)

         # Make a BS4 object
         soup = BeautifulSoup(response.text, features="html.parser")
@@ -27,7 +27,7 @@ def parse_data(self, page: str, **kwargs) -> dict:

         # Parse URL and read if connection successful
         requests.packages.urllib3.disable_warnings()
-        response = requests.get(council_url)
+        response = requests.get(council_url, verify=False)
         if response.status_code == 200:
             soup = BeautifulSoup(response.text, features="html.parser")
             soup.prettify()
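The last two hunks pair `verify=False` with `requests.packages.urllib3.disable_warnings()`, which mutes every urllib3 warning. If certificate validation genuinely has to be skipped for these hosts, a narrower alternative is to silence only the warning class that `verify=False` triggers; a sketch:

```python
import requests
import urllib3

# Mute only InsecureRequestWarning, leaving other urllib3 warnings visible.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# badssl.com test host with a self-signed certificate, used here for illustration.
response = requests.get("https://self-signed.badssl.com/", verify=False)
print(response.status_code)
```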
13 changes: 13 additions & 0 deletions wiki/Councils.md
@@ -66,6 +66,7 @@ This document is still a work in progress, don't worry if your council isn't lis
 - [Northumberland Council](#northumberland-council)
 - [Preston City Council](#preston-city-council)
 - [Reigate and Banstead Borough Council](#reigate-and-banstead-borough-council)
+- [Rhondda Cynon Taff Council](#rhondda-cynon-taff-council)
 - [Rochdale Council](#rochdale-council)
 - [Rushcliffe Borough Council](#rushcliffe-borough-council)
 - [Rushmoor Council](#rushmoor-council)
@@ -692,6 +693,18 @@ Note: To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.

 ---

+### Rhondda Cynon Taff Council
+```commandline
+python collect_data.py RhonddaCynonTaffCouncil https://www.rctcbc.gov.uk/EN/Resident/RecyclingandWaste/RecyclingandWasteCollectionDays.aspx -s -u XXXXXXXX
+```
+Additional parameters:
+- `-s` - skip get URL
+- `-u` - UPRN
+
+Note: To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)
+
+---
+
 ### Rochdale Council
 ```commandline
 python collect_data.py RochdaleCouncil https://webforms.rochdale.gov.uk/BinCalendar -s -u XXXXXXXX -p "XXXX XXX"