From 167c15cae914d7ff04e2dbdf6793d5ed25618c55 Mon Sep 17 00:00:00 2001
From: Simon Drake
Date: Thu, 7 Dec 2023 13:13:16 +0000
Subject: [PATCH] feat: Add BedfordshireCouncil scraper

---
 .../features/validate_council_outputs.feature |  1 +
 uk_bin_collection/tests/input.json            | 11 ++-
 .../councils/BedfordshireCouncil.py           | 67 +++++++++++++++++++
 3 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py

diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature
index e49f801fe9..c32b0885a9 100644
--- a/uk_bin_collection/tests/features/validate_council_outputs.feature
+++ b/uk_bin_collection/tests/features/validate_council_outputs.feature
@@ -13,6 +13,7 @@ Feature: Test each council output matches expected results
         | BasingstokeCouncil              | None | None |
         | BathAndNorthEastSomersetCouncil | None | None |
         | BCPCouncil                      | None | None |
+        | BedfordshireCouncil             | None | None |
         | BexleyCouncil                   | None | None |
         | BlackburnCouncil                | None | None |
         | BoltonCouncil                   | None | None |
diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
index f86c5db254..37f26c3da4 100644
--- a/uk_bin_collection/tests/input.json
+++ b/uk_bin_collection/tests/input.json
@@ -32,6 +32,12 @@
         "url": "https://www.bathnes.gov.uk/webforms/waste/collectionday/",
         "wiki_name": "Bath and North East Somerset Council"
     },
+    "BedfordshireCouncil": {
+        "skip_get_url": true,
+        "url": "https://www.centralbedfordshire.gov.uk/info/163/bins_and_waste_collections_-_check_bin_collection_day",
+        "wiki_name": "Bedfordshire Council",
+        "wiki_note": "To use this parser, you must provide a valid postcode and the UPRN for your address, which can be retrieved from the council's website."
+    },
     "BexleyCouncil": {
         "skip_get_url": true,
         "uprn": "spamstorage@live.co.uk",
@@ -393,7 +399,7 @@
         "wiki_command_url_override": "https://community.newcastle.gov.uk/my-neighbourhood/ajax/getBinsNew.php?uprn=XXXXXXXX",
         "wiki_name": "Newcastle City Council",
         "wiki_note": "Replace XXXXXXXX with UPRN."
-    },
+    },
     "NorthEastDerbyshireDistrictCouncil": {
         "skip_get_url": true,
         "uprn": "010034492221",
@@ -750,4 +756,5 @@
         "url": "https://waste-api.york.gov.uk/api/Collections/GetBinCollectionDataForUprn/",
         "wiki_name": "York Council"
     }
-}
\ No newline at end of file
+}
+
diff --git a/uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py
new file mode 100644
index 0000000000..15aeb7ef56
--- /dev/null
+++ b/uk_bin_collection/uk_bin_collection/councils/BedfordshireCouncil.py
@@ -0,0 +1,67 @@
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import \
+    AbstractGetBinDataClass
+
+
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Concrete classes have to implement all abstract operations of the
+    base class. They can also override some operations with a default
+    implementation.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        user_uprn = kwargs.get("uprn")
+        user_postcode = kwargs.get("postcode")
+
+        check_uprn(user_uprn)
+        check_postcode(user_postcode)
+
+        # Start a new session to walk through the form
+        requests.packages.urllib3.disable_warnings()
+        s = requests.session()
+
+        headers = {
+            'Origin': 'https://www.centralbedfordshire.gov.uk',
+            'Referer': 'https://www.centralbedfordshire.gov.uk/info/163/bins_and_waste_collections_-_check_bin_collection_day',
+        }
+
+        files = {
+            'postcode': (None, user_postcode),
+            'address': (None, user_uprn),
+        }
+
+        response = s.post(
+            'https://www.centralbedfordshire.gov.uk/info/163/bins_and_waste_collections_-_check_bin_collection_day#my_bin_collections',
+            headers=headers,
+            files=files,
+        )
+
+        # Make that BS4 object and use it to prettify the response
+        soup = BeautifulSoup(response.content, features="html.parser")
+        soup.prettify()
+
+        collections_div = soup.find(id="collections")
+
+        # Each collection is an <h3> containing the date; the bin type is the text node after the following <br>
+        collections = []
+        for heading in collections_div.find_all("h3"):
+            bin_type = heading.find_next("br").next_sibling
+            collection_date = datetime.strptime(heading.text, "%A, %d %B %Y")
+            collections.append((bin_type, collection_date))
+
+        # Sort the collections into date order rather than bin type, then return as a dictionary (with str date)
+        ordered_data = sorted(collections, key=lambda x: x[1])
+        data = {"bins": []}
+        for item in ordered_data:
+            dict_data = {
+                "type": item[0],
+                "collectionDate": item[1].strftime(date_format),
+            }
+            data["bins"].append(dict_data)
+
+        return data
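
Reviewer note: below is a minimal smoke-test sketch for exercising the new parser outside the test harness. It assumes the package is importable under the module path added in this patch, and the postcode and UPRN are placeholders that must be replaced with real values retrieved from the council's website. Because the scraper fetches its own data with a POST, the page argument is unused and an empty string is passed.

    # smoke_test_bedfordshire.py -- illustrative only; the postcode/UPRN below are placeholders
    from uk_bin_collection.uk_bin_collection.councils.BedfordshireCouncil import (
        CouncilClass,
    )

    if __name__ == "__main__":
        parser = CouncilClass()
        # parse_data() performs its own request, so the page argument can be empty
        data = parser.parse_data("", postcode="AB1 2CD", uprn="000000000000")
        for entry in data["bins"]:
            print(entry["type"], entry["collectionDate"])

If run against a real address, the output should match the shape the behave feature expects: a "bins" list of entries with "type" and "collectionDate" keys.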