Skip to content

Commit

Permalink
Merge pull request #909 from rustyd0g/MidlothianCouncil
Browse files Browse the repository at this point in the history
  • Loading branch information
robbrad authored Oct 20, 2024
2 parents 27914d7 + e427b6a commit 038c72f
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
6 changes: 6 additions & 0 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,12 @@
"wiki_name": "Mid and East Antrim Borough Council",
"wiki_note": "Pass the house name/number plus the name of the street with the postcode parameter, wrapped in double quotes. Check the address in the web site first. This version will only pick the first SHOW button returned by the search or if it is fully unique. The search is not very predictable (e.g. house number 4 returns 14,24,4,44 etc.)."
},
"MidlothianCouncil": {
"url": "https://www.midlothian.gov.uk/directory_record/92551426/glenesk_bonnyrigg_eh19_3je",
"wiki_command_url_override": "https://www.midlothian.gov.uk/directory_record/XXXXXX/XXXXXX",
"wiki_name": "Midlothian Council",
"wiki_note": "Follow the instructions [here](https://www.midlothian.gov.uk/info/1054/bins_and_recycling/343/bin_collection_days) until you reach the page that shows the weekly collections for your address, then copy that page's URL and use it in place of the URL in the command."
},
"MidSussexDistrictCouncil": {
"house_number": "OAKLANDS, OAKLANDS ROAD RH16 1SS",
"postcode": "RH16 1SS",
Expand Down
68 changes: 68 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/MidlothianCouncil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin-collection scraper for a Midlothian Council directory record page.

    Provides the ``parse_data`` implementation expected of a concrete
    subclass of the abstract base class.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Extract the upcoming bin collections from a fetched page.

        The HTML is read from ``page.text``, so although ``page`` is
        annotated as ``str`` it must be an object exposing a ``.text``
        attribute (presumably an HTTP response -- confirm with the caller).

        Returns a dict of the form ``{"bins": [{"type": ...,
        "collectionDate": ...}, ...]}``. The list is empty when the
        collections list is missing or no entry could be parsed.
        """
        # Heading text shown on the page -> bin type reported in the result
        label_to_type = {
            "Next recycling collection": "Recycling",
            "Next grey bin collection": "Grey Bin",
            "Next brown bin collection": "Brown Bin",
            "Next food bin collection": "Food Bin",
        }

        result = {"bins": []}

        document = BeautifulSoup(page.text, features="html.parser")

        # The collections live in a <ul class="data-table">; without it
        # there is nothing to report.
        collection_list = document.find("ul", {"class": "data-table"})
        if not collection_list:
            return result

        # The first two <li> entries are skipped as not relevant
        for entry in collection_list.find_all("li")[2:]:
            # Bin type: only headings listed in the mapping are recognised
            heading = entry.h2
            kind = label_to_type.get(heading.text.strip()) if heading else None

            # Collection date: text such as "<weekday> dd/mm/yyyy" in the <div>
            when = None
            date_holder = entry.div
            if date_holder and date_holder.text.strip():
                try:
                    # date_format is not defined in this block; presumably it
                    # comes from the wildcard import of the common module.
                    when = datetime.strptime(
                        date_holder.text.strip(),
                        "%A %d/%m/%Y",
                    ).strftime(date_format)
                except ValueError:
                    # Unparseable date text: leave the date unset so the
                    # entry is dropped below
                    pass

            # Entries lacking either a recognised type or a valid date are
            # omitted from the output
            if not (kind and when):
                continue

            result["bins"].append(
                {
                    "type": kind,
                    "collectionDate": when,
                }
            )

        return result

0 comments on commit 038c72f

Please sign in to comment.