Skip to content

Commit

Permalink
fix: #709 Update DoverDistrictCouncil.py
Browse files Browse the repository at this point in the history
  • Loading branch information
robbrad authored May 16, 2024
1 parent f8d3784 commit c4f81f5
Showing 1 changed file with 36 additions and 44 deletions.
Original file line number Diff line number Diff line change
@@ -1,49 +1,41 @@
from bs4 import BeautifulSoup
from uk_bin_collection.uk_bin_collection.common import *
from datetime import datetime
import re
from uk_bin_collection.uk_bin_collection.common import * # Consider specific imports
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper (per the commit title, for Dover District Council).

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    # Dates are read from the page and written to the output in this format.
    _DATE_FORMAT = "%d/%m/%Y"
    # Cheap shape check so obviously non-date text is skipped before strptime.
    _DATE_PATTERN = re.compile(r"\d{2}/\d{2}/\d{4}")

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Extract the next collection date of every service listed on the page.

        Args:
            page: The fetched page. NOTE(review): annotated ``str`` but the
                code reads ``page.text``, so a response-like object is
                expected - confirm against the base class.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``{"bins": [...]}`` where each entry has a ``"type"`` (service
            name with "Collection" replaced by "bin", capitalised) and a
            ``"collectionDate"`` formatted as ``dd/mm/YYYY``. Entries are
            ordered by date, earliest first. The list is empty when the
            results wrapper is missing from the page.
        """
        soup = BeautifulSoup(page.text, "html.parser")
        bins_data = {"bins": []}

        results_wrapper = soup.find("div", {"class": "results-table-wrapper"})
        if results_wrapper is None:
            # No results table on the page: nothing to report.
            return bins_data

        bin_collections = []
        for service in results_wrapper.find_all("div", {"class": "service-wrapper"}):
            service_name = service.find("h3", {"class": "service-name"})
            next_service = service.find("td", {"class": "next-service"})
            if service_name is None or next_service is None:
                continue

            # The date is the node that directly follows the label span.
            date_span = next_service.find("span", {"class": "table-label"})
            date_node = date_span.next_sibling if date_span is not None else None
            if date_node is None:
                # Label missing, or nothing follows it: no date to read.
                continue

            date_text = date_node.get_text().strip()
            if not self._DATE_PATTERN.fullmatch(date_text):
                continue

            try:
                bin_date = datetime.strptime(date_text, self._DATE_FORMAT)
            except ValueError:
                # Right shape but not a real calendar date (e.g. 31/02/2024).
                continue

            bin_type = service_name.get_text().replace("Collection", "bin").strip()
            bin_collections.append((bin_type, bin_date))

        for bin_type, bin_date in sorted(bin_collections, key=lambda x: x[1]):
            bins_data["bins"].append(
                {
                    "type": bin_type.capitalize(),
                    "collectionDate": bin_date.strftime(self._DATE_FORMAT),
                }
            )

        return bins_data

0 comments on commit c4f81f5

Please sign in to comment.