Skip to content

Commit

Permalink
fix: Swale Borough Council
Browse files (browse the repository at this point in the history)
  • Loading branch information
m26dvd committed Dec 12, 2024
1 parent 1eab20c commit 6f580b3
Showing 1 changed file with 42 additions and 18 deletions.
60 changes: 42 additions & 18 deletions uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_paon(user_paon)

# Build URL to parse
council_url = "https://swale.gov.uk/bins-littering-and-the-environment/bins/my-collection-day"
council_url = "https://swale.gov.uk/bins-littering-and-the-environment/bins/check-your-bin-day"

# Create Selenium webdriver
driver = create_webdriver(web_driver, headless, None, __name__)
Expand All @@ -35,15 +35,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
# Wait for the postcode field to appear then populate it
try:
inputElement_postcode = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "q462406_q1"))
EC.presence_of_element_located((By.ID, "q485476_q1"))
)
inputElement_postcode.send_keys(user_postcode)
except Exception:
print("Page failed to load. Probably due to Cloudflare robot check!")

# Click search button
findAddress = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
EC.presence_of_element_located((By.ID, "form_email_485465_submit"))
)
driver.execute_script("arguments[0].click();", findAddress)

Expand All @@ -52,7 +52,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
EC.element_to_be_clickable(
(
By.XPATH,
"//select[@id='SBCYBDAddressList']//option[contains(., '"
"//select[@name='q485480:q1']//option[contains(., '"
+ user_paon
+ "')]",
)
Expand All @@ -61,30 +61,54 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Click search button
getBins = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
EC.presence_of_element_located((By.ID, "form_email_485465_submit"))
)
driver.execute_script("arguments[0].click();", getBins)

BinTable = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "SBC-YBD-Main"))
EC.presence_of_element_located((By.ID, "SBCYBDSummary"))
)

soup = BeautifulSoup(driver.page_source, features="html.parser")
soup.prettify()

data = {"bins": []}

# Get the collection bullet points on the page and parse them
nextCollections = soup.find("div", {"id": "nextCollections"})
for c in nextCollections:
collection = c.find_all("strong")
for bin in collection:
split = (bin.text).split(" on ")
bin_type = split[0]
bin_date = datetime.strptime(split[1], "%A %d %b %Y").strftime(
"%d/%m/%Y"
)
dict_data = {"type": bin_type, "collectionDate": bin_date}
data["bins"].append(dict_data)
next_collection_date = soup.find(
"strong", id="SBC-YBD-collectionDate"
).text.strip()

# Extract bins for the next collection
next_bins = [li.text.strip() for li in soup.select("#SBCFirstBins ul li")]

# Extract future collection details
future_collection_date_tag = soup.find(
"p", text=lambda t: t and "starting from" in t
)
future_collection_date = (
future_collection_date_tag.text.split("starting from")[-1].strip()
if future_collection_date_tag
else "No future date found"
)

future_bins = [li.text.strip() for li in soup.select("#FirstFutureBins li")]

for bin in next_bins:
dict_data = {
"type": bin,
"collectionDate": datetime.strptime(
next_collection_date, "%A, %d %B"
).strftime(date_format),
}
data["bins"].append(dict_data)

for bin in future_bins:
dict_data = {
"type": bin,
"collectionDate": datetime.strptime(
future_collection_date, "%A, %d %B"
).strftime(date_format),
}
data["bins"].append(dict_data)

return data

0 comments on commit 6f580b3

Please sign in to comment.