Merge pull request #886 from m26dvd/master
robbrad authored Oct 15, 2024
2 parents 4e95ade + 50f3aab commit cdd2e97
Showing 5 changed files with 138 additions and 57 deletions.
3 changes: 2 additions & 1 deletion uk_bin_collection/tests/input.json
@@ -1128,7 +1128,8 @@
     "SwaleBoroughCouncil": {
         "postcode": "ME12 2NQ",
         "skip_get_url": true,
-        "uprn": "100061081168",
+        "house_number": "81",
+        "web_driver": "http://selenium:4444",
         "url": "https://swale.gov.uk/bins-littering-and-the-environment/bins/collection-days",
         "wiki_name": "Swale Borough Council"
     },
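
For reference, the new fields line up with the keyword arguments the reworked Swale parser reads (see SwaleBoroughCouncil.py below). A minimal sketch of a direct call, assuming the test harness maps the JSON "house_number" field onto the parser's "paon" keyword:

# Sketch only, not part of this PR. Assumes "house_number" in input.json
# is forwarded to parse_data() as the "paon" keyword argument.
from uk_bin_collection.uk_bin_collection.councils.SwaleBoroughCouncil import (
    CouncilClass,
)

parser = CouncilClass()
data = parser.parse_data(
    "https://swale.gov.uk/bins-littering-and-the-environment/bins/collection-days",
    postcode="ME12 2NQ",                # validated by check_postcode()
    paon="81",                          # house number, validated by check_paon()
    web_driver="http://selenium:4444",  # remote Selenium endpoint
    headless=True,
)
print(data)  # {"bins": [{"type": ..., "collectionDate": ...}, ...]}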
31 changes: 24 additions & 7 deletions uk_bin_collection/uk_bin_collection/councils/BarnetCouncil.py
@@ -74,13 +74,30 @@ def parse_data(self, page: str, **kwargs) -> dict:

         driver.get(page)

+        # Dismiss the cookie banner before anything else is clicked
+        wait = WebDriverWait(driver, 10)
+        accept_cookies_button = wait.until(
+            EC.element_to_be_clickable(
+                (
+                    By.XPATH,
+                    "//button[contains(text(), 'Accept additional cookies')]",
+                )
+            )
+        )
+        accept_cookies_button.click()
+
         # Wait for the element to be clickable
-        find_your_collection_button = WebDriverWait(driver, 10).until(
+        wait = WebDriverWait(driver, 10)
+        find_your_collection_button = wait.until(
             EC.element_to_be_clickable(
-                (By.XPATH, '//a[contains(text(), "Find your household collection day")]')
+                (By.LINK_TEXT, "Find your household collection day")
             )
         )

+        # Scroll to the element (in case something is blocking it)
+        driver.execute_script(
+            "arguments[0].scrollIntoView();", find_your_collection_button
+        )
+
         # Click the element
         find_your_collection_button.click()
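
The wait-then-scroll-then-click sequence above now appears more than once in this file. A small helper along these lines (a sketch, not part of the PR) would keep the behaviour consistent:

# Sketch only. Wraps the pattern used above: wait until clickable,
# scroll into view, then click.
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


def wait_scroll_click(driver, locator, timeout=10):
    element = WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable(locator)
    )
    driver.execute_script("arguments[0].scrollIntoView();", element)
    element.click()
    return element

# e.g. wait_scroll_click(driver, (By.LINK_TEXT, "Find your household collection day"))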

@@ -107,12 +124,12 @@ def parse_data(self, page: str, **kwargs) -> dict:

         postcode_input.send_keys(user_postcode)

-        find_address_button = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located(
-                (By.CSS_SELECTOR, '[value="Find address"]')
-            )
+        find_address_button = WebDriverWait(driver, 30).until(
+            EC.element_to_be_clickable((By.CSS_SELECTOR, '[value="Find address"]'))
         )
-        find_address_button.click()
+        driver.execute_script("arguments[0].scrollIntoView();", find_address_button)
+        driver.execute_script("arguments[0].click();", find_address_button)
+        # find_address_button.click()

         time.sleep(15)
         # Wait for address box to be visible
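
A possible follow-up: the fixed time.sleep(15) still costs 15 seconds on every run. An explicit wait would return as soon as the address box renders; the locator below is a placeholder, since the element's actual ID is not shown in this hunk:

# Sketch only; By.ID "addressBox" is a hypothetical locator.
address_box = WebDriverWait(driver, 15).until(
    EC.visibility_of_element_located((By.ID, "addressBox"))
)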
@@ -80,6 +80,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
             )
         )
         search_btn.send_keys(Keys.ENTER)
+        # Wait for the results to render before parsing the page
+        WebDriverWait(driver, 10).until(
+            EC.presence_of_element_located((By.ID, "collectionTabs"))
+        )

         soup = BeautifulSoup(driver.page_source, features="html.parser")

         # Find all tab panels within the collectionTabs
88 changes: 63 additions & 25 deletions uk_bin_collection/uk_bin_collection/councils/SwaleBoroughCouncil.py
@@ -1,9 +1,11 @@
-import requests
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


 # import the wonderful Beautiful Soup and the URL grabber
@@ -17,36 +19,72 @@ class CouncilClass(AbstractGetBinDataClass):

     def parse_data(self, page: str, **kwargs) -> dict:
         # Get postcode and house name/number (PAON) from kwargs
         user_postcode = kwargs.get("postcode")
-        user_uprn = kwargs.get("uprn")
+        user_paon = kwargs.get("paon")
+        web_driver = kwargs.get("web_driver")
+        headless = kwargs.get("headless")
         check_postcode(user_postcode)
-        check_uprn(user_uprn)
+        check_paon(user_paon)

         # Build URL to parse
-        council_url = f"https://swale.gov.uk/bins-littering-and-the-environment/bins/collection-days?postcode={user_postcode.replace(' ', '+')}&addresses={user_uprn}&address-submit="
+        council_url = "https://swale.gov.uk/bins-littering-and-the-environment/bins/my-collection-day"

+        # Create Selenium webdriver
+        driver = create_webdriver(web_driver, headless, None, __name__)
+        driver.get(council_url)
+
+        # Wait for the postcode field to appear, then populate it
+        try:
+            inputElement_postcode = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.ID, "q462406_q1"))
+            )
+            inputElement_postcode.send_keys(user_postcode)
+        except Exception:
+            print("Page failed to load. Probably due to Cloudflare robot check!")
+
+        # Click the search button to look up the address
+        findAddress = WebDriverWait(driver, 10).until(
+            EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
+        )
+        driver.execute_script("arguments[0].click();", findAddress)
+
+        # Wait for the 'Select address' dropdown to appear and select the
+        # option matching the house name/number
+        WebDriverWait(driver, 10).until(
+            EC.element_to_be_clickable(
+                (
+                    By.XPATH,
+                    "//select[@id='SBCYBDAddressList']//option[contains(., '"
+                    + user_paon
+                    + "')]",
+                )
+            )
+        ).click()
+
+        # Submit again to retrieve the bin collection days
+        getBins = WebDriverWait(driver, 10).until(
+            EC.presence_of_element_located((By.ID, "form_email_462397_submit"))
+        )
+        driver.execute_script("arguments[0].click();", getBins)
+
+        # Wait for the results container before reading the page source
+        BinTable = WebDriverWait(driver, 30).until(
+            EC.presence_of_element_located((By.ID, "SBC-YBD-Main"))
+        )

-        # Parse URL and read if connection successful
-        requests.packages.urllib3.disable_warnings()
-        response = requests.get(council_url, verify=False)
-        if response.status_code == 200:
-            soup = BeautifulSoup(response.text, features="html.parser")
-            soup.prettify()
-        else:
-            raise ConnectionAbortedError("Could not parse council website.")
+        soup = BeautifulSoup(driver.page_source, features="html.parser")
+        soup.prettify()

         data = {"bins": []}

-        # Get the collection bullet points on the page and parse them
-        form_area = soup.find("form", {"class": "integration bin-lookup"})
-        collections = [
-            item.text.strip().split(",") for item in form_area.find_all("li")
-        ]
-        for c in collections:
-            bin_type = c[0].strip()
-            bin_date = datetime.strptime(
-                c[2].strip() + " " + str(datetime.now().year), "%d %B %Y"
-            ).strftime(date_format)
-            dict_data = {"type": bin_type, "collectionDate": bin_date}
-            data["bins"].append(dict_data)
+        # Each entry reads "<bin type> on <weekday day month year>"
+        nextCollections = soup.find("div", {"id": "nextCollections"})
+        for c in nextCollections:
+            collection = c.find_all("strong")
+            for bin in collection:
+                split = (bin.text).split(" on ")
+                bin_type = split[0]
+                bin_date = datetime.strptime(split[1], "%A %d %b %Y").strftime(
+                    "%d/%m/%Y"
+                )
+                dict_data = {"type": bin_type, "collectionDate": bin_date}
+                data["bins"].append(dict_data)

         return data
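
The split on " on " assumes entries shaped like "Green bin on Monday 21 Oct 2024" (the sample text is illustrative; only the "%A %d %b %Y" format comes from the code above):

from datetime import datetime

# Illustrative input; the real page text is not shown in this diff.
text = "Green bin on Monday 21 Oct 2024"
bin_type, date_part = text.split(" on ")
bin_date = datetime.strptime(date_part, "%A %d %b %Y").strftime("%d/%m/%Y")
print(bin_type, bin_date)  # Green bin 21/10/2024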
@@ -74,30 +74,51 @@ def parse_data(self, page: str, **kwargs) -> dict:

         soup = BeautifulSoup(driver.page_source, features="html.parser")
         soup.prettify()

-        rubbish_date = datetime.strptime(
-            " ".join(
-                soup.find("div", {"id": "FINDYOURBINDAYS_RUBBISHDATE_OUTERDIV"})
-                .get_text(strip=True)
-                .split()[6:8]
-            ),
-            "%d %B",
-        ).replace(year=datetime.now().year)
-        recycling_date = datetime.strptime(
-            " ".join(
-                soup.find("div", {"id": "FINDYOURBINDAYS_RECYCLINGDATE_OUTERDIV"})
-                .get_text(strip=True)
-                .split()[6:8]
-            ),
-            "%d %B",
-        ).replace(year=datetime.now().year)
-        food_date = datetime.strptime(
-            " ".join(
-                soup.find("div", {"id": "FINDYOURBINDAYS_FOODWASTEDATE_OUTERDIV"})
-                .get_text(strip=True)
-                .split()[8:10]
-            ),
-            "%d %B",
-        ).replace(year=datetime.now().year)
+        # Each panel carries its date in the third child div; some layouts
+        # shift it to the fourth, hence the fallback in each block below.
+        rubbish_div = soup.find(
+            "div", {"id": "FINDYOURBINDAYS_RUBBISHDATE_OUTERDIV"}
+        )
+        try:
+            rubbish_date = rubbish_div.find_all("div")[2]
+            rubbish_date = datetime.strptime(
+                rubbish_date.text,
+                "%A %d %B",
+            ).replace(year=datetime.now().year)
+        except Exception:
+            rubbish_date = rubbish_div.find_all("div")[3]
+            rubbish_date = datetime.strptime(
+                rubbish_date.text,
+                "%A %d %B",
+            ).replace(year=datetime.now().year)
+        recycling_div = soup.find(
+            "div", {"id": "FINDYOURBINDAYS_RECYCLINGDATE_OUTERDIV"}
+        )
+        try:
+            recycling_date = recycling_div.find_all("div")[2]
+            recycling_date = datetime.strptime(
+                recycling_date.text,
+                "%A %d %B",
+            ).replace(year=datetime.now().year)
+        except Exception:
+            recycling_date = recycling_div.find_all("div")[3]
+            recycling_date = datetime.strptime(
+                recycling_date.text,
+                "%A %d %B",
+            ).replace(year=datetime.now().year)
+        food_div = soup.find(
+            "div", {"id": "FINDYOURBINDAYS_FOODWASTEDATE_OUTERDIV"}
+        )
+        try:
+            food_date = food_div.find_all("div")[2]
+            food_date = datetime.strptime(
+                food_date.text,
+                "%A %d %B",
+            ).replace(year=datetime.now().year)
+        except Exception:
+            food_date = food_div.find_all("div")[3]
+            food_date = datetime.strptime(
+                food_date.text,
+                "%A %d %B",
+            ).replace(year=datetime.now().year)

         if datetime.now().month == 12 and rubbish_date.month == 1:
             rubbish_date = rubbish_date + relativedelta(years=1)
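
The three near-identical try/except blocks above could collapse into one helper. A sketch, assuming all three panels carry the same "%A %d %B" text in their third or fourth child div:

# Sketch only, not part of this PR.
from datetime import datetime


def extract_bin_date(outer_div):
    # Try the third child div first, then fall back to the fourth.
    for index in (2, 3):
        try:
            text = outer_div.find_all("div")[index].text
            return datetime.strptime(text, "%A %d %B").replace(
                year=datetime.now().year
            )
        except (IndexError, ValueError):
            continue
    raise ValueError("no parsable collection date in panel")

# e.g. rubbish_date = extract_bin_date(rubbish_div)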

