Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Council Pack 15 #992

Merged
merged 7 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 48 additions & 21 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@
"wiki_name": "Ards and North Down Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"ArgyllandButeCouncil": {
"uprn": "125061759",
"skip_get_url": true,
"url": "https://www.argyll-bute.gov.uk",
"wiki_name": "Argyll and Bute Council",
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"ArmaghBanbridgeCraigavonCouncil": {
"url": "https://www.armaghbanbridgecraigavon.gov.uk/",
"wiki_command_url_override": "https://www.armaghbanbridgecraigavon.gov.uk/",
Expand All @@ -48,13 +55,13 @@
"wiki_name": "Arun Council",
"wiki_note": "Pass the house name/number and postcode in their respective parameters, both wrapped in double quotes. This parser requires a Selenium webdriver."
},
"AshfordBoroughCouncil": {
"url": "https://ashford.gov.uk",
"wiki_command_url_override": "https://ashford.gov.uk",
"postcode": "TN23 7SP",
"uprn": "100060777899",
"wiki_name": "Ashford Borough Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
"AshfieldDistrictCouncil": {
"url": "https://www.ashfield.gov.uk",
"postcode": "NG16 6RH",
"house_number": "1",
"web_driver": "http://selenium:4444",
"wiki_name": "Ashfield District Council",
"wiki_note": "Pass the house name/number and postcode in their respective parameters, both wrapped in double quotes. This parser requires a Selenium webdriver"
},
"AshfordBoroughCouncil": {
"url": "https://ashford.gov.uk",
Expand All @@ -71,6 +78,13 @@
"wiki_name": "Aylesbury Vale Council (Buckinghamshire)",
"wiki_note": "To get the UPRN, please use [FindMyAddress](https://www.findmyaddress.co.uk/search). Returns all published collections in the past, present, future."
},
"BaberghDistrictCouncil": {
"skip_get_url": true,
"house_number": "Monday",
"url": "https://www.babergh.gov.uk",
"wiki_name": "Babergh District Council",
"wiki_note": "Use the House Number field to pass the DAY of the week for your collections. Monday/Tuesday/Wednesday/Thursday/Friday"
},
"BCPCouncil": {
"skip_get_url": true,
"uprn": "100040810214",
Expand Down Expand Up @@ -335,26 +349,19 @@
"wiki_note": "Both the UPRN and a one-line address are passed in the URL, which needs to be wrapped in double quotes. The one-line address is made up of the house number, street name, and postcode. Use the form [here](https://online.cheshireeast.gov.uk/mycollectionday/) to find them, then take the first line and postcode and replace all spaces with `%20`."
},
"CheshireWestAndChesterCouncil": {
"house_number": "Hill View House",
"postcode": "CH3 9ER",
"uprn": "100012346655",
"skip_get_url": true,
"url": "https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day",
"url": "https://my.cheshirewestandchester.gov.uk",
"wiki_name": "Cheshire West and Chester Council",
"wiki_note": "Pass the house name/number and postcode in their respective parameters."
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"ChesterfieldBoroughCouncil": {
"uprn": "74008234",
"skip_get_url": true,
"url": "https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day",
"url": "https://www.chesterfield.gov.uk",
"wiki_name": "Chesterfield Borough Council",
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"ChesterfieldBoroughCouncil": {
"uprn": "74008234",
"skip_get_url": true,
"url": "https://www.cheshirewestandchester.gov.uk/residents/waste-and-recycling/your-bin-collection/collection-day",
"wiki_name": "Chesterfield Borough Council"
},
"ChichesterDistrictCouncil": {
"house_number": "7, Plaistow Road, Kirdford, Billingshurst, West Sussex",
"postcode": "RH14 0JT",
Expand Down Expand Up @@ -449,6 +456,12 @@
"wiki_name": "Dartford Borough Council",
"wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN."
},
"DerbyCityCouncil": {
"url": "https://www.derby.gov.uk",
"uprn": "10010684240",
"wiki_name": "Derby City Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"DerbyshireDalesDistrictCouncil": {
"postcode": "DE4 3AS",
"skip_get_url": true,
Expand Down Expand Up @@ -702,6 +715,13 @@
"wiki_name": "Gloucester City Council",
"wiki_note": "Pass the house number, postcode, and UPRN in their respective parameters. This parser requires a Selenium webdriver."
},
"GraveshamBoroughCouncil": {
"uprn": "100060927046",
"skip_get_url": true,
"url": "https://www.gravesham.gov.uk",
"wiki_name": "Gravesham Borough Council",
"wiki_note": "Pass the UPRN. You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)."
},
"GuildfordCouncil": {
"house_number": "THE LODGE, PUTTENHAM HILL HOUSE, PUTTENHAM HILL, PUTTENHAM, GUILDFORD, GU3 1AH",
"postcode": "GU3 1AH",
Expand Down Expand Up @@ -982,6 +1002,13 @@
"wiki_name": "Midlothian Council",
"wiki_note": "Pass the house name/number wrapped in double quotes along with the postcode parameter."
},
"MidSuffolkDistrictCouncil": {
"skip_get_url": true,
"house_number": "Monday",
"url": "https://www.midsuffolk.gov.uk",
"wiki_name": "Mid Suffolk District Council",
"wiki_note": "Use the House Number field to pass the DAY of the week for your collections. Monday/Tuesday/Wednesday/Thursday/Friday"
},
"MidSussexDistrictCouncil": {
"house_number": "OAKLANDS, OAKLANDS ROAD RH16 1SS",
"postcode": "RH16 1SS",
Expand All @@ -992,10 +1019,10 @@
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes. This parser requires a Selenium webdriver."
},
"MiltonKeynesCityCouncil": {
"uprn": "Fullers Slade",
"url": "https://www.milton-keynes.gov.uk/waste-and-recycling/collection-days",
"uprn": "25109551",
"url": "https://mycouncil.milton-keynes.gov.uk/en/service/Waste_Collection_Round_Checker",
"wiki_name": "Milton Keynes City Council",
"wiki_note": "Pass the name of the estate with the UPRN parameter, wrapped in double quotes."
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"MoleValleyDistrictCouncil": {
"postcode": "RH4 1SJ",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import time

import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Argyll and Bute Council.

    Posts the property's UPRN to the council's bin collection page and reads
    the bin type / collection date pairs out of the results table.
    """

    # The page prints dates without a year (e.g. "Monday 13 January"). A date
    # that would lie more than this many days in the past when placed in the
    # current year is taken to belong to next year instead.
    _ROLLOVER_GRACE_DAYS = 31

    @classmethod
    def _resolve_collection_date(cls, text, today):
        """
        Turn a year-less "Weekday DD Month" string into a full datetime.

        :param text: date text as shown on the page, e.g. "Monday 13 January"
        :param today: the datetime the year is inferred relative to
        :return: datetime in the current year, or in next year when the
            current-year date is further back than the grace period
        :raises ValueError: if the text does not match the expected format or
            the day cannot be placed in either candidate year
        """
        # Parse against a fixed leap year so that "29 February" is accepted
        # and the year-less strptime default (1900) is never relied upon.
        day_month = datetime.strptime(f"{text} 2000", "%A %d %B %Y")

        for year in (today.year, today.year + 1):
            try:
                candidate = day_month.replace(year=year)
            except ValueError:
                # 29 February does not exist in this year; try the next one.
                continue
            days_ago = (today.date() - candidate.date()).days
            if days_ago <= cls._ROLLOVER_GRACE_DAYS:
                return candidate

        raise ValueError(
            f"Cannot place collection date {text!r} in "
            f"{today.year} or {today.year + 1}"
        )

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch and parse the bin collections for a UPRN.

        :param page: unused; the request is made by this method itself
        :param kwargs: must contain "uprn"
        :return: {"bins": [{"type": ..., "collectionDate": ...}, ...]} sorted
            by collection date
        :raises ValueError: if the results table is missing from the response
            or a date cannot be interpreted
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        # The form value is the UPRN left-padded with zeros to 12 characters.
        user_uprn = str(user_uprn).zfill(12)
        bindata = {"bins": []}

        URI = "https://www.argyll-bute.gov.uk/rubbish-and-recycling/household-waste/bin-collection"

        # Close the session when done, and bound the wait so an unresponsive
        # server cannot hang the caller indefinitely.
        with requests.Session() as session:
            response = session.post(
                URI, data={"addressSelect": user_uprn}, timeout=30
            )
        response.raise_for_status()

        soup = BeautifulSoup(response.content, features="html.parser")

        # Find the table holding the bin schedule information
        table = soup.find("table", class_="table table-bordered")
        if table is None:
            raise ValueError(
                f"No bin collection table found for UPRN {user_uprn}"
            )

        today = datetime.now()
        collections = []
        # Skip the header row, then read bin type and date from each row
        for row in table.find_all("tr")[1:]:
            cells = row.find_all("td")
            if len(cells) < 2:
                # Not a data row (e.g. spacer or nested header); ignore it.
                continue
            bin_type = cells[0].get_text(strip=True)
            date_text = cells[1].get_text(strip=True)
            collections.append(
                (self._resolve_collection_date(date_text, today), bin_type)
            )

        # Stable sort on the date only, so same-day bins keep page order.
        collections.sort(key=lambda item: item[0])

        for collection_date, bin_type in collections:
            bindata["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": collection_date.strftime(date_format),
                }
            )

        return bindata
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import time
from datetime import datetime

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Ashfield District Council.

    Drives the council's bin calendar form with Selenium: enters the
    postcode, picks the address whose label starts with the house
    name/number, submits the search and reads the results table.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the bin collections for a property.

        :param page: unused; the page is loaded through the webdriver
        :param kwargs: must contain "postcode" and "paon"; may contain
            "web_driver" and "headless", which are passed to create_webdriver
        :return: {"bins": [{"type": ..., "collectionDate": ...}, ...]} in the
            order the rows appear on the page
        :raises ValueError: if the results table is missing from the page
        """
        # Get and check the postcode and house name/number
        user_postcode = kwargs.get("postcode")
        user_paon = kwargs.get("paon")
        check_paon(user_paon)
        check_postcode(user_postcode)
        web_driver = kwargs.get("web_driver")
        headless = kwargs.get("headless")
        bindata = {"bins": []}

        API_URL = "https://portal.digital.ashfield.gov.uk/w/webpage/raise-case?service=bin_calendar"

        driver = None
        try:
            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(API_URL)

            # Each until() call gets its own fresh 10 second timeout.
            wait = WebDriverWait(driver, 10)

            # Wait for the page heading to be present before using the form
            wait.until(EC.presence_of_element_located((By.ID, "sub_page_title")))

            # Wait for the postcode field to appear then populate it
            postcode_input = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "input.relation_path_type_ahead_search")
                )
            )
            postcode_input.clear()
            postcode_input.send_keys(user_postcode)

            # Wait for the address result list to become usable
            wait.until(
                EC.element_to_be_clickable(
                    (
                        By.CLASS_NAME,
                        "result_list ",
                    )
                )
            )

            # Pick the entry whose label starts with the house name/number
            wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, f"//li[starts-with(@aria-label, '{user_paon}')]")
                )
            ).click()

            wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//input[@type='submit' and @value='Search']")
                )
            ).click()

            # Fixed pause before reading the page, kept from the original
            # flow; presumably it lets the results render — TODO confirm
            # and replace with an explicit wait on the results table.
            time.sleep(10)

            soup = BeautifulSoup(driver.page_source, features="html.parser")
        finally:
            # Always shut the browser down, including when a wait times out,
            # so no webdriver session is leaked.
            if driver is not None:
                driver.quit()

        # Find the table by class name
        table = soup.find("table", {"class": "table listing table-striped"})
        body = table.find("tbody") if table is not None else None
        if body is None:
            raise ValueError(
                f"No bin collection table found for {user_paon}, {user_postcode}"
            )

        # Iterate over each row in the tbody of the table
        for row in body.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 3:
                # Not a data row; the service is in cell 0, the date in cell 2.
                continue
            service = cells[0].get_text(strip=True)
            date = cells[2].get_text(strip=True)

            dict_data = {
                "type": service,
                "collectionDate": datetime.strptime(date, "%a, %d %b %Y").strftime(
                    date_format
                ),
            }
            bindata["bins"].append(dict_data)

        return bindata
Loading
Loading