Skip to content

Commit

Permalink
fix: Home Assistant custom component fix for Selenium based councils
Browse files Browse the repository at this point in the history
  • Loading branch information
OliverCullimore committed Oct 29, 2023
1 parent 90c905f commit 7e2d88f
Show file tree
Hide file tree
Showing 19 changed files with 167 additions and 194 deletions.
105 changes: 102 additions & 3 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ requests = "*"
selenium = "*"
lxml = "*"
urllib3 = "*"
webdriver-manager = "^4.0.1"

[tool.commitizen]
major_version_zero = true
Expand Down
26 changes: 22 additions & 4 deletions uk_bin_collection/uk_bin_collection/common.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import calendar
import holidays
import json
import os
import pandas as pd
import re
import requests
from datetime import datetime
from enum import Enum

import holidays
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

date_format = "%d/%m/%Y"
days_of_week = {
Expand Down Expand Up @@ -217,3 +219,19 @@ def write_output_json(council: str, content: str):
def validate_dates(bin_dates: dict) -> dict:
    """
    Placeholder for bin date validation; not implemented yet.

    Planned behaviour: if a date is in December and the next is in January,
    increase the year.

    :param bin_dates: collection dates to validate (currently unused)
    :raises NotImplementedError: always
    """
    # TODO: implement the December -> January year rollover described above
    raise NotImplementedError()


def create_webdriver() -> webdriver.Chrome:
    """
    Create and return a headless Selenium Chrome webdriver.

    The chromedriver binary path is obtained from
    ``ChromeDriverManager().install()`` and passed to Selenium through a
    ``ChromeService``. The driver is returned open; this function never
    calls ``quit()`` on it.

    :return: a Chrome webdriver built with the options set below
    :rtype: webdriver.Chrome
    """
    # Set up Selenium to run 'headless'
    options = webdriver.ChromeOptions()
    for argument in (
        # Was commented out, which contradicted the documented contract and
        # the per-council setup code this helper replaces.
        "--headless",
        "--no-sandbox",
        "--disable-gpu",
        "--disable-dev-shm-usage",
    ):
        options.add_argument(argument)
    options.add_experimental_option("excludeSwitches", ["enable-logging"])

    # Return a Selenium webdriver
    service = ChromeService(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=options)
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand All @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_uprn(user_uprn)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

# Populate postcode field
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand Down Expand Up @@ -40,16 +38,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
user_postcode = kwargs.get("postcode")
user_paon = kwargs.get("paon")

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

# Enter postcode in text box and wait
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand All @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_uprn(user_uprn)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

# Populate postcode field
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
Expand All @@ -24,16 +23,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_paon(user_paon)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get("https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days")

# Wait for the postcode field to appear then populate it
Expand Down
18 changes: 4 additions & 14 deletions uk_bin_collection/uk_bin_collection/councils/EastSuffolkCouncil.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand All @@ -17,21 +17,13 @@ class CouncilClass(AbstractGetBinDataClass):
"""

def parse_data(self, page: str, **kwargs) -> dict:
# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

user_uprn = kwargs.get("uprn")
user_postcode = kwargs.get("postcode")
check_uprn(user_uprn)
check_postcode(user_postcode)

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get("https://my.eastsuffolk.gov.uk/service/Bin_collection_dates_finder")

# Wait for iframe to load and switch to it
Expand All @@ -42,7 +34,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
EC.presence_of_element_located((By.ID, "alt_postcode_search"))
)
# Enter postcode
postcode.send_keys(user_postcode)
postcode.send_keys(user_postcode.replace(" ", ""))

# Wait for address selection dropdown to appear
address = Select(
Expand Down Expand Up @@ -74,8 +66,6 @@ def parse_data(self, page: str, **kwargs) -> dict:
)
)



# Make a BS4 object
soup = BeautifulSoup(data_table.get_attribute("innerHTML"), features="html.parser")

Expand Down
11 changes: 1 addition & 10 deletions uk_bin_collection/uk_bin_collection/councils/GatesheadCouncil.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
Expand All @@ -24,16 +23,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_paon(user_paon)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get("https://www.gateshead.gov.uk/article/3150/Bin-collection-day-checker")

# Wait for the postcode field to appear then populate it
Expand Down
14 changes: 2 additions & 12 deletions uk_bin_collection/uk_bin_collection/councils/HighPeakCouncil.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand Down Expand Up @@ -62,16 +60,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
user_postcode = kwargs.get("postcode")
user_paon = kwargs.get("paon")

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

# Hide Cookies
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
Expand All @@ -26,16 +25,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_uprn(user_uprn)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get("https://www.npt.gov.uk/2195")

# Accept cookies banner
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand All @@ -27,16 +27,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_paon(user_paon)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

# Populate postcode field
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber


Expand Down Expand Up @@ -39,16 +35,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_paon(user_paon)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

time.sleep(1)
Expand Down
19 changes: 4 additions & 15 deletions uk_bin_collection/uk_bin_collection/councils/PrestonCityCouncil.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from datetime import datetime

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from datetime import datetime
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
AbstractGetBinDataClass
Expand All @@ -33,16 +30,8 @@ def parse_data(self, page: str, **kwargs) -> dict:
check_paon(user_paon)
check_postcode(user_postcode)

# Set up Selenium to run 'headless'
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Create Selenium webdriver
driver = webdriver.Chrome(options=options)
driver = create_webdriver()
driver.get(page)

# If you bang in the house number (or property name) and postcode in the box it should find your property
Expand Down
Loading

0 comments on commit 7e2d88f

Please sign in to comment.