Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom component selenium fix #416

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/behave.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ jobs:
matrix:
python-version: ['3.10', 3.11]
poetry-version: [1.5.1]
services:
selenium:
image: selenium/standalone-chrome:latest
options: --shm-size=2gb
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Most scripts make use of [Beautiful Soup 4](https://pypi.org/project/beautifulso
1. Click `+ Add Integration` and search for and select `UK Bin Collection Data` then click `Download`.
1. Restart your Home Assistant.
1. In the Home Assistant UI go to `Settings` > `Devices & Services` click `+ Add Integration` and search for `UK Bin Collection Data`.
1. If you see a "URL of the remote Selenium web driver to use" field when setting up your council, you'll need to provide the URL of a web driver you've set up separately, such as [standalone-chrome](https://hub.docker.com/r/selenium/standalone-chrome).

### Install manually

Expand Down
4 changes: 4 additions & 0 deletions custom_components/uk_bin_collection/config_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ async def get_council_schema(self, council=str) -> vol.Schema:
council_schema = council_schema.extend(
{vol.Required("usrn", default=""): cv.string}
)
if "web_driver" in self.councils_data[council]:
council_schema = council_schema.extend(
{vol.Required("web_driver", default=""): cv.string}
)
return council_schema

async def async_step_user(self, user_input=None):
Expand Down
1 change: 1 addition & 0 deletions custom_components/uk_bin_collection/strings.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"postcode": "Postcode of the address",
"number": "House number of the address",
"usrn": "USRN (Unique Street Reference Number)",
"web_driver": "URL of the remote Selenium web driver to use",
"submit": "Submit"
},
"description": "Please refer to your council's [wiki](https://github.com/robbrad/UKBinCollectionData/wiki/Councils) entry for details on what to enter"
Expand Down
16 changes: 16 additions & 0 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,15 @@
"skip_get_url": true,
"uprn": "100031325997",
"url": "https://www.broxtowe.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "Broxtowe Borough Council"
},
"BuckinghamshireCouncil": {
"house_number": "HUGHENDEN MANOR, MANOR ROAD, HUGHENDEN VALLEY, HIGH WYCOMBE",
"postcode": "HP14 4LA",
"skip_get_url": true,
"url": "https://chiltern.gov.uk/collection-dates",
"web_driver": "http://selenium:4444",
"wiki_name": "Buckinghamshire Council (Chiltern, South Bucks, Wycombe)",
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes"
},
Expand Down Expand Up @@ -152,6 +154,7 @@
"skip_get_url": true,
"uprn": "10070102161",
"url": "https://www.derbyshiredales.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "Derbyshire Dales District Council"
},
"DoncasterCouncil": {
Expand Down Expand Up @@ -183,6 +186,7 @@
"postcode": "LN4 4SY",
"skip_get_url": true,
"url": "https://www.e-lindsey.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "East Lindsey District Council",
"wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes"
},
Expand All @@ -197,6 +201,7 @@
"skip_get_url": true,
"uprn": "10093544720",
"url": "https://my.eastsuffolk.gov.uk/service/Bin_collection_dates_finder",
"web_driver": "http://selenium:4444",
"wiki_name": "East Suffolk Council",
"wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)"
},
Expand All @@ -223,6 +228,7 @@
"postcode": "NE16 5LQ",
"skip_get_url": true,
"url": "https://www.gateshead.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "Gateshead Council",
"wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes"
},
Expand Down Expand Up @@ -251,6 +257,7 @@
"postcode": "SK13 8BX",
"skip_get_url": true,
"url": "https://www.highpeak.gov.uk/findyourbinday",
"web_driver": "http://selenium:4444",
"wiki_name": "High Peak Council",
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes"
},
Expand Down Expand Up @@ -337,6 +344,7 @@
"skip_get_url": true,
"uprn": "10023947752",
"url": "https://www.npt.gov.uk",
"web_driver": "http://selenium:4444",
"wiki_name": "Neath Port Talbot Council"
},
"NewarkAndSherwoodDC": {
Expand Down Expand Up @@ -382,6 +390,7 @@
"postcode": "NR25 6BH",
"skip_get_url": true,
"url": "https://www.north-norfolk.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "North Norfolk District Council",
"wiki_note": "Pass the name of the street with the house number parameter, wrapped in double quotes"
},
Expand Down Expand Up @@ -410,19 +419,22 @@
"postcode": "NE46 1UQ",
"skip_get_url": true,
"url": "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx",
"web_driver": "http://selenium:4444",
"wiki_name": "Northumberland Council"
},
"PrestonCityCouncil": {
"house_number": "Town Hall",
"postcode": "PR1 2RL",
"skip_get_url": true,
"url": "https://selfservice.preston.gov.uk/service/Forms/FindMyNearest.aspx?Service=bins",
"web_driver": "http://selenium:4444",
"wiki_name": "Preston City Council"
},
"ReigateAndBansteadBoroughCouncil": {
"skip_get_url": true,
"uprn": "68134867",
"url": "https://www.reigate-banstead.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "Reigate and Banstead Borough Council",
"wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)"
},
Expand All @@ -445,6 +457,7 @@
"skip_get_url": true,
"uprn": "3040040994",
"url": "https://www.rushcliffe.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "Rushcliffe Borough Council"
},
"RushmoorCouncil": {
Expand Down Expand Up @@ -521,6 +534,7 @@
"skip_get_url": true,
"uprn": "100031863037",
"url": "https://www.staffsmoorlands.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "Staffordshire Moorlands District Council",
"wiki_note": "To get the UPRN, you can use [FindMyAddress](https://www.findmyaddress.co.uk/search)"
},
Expand Down Expand Up @@ -578,6 +592,7 @@
"custom_component_show_url_field": true,
"skip_get_url": true,
"url": "https://www.wakefield.gov.uk/where-i-live/?uprn=63035490&a=115%20Elizabeth%20Drive%20Castleford%20WF10%203RR&usrn=41801243&e=445418&n=426091&p=WF10%203RR",
"web_driver": "http://selenium:4444",
"wiki_command_url_override": "https://www.wakefield.gov.uk/where-i-live/?uprn=XXXXXXXXXXX&a=XXXXXXXXXXX&usrn=XXXXXXXXXXX&e=XXXXXXXXXXX&n=XXXXXXXXXXX&p=XXXXXXXXXXX",
"wiki_name": "Wakefield City Council",
"wiki_note": "Follow the instructions [here](https://www.wakefield.gov.uk/where-i-live/) until you get the page that includes a \"Bin Collections\" section then copy the URL and replace the URL in the command."
Expand Down Expand Up @@ -614,6 +629,7 @@
"postcode": "EH52 5JE",
"skip_get_url": true,
"url": "https://www.westlothian.gov.uk/",
"web_driver": "http://selenium:4444",
"wiki_name": "West Lothian Council",
"wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes"
},
Expand Down
4 changes: 4 additions & 0 deletions uk_bin_collection/tests/step_defs/test_validate_council.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ def scrape_step(context, council):
if "usrn" in context.metadata:
usrn = context.metadata["usrn"]
args.append(f"-us={usrn}")
# TODO we should somehow run this test with and without this argument passed
if "web_driver" in context.metadata:
web_driver = context.metadata["web_driver"]
args.append(f"-w={web_driver}")
if "skip_get_url" in context.metadata:
args.append(f"-s")

Expand Down
16 changes: 11 additions & 5 deletions uk_bin_collection/tests/test_common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,16 @@ def test_update_input_json():
url = "TEST_URL"
postcode="TEST_POSTCODE"
uprn="TEST_UPRN"
web_driver="TEST_WEBDRIVER"
skip_get_url = True
update_input_json(council, url, postcode=postcode, uprn=uprn, skip_get_url=skip_get_url)
update_input_json(council, url, postcode=postcode, uprn=uprn, web_driver=web_driver, skip_get_url=skip_get_url)
cwd = os.getcwd()
input_file_path = os.path.join(cwd, "uk_bin_collection", "tests", "input.json")
result1 = os.path.exists(input_file_path)
with open(input_file_path, 'r') as f:
data = json.load(f)
assert result1 == True
assert data[council] == {'postcode': 'TEST_POSTCODE', 'skip_get_url': True, 'uprn': 'TEST_UPRN', 'url': 'TEST_URL', 'wiki_name': 'test_council'}
assert data[council] == {"postcode": postcode, "skip_get_url": skip_get_url, "uprn": uprn, "url": url, "web_driver": web_driver, "wiki_name": council}

def test_update_input_json_fail(capsys, monkeypatch):
def mock_os_path_exists(path):
Expand All @@ -186,13 +187,18 @@ def mock_os_path_exists(path):
url = "TEST_URL"
postcode="TEST_POSTCODE"
uprn="TEST_UPRN"
web_driver="TEST_WEBDRIVER"
skip_get_url = True
update_input_json(council, url, postcode=postcode, uprn=uprn, skip_get_url=skip_get_url)
update_input_json(council, url, postcode=postcode, uprn=uprn, web_driver=web_driver, skip_get_url=skip_get_url)

captured = capsys.readouterr()
assert "Exception encountered: Unable to update input.json file for the council." in captured.out
assert "Please check you're running developer mode" in captured.out

def test_create_webdriver():
result = create_webdriver()
def test_create_webdriver_local():
result = create_webdriver(None)
assert result.name == 'chrome'

def test_create_webdriver_remote():
result = create_webdriver("http://selenium:4444")
assert result.name == 'chrome'
8 changes: 8 additions & 0 deletions uk_bin_collection/uk_bin_collection/collect_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def __init__(self):
required=False,
)
self.parser.add_argument("-u", "--uprn", type=str, help="UPRN to parse", required=False)
self.parser.add_argument(
"-w",
"--web_driver",
help="URL for remote Selenium web driver - should be wrapped in double quotes",
required=False,
)
self.parser.add_argument(
"-d",
"--dev_mode",
Expand Down Expand Up @@ -66,6 +72,7 @@ def run(self):
paon = self.parsed_args.number
uprn = self.parsed_args.uprn
skip_get_url = self.parsed_args.skip_get_url
web_driver = self.parsed_args.web_driver
dev_mode = self.parsed_args.dev_mode

return self.client_code(
Expand All @@ -75,6 +82,7 @@ def run(self):
paon=paon,
uprn=uprn,
skip_get_url=skip_get_url,
web_driver=web_driver,
dev_mode=dev_mode,
council_module_str=council_module_str,
)
Expand Down
17 changes: 8 additions & 9 deletions uk_bin_collection/uk_bin_collection/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def update_input_json(council: str, url: str, **kwargs):
paon = kwargs.get("paon", None)
uprn = kwargs.get("uprn", None)
usrn = kwargs.get("usrn", None)
def write_output_json(council: str, content: str):
web_driver = kwargs.get("web_driver", None)
skip_get_url = kwargs.get("skip_get_url", None)
cwd = os.getcwd()
input_file_path = os.path.join(cwd, "uk_bin_collection", "tests", "input.json")
Expand All @@ -223,12 +223,8 @@ def write_output_json(council: str, content: str):
data[council]["uprn"] = uprn
if usrn is not None:
data[council]["usrn"] = usrn
outputs_path = os.path.join(cwd, "..", "tests", "outputs")
if not os.path.exists(outputs_path) or not os.path.isdir(outputs_path):
outputs_path = os.path.join(cwd, "uk_bin_collection", "tests", "outputs")
if os.path.exists(outputs_path) and os.path.isdir(outputs_path):
with open(os.path.join(outputs_path, council + ".json"), "w") as f:
f.write(content)
if web_driver is not None:
data[council]["web_driver"] = web_driver
if skip_get_url is not None:
data[council]["skip_get_url"] = skip_get_url
with open(input_file_path, 'w') as f:
Expand All @@ -246,7 +242,7 @@ def validate_dates(bin_dates: dict) -> dict:
# If a date is in December and the next is in January, increase the year


def create_webdriver() -> webdriver.Chrome:
def create_webdriver(web_driver) -> webdriver.Chrome:
"""
Create and return a headless Selenium webdriver
:rtype: webdriver.Chrome
Expand All @@ -258,5 +254,8 @@ def create_webdriver() -> webdriver.Chrome:
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Return a Selenium webdriver
# Return a remote Selenium webdriver
if web_driver is not None:
return webdriver.Remote(command_executor=web_driver, options=options)
# Return a local Selenium webdriver
return webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ def parse_data(self, page: str, **kwargs) -> dict:

user_uprn = kwargs.get("uprn")
user_postcode = kwargs.get("postcode")
web_driver = kwargs.get("web_driver")
check_uprn(user_uprn)
check_postcode(user_postcode)

# Create Selenium webdriver
driver = create_webdriver()
driver = create_webdriver(web_driver)
driver.get(page)

# Populate postcode field
Expand Down Expand Up @@ -60,6 +61,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

soup = BeautifulSoup(driver.page_source, features="html.parser")

# Quit Selenium webdriver to release session
driver.quit()

bins_div = soup.find("div", id="ctl00_ContentPlaceHolder1_FF5686FormGroup")
if bins_div:
bins_table = bins_div.find("table")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
# Assign user info
user_postcode = kwargs.get("postcode")
user_paon = kwargs.get("paon")
web_driver = kwargs.get("web_driver")

# Create Selenium webdriver
driver = create_webdriver()
driver = create_webdriver(web_driver)
driver.get(page)

# Enter postcode in text box and wait
Expand Down Expand Up @@ -77,6 +78,9 @@ def parse_data(self, page: str, **kwargs) -> dict:
df = pd.read_html(table, header=[1])
df = df[0]

# Quit Selenium webdriver to release session
driver.quit()

# Parse data into dict
data = self.get_data(df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ def parse_data(self, page: str, **kwargs) -> dict:

user_uprn = kwargs.get("uprn")
user_postcode = kwargs.get("postcode")
web_driver = kwargs.get("web_driver")
check_uprn(user_uprn)
check_postcode(user_postcode)

# Create Selenium webdriver
driver = create_webdriver()
driver = create_webdriver(web_driver)
driver.get(page)

# Populate postcode field
Expand Down Expand Up @@ -60,6 +61,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

soup = BeautifulSoup(driver.page_source, features="html.parser")

# Quit Selenium webdriver to release session
driver.quit()

bin_rows = soup.find("div", id="ctl00_ContentPlaceHolder1_pnlConfirmation") \
.find("div", {"class": "row"}).find_all("div", {"class": "row"})
if bin_rows:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
data = {"bins": []}
user_paon = kwargs.get("paon")
user_postcode = kwargs.get("postcode")
web_driver = kwargs.get("web_driver")
check_paon(user_paon)
check_postcode(user_postcode)

# Create Selenium webdriver
driver = create_webdriver()
driver = create_webdriver(web_driver)
driver.get("https://www.e-lindsey.gov.uk/article/6714/Your-Waste-Collection-Days")

# Wait for the postcode field to appear then populate it
Expand Down Expand Up @@ -59,6 +60,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

soup = BeautifulSoup(driver.page_source, features="html.parser")

# Quit Selenium webdriver to release session
driver.quit()

# Get collections
for collection in soup.find_all("div", {"class": "waste-result"}):
ptags = collection.find_all("p")
Expand Down
Loading
Loading