Skip to content

Commit

Permalink
Merge pull request #608 from JonathanStreet/606-add-mole-valley-distr…
Browse files Browse the repository at this point in the history
…ict-council

feat: Add Mole Valley District Council
  • Loading branch information
dp247 authored Feb 11, 2024
2 parents ac228bb + 6cb4ea0 commit 18a5466
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,11 @@ Feature: Test each council output matches expected results
| council | selenium_url | selenium_mode |
| MiltonKeynesCityCouncil | None | None |

@MoleValleyDistrictCouncil
Examples: MoleValleyDistrictCouncil
| council | selenium_url | selenium_mode |
| MoleValleyDistrictCouncil | None | None |

@NeathPortTalbotCouncil
Examples: NeathPortTalbotCouncil
| council | selenium_url | selenium_mode |
Expand Down
8 changes: 8 additions & 0 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,14 @@
"wiki_name": "Milton Keynes City Council",
"wiki_note": "Pass the name of the estate with the UPRN parameter, wrapped in double quotes"
},
"MoleValleyDistrictCouncil": {
"postcode": "RH4 1SJ",
"skip_get_url": true,
"uprn": "200000171235",
"url": "https://molevalley.cloudmappin.com/mmv/",
"wiki_name": "Mole Valley District Council",
"wiki_note": "A valid postcode is required. The UPRN is optional and is only used to pick a specific address within that postcode."
},
"NeathPortTalbotCouncil": {
"postcode": "SA13 3BA",
"skip_get_url": true,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from bs4 import BeautifulSoup
from datetime import datetime
import re
import requests
from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection parser for Mole Valley District Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch and parse the bin collections for a postcode.

        The council's address search is queried with the postcode; the
        matching property's embedded HTML is then scanned for the
        "Bins and Recycling" panel.

        Args:
            page: Not used by this parser; the data is requested directly.
            **kwargs: ``postcode`` (required) and ``uprn`` (optional). When
                no UPRN is given, the first property returned for the
                postcode is used.

        Returns:
            A dict of the form ``{"bins": [{"type": ..., "collectionDate":
            "dd/mm/YYYY"}, ...]}``.

        Raises:
            ValueError: If the server responds with an error status, or no
                data is found for the postcode or for the UPRN.
        """
        user_postcode = kwargs.get("postcode")
        check_postcode(user_postcode)

        root_url = "https://molevalley.cloudmappin.com/my-mv-address-search/search/{}/0".format(user_postcode)
        # Without a timeout an unresponsive server would block the caller forever.
        response = requests.get(root_url, timeout=30)

        if not response.ok:
            raise ValueError("Invalid server response code retreiving data.")

        json_data = response.json()

        results = json_data.get("results")
        if not results:
            raise ValueError("No collection data found for postcode provided.")

        properties_found = results[0].get("items")
        if not properties_found:
            # A result group without any addresses carries no collection data.
            raise ValueError("No collection data found for postcode provided.")

        # If UPRN is provided, we can check a specific address.
        html_data = None
        uprn = kwargs.get("uprn")
        if uprn:
            check_uprn(uprn)
            for item in properties_found:
                # int() normalises the value before the string comparison.
                if uprn == str(int(item["info"][0][1]["value"])):
                    html_data = item["info"][2][1]["value"]
                    break
            if html_data is None:
                raise ValueError("No collection data found for UPRN provided.")
        else:
            # If UPRN not provided, just use the first result
            html_data = properties_found[0]["info"][2][1]["value"]

        soup = BeautifulSoup(html_data, features="html.parser")

        data = {"bins": []}
        all_collection_dates = []
        regex_date = re.compile(r'.* ([\d]+\/[\d]+\/[\d]+)')
        regex_additional_collection = re.compile(r'We also collect (.*) on (.*) -')

        # Search for the 'Bins and Recycling' panel
        for panel in soup.select('div[class*="panel"]'):
            heading = panel.h2
            # The selector also matches panel sub-divs that have no <h2>.
            if heading is None or heading.text.strip() != "Bins and Recycling":
                continue

            # Gather the bin types and dates
            for collection in panel.select('div > strong'):
                bin_type = collection.text.strip()
                detail = collection.find_next('p')
                if detail is None:
                    continue
                m = regex_date.match(detail.text.strip())
                if m:
                    collection_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
                    data["bins"].append({"type": bin_type, "collectionDate": collection_date.strftime('%d/%m/%Y')})
                    all_collection_dates.append(collection_date)

            # Collections made "each collection day" are reported against the
            # earliest dated collection; with no dated collection there is
            # nothing to anchor them to, so they are skipped.
            earliest = min(all_collection_dates, default=None)

            # Search for additional collections
            for p in panel.select('p'):
                m2 = regex_additional_collection.match(p.text.strip())
                if not m2:
                    continue
                if "each collection day" in m2.group(2) and earliest is not None:
                    data["bins"].append({"type": m2.group(1), "collectionDate": earliest.strftime('%d/%m/%Y')})

            # Only the first 'Bins and Recycling' panel is processed.
            break

        return data
14 changes: 14 additions & 0 deletions wiki/Councils.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ This document is still a work in progress, don't worry if your council isn't lis
- [Mid and East Antrim Borough Council](#mid-and-east-antrim-borough-council)
- [Mid Sussex District Council](#mid-sussex-district-council)
- [Milton Keynes City Council](#milton-keynes-city-council)
- [Mole Valley District Council](#mole-valley-district-council)
- [Neath Port Talbot Council](#neath-port-talbot-council)
- [Newark and Sherwood District Council](#newark-and-sherwood-district-council)
- [Newcastle City Council](#newcastle-city-council)
Expand Down Expand Up @@ -890,6 +891,19 @@ Note: Pass the name of the estate with the UPRN parameter, wrapped in double quo

---

### Mole Valley District Council
```commandline
python collect_data.py MoleValleyDistrictCouncil https://molevalley.cloudmappin.com/mmv/ -s -p "XXXX XXX" -u XXXXXXXX
```
Additional parameters:
- `-s` - skip get URL
- `-u` - UPRN
- `-p` - postcode

Note: To use this parser you must provide a valid postcode. A UPRN is optional; if it is omitted, the first address found for the postcode is used.

---

### Neath Port Talbot Council
```commandline
python collect_data.py NeathPortTalbotCouncil https://www.npt.gov.uk -s -u XXXXXXXX -p "XXXX XXX" -w http://HOST:PORT/
Expand Down

0 comments on commit 18a5466

Please sign in to comment.