fetch_city_meetings.py
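"""Download meeting minutes PDFs for the City of Alameda from Legistar.

For each year from 2004 through 2023, list events via the Legistar web
API, scrape the minutes link from each event's detail page, and save the
PDF under ./data/<BodyName>/<YYYY-MM-DD>.pdf.
"""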
import requests
from bs4 import BeautifulSoup
import os


def main():
    # Legistar web API and public site for the City of Alameda.
    base_api_url = "http://webapi.legistar.com/v1/alameda/"
    base_url = "http://Alameda.legistar.com/"

    total = 0
    for year in range(2004, 2024):  # 2004 through 2023
        print(f"Fetching {year}")
        # List events in [Jan 1 of this year, Jan 1 of next year), so
        # meetings held on December 31 are included.
        event_listing_url = (
            f"{base_api_url}events?$filter="
            f"EventDate+ge+datetime%27{year}-01-01%27"
            f"+and+EventDate+lt+datetime%27{year + 1}-01-01%27"
        )
        list_response = requests.get(event_listing_url, timeout=30)
        events = list_response.json()
        total += len(events)
        print(f"{total} events fetched so far")
        for event in events:
            body = event["EventBodyName"]
            print(f"ID: {event['EventId']}, Body: {body}")
            # One directory per meeting body, e.g. ./data/CityCouncil.
            directory = f"./data/{body.replace(' ', '')}"
            os.makedirs(directory, exist_ok=True)
            # Name each PDF after the meeting date (YYYY-MM-DD.pdf) and
            # skip any minutes that were already downloaded.
            filename = event["EventDate"].split("T")[0] + ".pdf"
            filepath = f"{directory}/{filename}"
            if os.path.exists(filepath):
                continue
            # Fetch the event's detail page and scrape the minutes link.
            page_url = event["EventInSiteURL"]
            try:
                page_response = requests.get(page_url, timeout=30)
            except requests.RequestException:
                continue
            soup = BeautifulSoup(page_response.text, "html.parser")
            try:
                minutes_url = (
                    base_url
                    + soup.find(id="ctl00_ContentPlaceHolder1_hypMinutes").attrs["href"]
                )
            except (AttributeError, KeyError):
                # The page has no minutes link; log the URL and move on.
                print(page_url)
                continue
            try:
                minutes_response = requests.get(
                    minutes_url, allow_redirects=True, timeout=30
                )
                minutes_response.raise_for_status()
            except requests.RequestException:
                print(minutes_url)
                continue
            print(f"Writing file {filepath}, body: {body}")
            with open(filepath, "wb") as minutes_pdf:
                minutes_pdf.write(minutes_response.content)


if __name__ == "__main__":
    main()
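
# Usage sketch (assumes Python 3 with the third-party `requests` and
# `beautifulsoup4` packages installed, e.g. via
#   pip install requests beautifulsoup4):
#
#   python fetch_city_meetings.py
#
# PDFs are written under ./data/, which is created on first run.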