MagnoWeather.py
import wget
import re
from bs4 import BeautifulSoup
from urllib2 import urlopen
#timeframe = [2 -> monthly, 3 -> daily] (we want 2)
#Prov = [AB,BC,SK,MB,ON,QC,NB,NT,PE,NU,YT,NS]
#StationID = count up from 1 until we hit five 404s in a row (or reach 50000 in a province)
#dlyRange = 2004-09-02|2015-04-15
#Year = [current year]
#Month=1&Day=01 (always, since this seems to make the request work)
#wget.download('http://www.google.com/url?q=http%3A%2F%2Fclimate.weather.gc.ca%2FclimateData%2Fbulkdata_e.html%3Fformat%3Dcsv%26stationID%3D42967%26Year%3D2014%26Month%3D1%26Day%3D1%26timeframe%3D2%26submit%3DDownload%2BData&sa=D&sntz=1&usg=AFQjCNEwIlyJ0esbczYOIhDB-mqKb2QiJg')
#http://climate.weather.gc.ca/climateData/bulkdata_e.html?format={0}&stationID={1}&Year={2}&Month=1&Day=1&timeframe=2&submit=Download+Data
#http://climate.weather.gc.ca/climate_data/bulk_data_e.html?hlyRange=2013-11-12%7C2018-12-09&dlyRange=2013-03-09%7C2018-12-09&mlyRange=%7C&StationID=51878&Prov=SK&urlExtension=_e.html&searchType=stnName&optLimit=yearRange&StartYear=1840&EndYear=2018&selRowPerPage=25&Line=2&searchMethod=contains&Month=10&Day=10&txtStationName=Prince+Albert&timeframe=2&Year=2018
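
# The StationID probing strategy described above (count up from 1 until five
# 404s in a row) isn't implemented anywhere below. A minimal sketch, assuming
# the bulk-data endpoint returns HTTP 404 for a missing station (it may
# instead return an empty CSV, in which case the check would have to inspect
# the body); probeStationIDs is a hypothetical helper, not part of the
# original script:
def probeStationIDs(maxMisses=5, cap=50000):
    from urllib2 import HTTPError
    found = []
    misses = 0
    stationID = 1
    while misses < maxMisses and stationID <= cap:
        url = ("http://climate.weather.gc.ca/climate_data/bulk_data_e.html"
               "?format=csv&stationID={0}&Year=2014&Month=1&Day=1"
               "&timeframe=2&submit=Download+Data").format(stationID)
        try:
            urlopen(url).close()  # any successful response counts as a hit
            found.append(stationID)
            misses = 0
        except HTTPError:
            misses += 1  # 404 (or other HTTP error): count it as a miss
        stationID += 1
    return found
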
def getTemps(stationID, Year):
    # Download one station's monthly-summary CSV for the given year.
    url = "http://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID={0}&Year={1}&Month=1&Day=1&timeframe=2&submit=Download+Data".format(stationID, Year)
    try:
        filename = wget.download(url, out="{0}_{1}.csv".format(stationID, Year))
        print "got file: {0}".format(filename)
    except Exception:
        print "couldn't get url {0}".format(url)
def getAllTemps(stationIDs=0):
    print "Starting Climate Downloader"
    if stationIDs == 0:
        # canStation.csv holds one numeric StationID per line; read it into a
        # list up front, since a file iterator would be exhausted after the
        # first pass through the year loop below.
        stationIDs = [line.strip() for line in open("canStation.csv")]
    for year in range(2014, 2018):  # range end is exclusive, so 2014-2017
        for station in stationIDs:
            getTemps(int(station), year)
def getCanStations():
    print "Parsing out StationIDs"
    output = open("canStation.csv", 'w')
    reID = re.compile("\d+")  # pulls the numeric ID out of each StationID element
    for pages in range(0, 24):
        start = pages * 100 + 1  # result pages hold 100 rows each
        website = "http://climate.weather.gc.ca/historical_data/search_historic_data_stations_e.html?searchType=stnProv&timeframe=1&lstProvince=&StartYear=1840&EndYear=2018&optLimit=specDate&Year=2018&Month=12&Day=5&selRowPerPage=100&txtCentralLatMin=0&txtCentralLatSec=0&txtCentralLongMin=0&txtCentralLongSec=0&startRow={0}".format(start)
        try:
            page = urlopen(website).read()
            soup = BeautifulSoup(page, "html.parser")  # lxml works here too
            stations = soup.find_all(attrs={"name": "StationID"})
            station = reID.findall(str(stations))  # regex out the station IDs, then store them
            print "writing to file for {0}".format(pages)
            for stationNum in station:
                output.write("{0}\n".format(int(stationNum)))
        except Exception:
            print "couldn't open page num: {0}".format(pages)
    output.close()
if __name__ == "__main__":
    #wget.download("http://climate.weather.gc.ca/climateData/bulkdata_e.html?format=csv&stationID=42967&Year=2014&Month=1&Day=1&timeframe=2&submit=Download+Data")
    #the above works; the call below tests the automated version.
    #NOTE: you need the canStation.csv file to run getAllTemps
    #getCanStations()
    getAllTemps()
#retired file-based variant of getCanStations; the matching step that would
#set `station` from each line was never filled in:
#def getCanStations(filename):
#    print "Parsing out StationID from {0}".format(filename)
#    fid = open(filename, 'r')
#    output = open("canStation.csv", 'w')
#    for line in fid:
#        if station != []:
#            print "found station: {0}, saving to canStation.csv".format(station[0])
#            output.write("{0}\n".format(station[0]))