Adding support for Storm Prediction Center and National Hurricane Center
Aodhan Sweeney committed Jul 15, 2019
1 parent 881c77d commit 727cc1e
Showing 18 changed files with 2,661,821 additions and 1 deletion.
2 changes: 1 addition & 1 deletion siphon/simplewebservice/iastate.py
@@ -173,6 +173,6 @@ def _get_data_raw(self, time, site_id, pressure=None):
message += 'for station {stid}'.format(stid=site_id)
if pressure is not None:
message += 'for pressure {pres}'.format(pres=pressure)
- message = message[:-1] + '.'
+ message = message + '.'
raise ValueError(message)
return json_data
160 changes: 160 additions & 0 deletions siphon/simplewebservice/nhc.py
@@ -0,0 +1,160 @@
"""Reading National Hurricane Center Data.
======================================
This program pulls data from the National Hurricane
Center and returns it in an easy-to-use format.
"""

from datetime import datetime

import numpy as np
import pandas as pd
import requests


class NHCD():
"""
Read data from the National Hurricane Center Database (NHCD).
This class reads NHC data and builds dataframes for easier access.
"""

def __init__(self):
"""
Create member attributes and storm info.
This initializer creates a file table, based on a URL, for all storms in the
NHCD and puts them into a pandas dataframe. The dataframe is then stored
as the member attribute '.storm_table'.
"""
storm_list_columns = ['Name', 'Basin', 'CycloneNum', 'Year', 'StormType', 'Filename']
file_table = pd.read_csv('http://ftp.nhc.noaa.gov/atcf/index/storm_list.txt',
names=storm_list_columns, header=None, index_col=False,
usecols=[0, 1, 7, 8, 9, 20])
file_table.Filename = file_table.Filename.str.lower()
self.storm_table = file_table

def get_tracks(self, year, filename):
"""
Make URLs and pull track data for a given storm.
The URLs are built using both the year and the filename. This function then
reads the data, creates a dataframe for each of the forecast and best tracks,
and compiles these dataframes into a dictionary.
Parameters
----------
self:
stores the storm dictionary as a member attribute of NHCD
year: str
year of the storm incident
filename: str
unique filename of the storm, used for indexing and as an id
in the NHCD. The first character defaults to a space in the NHCD, so it is
clipped before use.
Returns
-------
unique_models: list
all the models that have run forecasts for this storm throughout its life
"""
today = datetime.today()
current_year = today.year
data_dictionary = {}
# Current year data is stored in a different location
if year == str(current_year):
unformatted_forecast_url = 'http://ftp.nhc.noaa.gov/atcf/aid_public/a{}.dat.gz'
urlf = unformatted_forecast_url.format(filename[1:])
unformatted_best_url = 'http://ftp.nhc.noaa.gov/atcf/btk/b{}.dat'
urlb = unformatted_best_url.format(filename[1:])
else:
unformatted_forecast_url = 'http://ftp.nhc.noaa.gov/atcf/archive/{}/a{}.dat.gz'
urlf = unformatted_forecast_url.format(year, filename[1:])
unformatted_best_url = 'http://ftp.nhc.noaa.gov/atcf/archive/{}/b{}.dat.gz'
urlb = unformatted_best_url.format(year, filename[1:])
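# For example (hypothetical values), year='2017' and filename=' al112017'
# would give:
#   urlf = 'http://ftp.nhc.noaa.gov/atcf/archive/2017/aal112017.dat.gz'
#   urlb = 'http://ftp.nhc.noaa.gov/atcf/archive/2017/bal112017.dat.gz'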

url_links = [urlf, urlb]
url_count = 0
for url in url_links:
# Checking if url is valid, if status_code is 200 then website is active
if requests.get(url).status_code == 200:
# Creating column names
storm_data_column_names = ['Basin', 'CycloneNum', 'WarnDT', 'Model',
'Forecast_hour', 'Lat', 'Lon']
# Create a pandas dataframe using specific columns for a storm
single_storm = pd.read_csv(url, header=None, names=storm_data_column_names,
index_col=False, usecols=[0, 1, 2, 4, 5, 6, 7])

# Must convert lats and lons from string to float and perform division by 10
storm_lats = single_storm['Lat']
storm_lats = (storm_lats.str.slice(stop=-1))
storm_lats = storm_lats.astype(float)
storm_lats = storm_lats / 10
single_storm['Lat'] = storm_lats

storm_lons = single_storm['Lon']
storm_lons = (storm_lons.str.slice(stop=-1))
storm_lons = storm_lons.astype(float)
storm_lons = -storm_lons / 10
single_storm['Lon'] = storm_lons

# Change WarnDT to a string
single_storm['WarnDT'] = [str(x) for x in single_storm['WarnDT']]

# Adding this newly created DataFrame to a dictionary
if url_count == 0:
data_dictionary['forecast'] = single_storm
else:
data_dictionary['best_track'] = single_storm

else:
raise ValueError('url {} was not valid, select different storm.'.format(url))

url_count += 1
# Turn data_dictionary into a member attribute
self.storm_dictionary = data_dictionary
forecast = data_dictionary.get('forecast')
unique_models = np.unique(forecast['Model'].values).tolist()
return unique_models

def model_selection_latlon(self, models):
"""
Select model type and get lat/lons and track evolution data.
Parameters
----------
self:
uses the storm dictionary attribute and also stores the model_table
and date_times attributes
models: list
unique models that have been run for a storm
Returns
-------
self.model_table: list attribute
all model forecasts for that specific model type that have been run for a given
storm
"""
# We will always plot best track, and thus must save the coordinates for plotting
best_track = self.storm_dictionary.get('best_track')
self.date_times = best_track['WarnDT']

lats = best_track['Lat']
lons = best_track['Lon']
self.best_track_coordinates = [lats, lons]

model_tracks = self.storm_dictionary.get('forecast')

self.model_table = []
for model in models:
one_model_table = model_tracks[model_tracks['Model'] == model]
self.model_table.append(one_model_table)

return self.model_table
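A minimal usage sketch for the new NHCD class (the storm choice is hypothetical;
assumes network access to ftp.nhc.noaa.gov and that the chosen row exists in
.storm_table):

    from siphon.simplewebservice.nhc import NHCD

    nhc = NHCD()  # builds .storm_table from the ATCF storm list
    # Hypothetical pick: the first storm from 2017 in the table
    storm = nhc.storm_table[nhc.storm_table.Year == 2017].iloc[0]
    models = nhc.get_tracks(str(storm.Year), storm.Filename)
    # Forecast tracks for the first model, plus the best-track coordinates
    model_tables = nhc.model_selection_latlon(models[:1])
    best_lats, best_lons = nhc.best_track_coordinates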
140 changes: 140 additions & 0 deletions siphon/simplewebservice/spc.py
@@ -0,0 +1,140 @@
"""Reading Storm Prediction Center Data.
======================================
This program pulls data from the Storm Prediction
Center's archive, which goes back to the 1950s.
The available weather events are
hail, wind, and tornadoes.
"""

import pandas as pd


class SpcData:
"""
Pull data from the Storm Prediction Center (SPC).
This class gets data on tornado, hail, and severe wind events
and returns a pandas dataframe for each of these storm event types.
"""

def __init__(self, stormtype, datetime):
"""
Create a SpcData instance with attributes selecting the date and storm type.
The SPC data-sifting method differs based on whether the storm occurred before
2018. Storms after 12/31/2017 are found by building the date-specific URL for
the CSV file on the SPC website. Storms prior to this date are
first collected into one large pandas dataframe holding all SPC events of the
selected type from 1950 or 1955 through 2017. This data is then successively
parsed and trimmed until only storms from the selected date remain. This is
because the SPC has changed the way it displays data so many times that writing
specific methods for each time frame would be too time consuming.
"""
self.storm_type = stormtype
self.date_time = datetime
self.year_string = self.date_time[0:4]
self.month_string = self.date_time[4:6]
self.day_string = self.date_time[6:8]
self.storms = self.storm_type_selection()

if int(self.year_string) <= 2017:
one_year_table = self.storms[self.storms['Year'] == int(self.year_string)]
month_table = one_year_table[one_year_table['Month'] == int(self.month_string)]
self.day_table = month_table[month_table['Day'] == int(self.day_string)]

else:
self.day_table = self.storms

def storm_type_selection(self):
"""
Find and create the URL for a specific storm type and year.
Prior to 2018, the way the SPC stores storm data is inconsistent. To deal
with this, the URL used to find the data for a given day changes
based on the year chosen by the user.
Parameters
----------
self:
The date_time string attribute will be used for year identification
Returns
-------
(torn/wind/hail)_reports: pandas DataFrame
This dataframe has the data about the specific SPC data type for either one day
or a 60+ year period based on what year is chosen.
"""
# Placeholder column name; replaced below by the event magnitude type
# (F-Scale, hail size, or wind speed)
mag = 'mag'
# Columns differ for events before and after 12/31/2017.
after_2017_columns = ['Time', mag, 'Location', 'County', 'State',
'Lat', 'Lon', 'Comment']
before_2018_columns = ['Num', 'Year', 'Month', 'Day', 'Time', 'Time Zone',
'State', mag, 'Injuries', 'Fatalities', 'Property Loss',
'Crop Loss', 'Start Lat', 'Start Lon', 'End Lat',
'End Lon', 'Length (mi)', 'Width (yd)', 'Ns', 'SN', 'SG',
'County Code 1', 'County Code 2', 'County Code 3',
'County Code 4']

# Find specific urls and create dataframe based on time and event type
if self.storm_type == 'tornado':
if int(self.year_string) <= 2017:
before_2018_columns[7] = 'F-Scale'
url = 'https://www.spc.noaa.gov/wcm/data/1950-2017_torn.csv'
torn_reports = pd.read_csv(url, names=before_2018_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 5, 6, 7,
10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27])
else:
_url = 'https://www.spc.noaa.gov/climo/reports/{}{}{}_rpts_filtered_torn.csv'
url = _url.format(self.year_string[2: 4], self.month_string, self.day_string)
after_2017_columns[1] = 'F-Scale'
torn_reports = pd.read_csv(url, names=after_2017_columns,
header=0, index_col=False,
usecols=[0, 1, 2, 3, 4, 5, 6, 7])
return torn_reports

elif self.storm_type == 'hail':
if int(self.year_string) <= 2017:
before_2018_columns[7] = 'Size (hundredth in)'
url = 'https://www.spc.noaa.gov/wcm/data/1955-2017_hail.csv'
hail_reports = pd.read_csv(url, names=before_2018_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 5, 6, 7,
10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27])

else:
_url = 'https://www.spc.noaa.gov/climo/reports/{}{}{}_rpts_filtered_hail.csv'
url = _url.format(self.year_string[2:4], self.month_string, self.day_string)
after_2017_columns[1] = 'Size (in)'
hail_reports = pd.read_csv(url, names=after_2017_columns,
header=0, index_col=False,
usecols=[0, 1, 2, 3, 4, 5, 6, 7])
return hail_reports

elif self.storm_type == 'wind':
if int(self.year_string) <= 2017:
before_2018_columns[7] = 'Speed (kt)'
url = 'https://www.spc.noaa.gov/wcm/data/1955-2017_wind.csv'
wind_reports = pd.read_csv(url, names=before_2018_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 5, 6, 7,
10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27])
else:
_url = 'https://www.spc.noaa.gov/climo/reports/{}{}{}_rpts_filtered_wind.csv'
url = _url.format(self.year_string[2:4], self.month_string, self.day_string)
after_2017_columns[1] = 'Speed (kt)'
wind_reports = pd.read_csv(url, names=after_2017_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 4, 5, 6, 7])
return wind_reports

else:
raise ValueError('Not a valid event type: enter either tornado, wind or hail.')
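A minimal usage sketch for the new SpcData class (the date is hypothetical;
assumes network access to www.spc.noaa.gov):

    from siphon.simplewebservice.spc import SpcData

    # Tornado reports for a single day; the date is passed as a 'YYYYMMDD' string
    spc = SpcData('tornado', '20110427')
    print(spc.day_table.head())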