Adding support for Storm Prediction Center and National Hurricane Center
Aodhan Sweeney committed Jul 15, 2019
1 parent 881c77d commit 727cc1e
Showing 18 changed files with 2,661,821 additions and 1 deletion.
2 changes: 1 addition & 1 deletion siphon/simplewebservice/iastate.py
@@ -173,6 +173,6 @@ def _get_data_raw(self, time, site_id, pressure=None):
message += 'for station {stid}'.format(stid=site_id)
if pressure is not None:
message += 'for pressure {pres}'.format(pres=pressure)
- message = message[:-1] + '.'
+ message = message + '.'
raise ValueError(message)
return json_data
160 changes: 160 additions & 0 deletions siphon/simplewebservice/nhc.py
@@ -0,0 +1,160 @@
"""Reading National Hurricane Center Data.
======================================
This program pulls data from the National Hurricane
Center and returns it in an easy-to-use format.
"""

from datetime import datetime

import numpy as np
import pandas as pd
import requests


class NHCD():
"""
Read data from the National Hurricane Center Database (NHCD).
This class reads NHC data and builds dataframes for easier access.
"""

def __init__(self):
"""
Create member attributes and storm info.
This initializer creates a file table, based on a URL, for all storms in the
NHCD and puts them into a pandas dataframe. The dataframe is then stored
as the member attribute '.storm_table'.
"""
storm_list_columns = ['Name', 'Basin', 'CycloneNum', 'Year', 'StormType', 'Filename']
file_table = pd.read_csv('http://ftp.nhc.noaa.gov/atcf/index/storm_list.txt',
names=storm_list_columns, header=None, index_col=False,
usecols=[0, 1, 7, 8, 9, 20])
file_table.Filename = file_table.Filename.str.lower()
self.storm_table = file_table

def get_tracks(self, year, filename):
"""
Make URLs and pull track data for a given storm.
The URLs are built using both the year and the filename. This function then
reads the data, creates a dataframe for each of the forecast and best tracks,
and compiles these dataframes into a dictionary.
Parameters
----------
self:
stores the storm dictionary as a member attribute of NHCD
year: str
year of the storm incident
filename: str
unique filename of the storm, used for indexing and as an id
in the NHCD. The first character defaults to a space in the NHCD, so it is
clipped before use.
Returns
-------
unique_models: list
all the models that have run forecasts for this storm throughout its life
"""
today = datetime.today()
current_year = today.year
data_dictionary = {}
# Current year data is stored in a different location
if year == str(current_year):
unformatted_forecast_url = 'http://ftp.nhc.noaa.gov/atcf/aid_public/a{}.dat.gz'
urlf = unformatted_forecast_url.format(filename[1:])
unformatted_best_url = 'http://ftp.nhc.noaa.gov/atcf/btk/b{}.dat'
urlb = unformatted_best_url.format(filename[1:])
else:
unformatted_forecast_url = 'http://ftp.nhc.noaa.gov/atcf/archive/{}/a{}.dat.gz'
urlf = unformatted_forecast_url.format(year, filename[1:])
unformatted_best_url = 'http://ftp.nhc.noaa.gov/atcf/archive/{}/b{}.dat.gz'
urlb = unformatted_best_url.format(year, filename[1:])
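# For example (hypothetical values), year='2017' and filename=' al112017'
# would give:
#   urlf = 'http://ftp.nhc.noaa.gov/atcf/archive/2017/aal112017.dat.gz'
#   urlb = 'http://ftp.nhc.noaa.gov/atcf/archive/2017/bal112017.dat.gz'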

url_links = [urlf, urlb]
url_count = 0
for url in url_links:
# Checking if url is valid, if status_code is 200 then website is active
if requests.get(url).status_code == 200:
# Creating column names
storm_data_column_names = ['Basin', 'CycloneNum', 'WarnDT', 'Model',
'Forecast_hour', 'Lat', 'Lon']
# Create a pandas dataframe using specific columns for a storm
single_storm = pd.read_csv(url, header=None, names=storm_data_column_names,
index_col=False, usecols=[0, 1, 2, 4, 5, 6, 7])

# Must convert lats and lons from string to float and perform division by 10
storm_lats = single_storm['Lat']
storm_lats = (storm_lats.str.slice(stop=-1))
storm_lats = storm_lats.astype(float)
storm_lats = storm_lats / 10
single_storm['Lat'] = storm_lats

storm_lons = single_storm['Lon']
storm_lons = (storm_lons.str.slice(stop=-1))
storm_lons = storm_lons.astype(float)
storm_lons = -storm_lons / 10
single_storm['Lon'] = storm_lons

# Change WarnDT to a string
single_storm['WarnDT'] = [str(x) for x in single_storm['WarnDT']]

# Adding this newly created DataFrame to a dictionary
if url_count == 0:
data_dictionary['forecast'] = single_storm
else:
data_dictionary['best_track'] = single_storm

else:
raise ValueError('url {} was not valid, select different storm.'.format(url))

url_count += 1
# Turn data_dictionary into a member attribute
self.storm_dictionary = data_dictionary
forecast = data_dictionary.get('forecast')
unique_models = np.unique(forecast['Model'].values).tolist()
return unique_models

def model_selection_latlon(self, models):
"""
Select model type and get lat/lons and track evolution data.
Parameters
----------
self:
uses the storm dictionary attribute and also stores the model_table
and date_times attributes
models: list
unique models that have been run for a storm
Returns
-------
self.model_table: list attribute
all model forecasts for that specific model type that have been run for a given
storm
"""
# We will always plot best track, and thus must save the coordinates for plotting
best_track = self.storm_dictionary.get('best_track')
self.date_times = best_track['WarnDT']

lats = best_track['Lat']
lons = best_track['Lon']
self.best_track_coordinates = [lats, lons]

model_tracks = self.storm_dictionary.get('forecast')

self.model_table = []
for model in models:
one_model_table = model_tracks[model_tracks['Model'] == model]
self.model_table.append(one_model_table)

return self.model_table
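A minimal usage sketch for the new NHCD class (the storm choice is hypothetical;
assumes network access to ftp.nhc.noaa.gov and that the chosen row exists in
.storm_table):

    from siphon.simplewebservice.nhc import NHCD

    nhc = NHCD()  # builds .storm_table from the ATCF storm list
    # Hypothetical pick: the first storm from 2017 in the table
    storm = nhc.storm_table[nhc.storm_table.Year == 2017].iloc[0]
    models = nhc.get_tracks(str(storm.Year), storm.Filename)
    # Forecast tracks for the first model, plus the best-track coordinates
    model_tables = nhc.model_selection_latlon(models[:1])
    best_lats, best_lons = nhc.best_track_coordinates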
140 changes: 140 additions & 0 deletions siphon/simplewebservice/spc.py
@@ -0,0 +1,140 @@
"""Reading Storm Prediction Center Data.
======================================
This program pulls data from the Storm Prediction
Center's archive, which goes back to the 1950s.
The available weather events are
hail, wind, and tornadoes.
"""

import pandas as pd


class SpcData:
"""
Pull data from the Storm Prediction Center (SPC).
This class gets data on tornado, hail, and severe wind events
and returns a pandas dataframe for each of these storm event types.
"""

def __init__(self, stormtype, datetime):
"""
Create a SpcData instance with attributes selecting the date and storm type.
The SPC data-sifting method differs based on whether the storm occurred before
2018. Storms after 12/31/2017 are found by building the date-specific URL for
the CSV file on the SPC website. Storms prior to this date are
first collected into one large pandas dataframe holding all SPC events of the
selected type from 1950 or 1955 through 2017. This data is then successively
parsed and trimmed until only storms from the selected date remain. This is
because the SPC has changed the way it displays data so many times that writing
specific methods for each time frame would be too time consuming.
"""
self.storm_type = stormtype
self.date_time = datetime
self.year_string = self.date_time[0:4]
self.month_string = self.date_time[4:6]
self.day_string = self.date_time[6:8]
self.storms = self.storm_type_selection()

if int(self.year_string) <= 2017:
one_year_table = self.storms[self.storms['Year'] == int(self.year_string)]
month_table = one_year_table[one_year_table['Month'] == int(self.month_string)]
self.day_table = month_table[month_table['Day'] == int(self.day_string)]

else:
self.day_table = self.storms

def storm_type_selection(self):
"""
Find and create the URL for a specific storm type and year.
Prior to 2018, the way the SPC stores storm data is inconsistent. To deal
with this, the URL used to find the data for a given day changes
based on the year chosen by the user.
Parameters
----------
self:
The date_time string attribute will be used for year identification
Returns
-------
(torn/wind/hail)_reports: pandas DataFrame
This dataframe has the data about the specific SPC data type for either one day
or a 60+ year period based on what year is chosen.
"""
# Placeholder column name; replaced below by the event magnitude type
# (F-Scale, hail size, or wind speed)
mag = 'mag'
# Columns differ for events before and after 12/31/2017.
after_2017_columns = ['Time', mag, 'Location', 'County', 'State',
'Lat', 'Lon', 'Comment']
before_2018_columns = ['Num', 'Year', 'Month', 'Day', 'Time', 'Time Zone',
'State', mag, 'Injuries', 'Fatalities', 'Property Loss',
'Crop Loss', 'Start Lat', 'Start Lon', 'End Lat',
'End Lon', 'Length (mi)', 'Width (yd)', 'Ns', 'SN', 'SG',
'County Code 1', 'County Code 2', 'County Code 3',
'County Code 4']

# Find specific urls and create dataframe based on time and event type
if self.storm_type == 'tornado':
if int(self.year_string) <= 2017:
before_2018_columns[7] = 'F-Scale'
url = 'https://www.spc.noaa.gov/wcm/data/1950-2017_torn.csv'
torn_reports = pd.read_csv(url, names=before_2018_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 5, 6, 7,
10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27])
else:
_url = 'https://www.spc.noaa.gov/climo/reports/{}{}{}_rpts_filtered_torn.csv'
url = _url.format(self.year_string[2: 4], self.month_string, self.day_string)
after_2017_columns[1] = 'F-Scale'
torn_reports = pd.read_csv(url, names=after_2017_columns,
header=0, index_col=False,
usecols=[0, 1, 2, 3, 4, 5, 6, 7])
return torn_reports

elif self.storm_type == 'hail':
if int(self.year_string) <= 2017:
before_2018_columns[7] = 'Size (hundredth in)'
url = 'https://www.spc.noaa.gov/wcm/data/1955-2017_hail.csv'
hail_reports = pd.read_csv(url, names=before_2018_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 5, 6, 7,
10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27])

else:
_url = 'https://www.spc.noaa.gov/climo/reports/{}{}{}_rpts_filtered_hail.csv'
url = _url.format(self.year_string[2:4], self.month_string, self.day_string)
after_2017_columns[1] = 'Size (in)'
hail_reports = pd.read_csv(url, names=after_2017_columns,
header=0, index_col=False,
usecols=[0, 1, 2, 3, 4, 5, 6, 7])
return hail_reports

elif self.storm_type == 'wind':
if int(self.year_string) <= 2017:
before_2018_columns[7] = 'Speed (kt)'
url = 'https://www.spc.noaa.gov/wcm/data/1955-2017_wind.csv'
wind_reports = pd.read_csv(url, names=before_2018_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 5, 6, 7,
10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27])
else:
_url = 'https://www.spc.noaa.gov/climo/reports/{}{}{}_rpts_filtered_wind.csv'
url = _url.format(self.year_string[2:4], self.month_string, self.day_string)
after_2017_columns[1] = 'Speed (kt)'
wind_reports = pd.read_csv(url, names=after_2017_columns, header=0,
index_col=False, usecols=[0, 1, 2, 3, 4, 5, 6, 7])
return wind_reports

else:
raise ValueError('Not a valid event type: enter either tornado, wind or hail.')
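A minimal usage sketch for the new SpcData class (the date is hypothetical;
assumes network access to www.spc.noaa.gov):

    from siphon.simplewebservice.spc import SpcData

    # Tornado reports for a single day; the date is passed as a 'YYYYMMDD' string
    spc = SpcData('tornado', '20110427')
    print(spc.day_table.head())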