diff --git a/.coveragerc b/.coveragerc
index fb5ff5e650..2ac706c0b3 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -5,11 +5,3 @@ omit =
     versioneer.py
     setup.py
     act/plotting/histogramdisplay.py
-    act/discovery/get_arm.py
-    act/discovery/arm.py
-    act/discovery/airnow.py
-    act/discovery/asos.py
-    act/discovery/cropscape.py
-    act/discovery/neon.py
-    act/discovery/noaapsl.py
-    act/discovery/surfrad.py
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 9f8b0b3b90..b06bfb4150 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -278,6 +278,19 @@ the tools documentation for details on this process.
 - https://flake8.pycqa.org/en/latest/
 - https://www.pylint.org/
 
+Naming Convention
+----------------------------------------
+
+Discovery
+~~~~~~~~~
+When adding discovery modules or functions, please adhere to the following:
+
+* Filenames should include only the name of the organization (arm) or portal (airnow), with no filler words such as get or download.
+* Function names should follow [get/download]_[org/portal]_[data/other description]. If a function retrieves data but does not download a file, its name should start with get, like get_asos_data. If it downloads a file, its name should start with download. The description can vary depending on what is being retrieved; please check the existing functions for ideas.
+
+IO
+~~~~~~~~~
+Similarly, io module filenames should not contain filler words and should just be the organization or portal name. Function names should clearly indicate what the function does, such as read_arm_netcdf instead of read_netcdf when the function is specific to ARM files.
 
 Adding Secrets and Environment Variables
 ----------------------------------------
diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py
index 6bc9066d4c..c0a5fab10d 100644
--- a/act/discovery/__init__.py
+++ b/act/discovery/__init__.py
@@ -8,15 +8,14 @@
 
 __getattr__, __dir__, __all__ = lazy.attach(
     __name__,
-    submodules=['get_armfiles', 'get_cropscape', 'get_airnow', 'get_noaapsl', 'get_neon', 'get_surfrad'],
+    submodules=['arm', 'cropscape', 'airnow', 'noaapsl', 'neon', 'surfrad'],
     submod_attrs={
-        'get_arm': ['download_arm_data'],
-        'get_armfiles': ['download_data', 'download_arm_data', 'get_arm_doi'],
-        'get_asos': ['get_asos'],
-        'get_airnow': ['get_airnow_bounded_obs', 'get_airnow_obs', 'get_airnow_forecast'],
-        'get_cropscape': ['croptype'],
-        'get_noaapsl': ['download_noaa_psl_data'],
-        'get_neon': ['get_site_products', 'get_product_avail', 'download_neon_data'],
-        'get_surfrad': ['download_surfrad']
+        'arm': ['download_arm_data', 'get_arm_doi'],
+        'asos': ['get_asos_data'],
+        'airnow': ['get_airnow_bounded_obs', 'get_airnow_obs', 'get_airnow_forecast'],
+        'cropscape': ['get_crop_type'],
+        'noaapsl': ['download_noaa_psl_data'],
+        'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'],
+        'surfrad': ['download_surfrad_data']
     },
 )
diff --git a/act/discovery/get_airnow.py b/act/discovery/get_airnow.py
deleted file mode 100644
index b8b8937a8b..0000000000
--- a/act/discovery/get_airnow.py
+++ /dev/null
@@ -1,261 +0,0 @@
-
-"""
-Function for getting EPA data from AirNow data portal
-
-"""
-
-import pandas as pd
-import numpy as np
-import xarray as xr
-import warnings
-
-
-def get_airnow_forecast(token, date, zipcode=None, latlon=None, distance=25):
-    """
-    This tool will get current or historical AQI values and categories for a
-    reporting area by either Zip code or Lat/Lon coordinate.
- https://docs.airnowapi.org/ - - Parameters - ---------- - token : str - The access token for accesing the AirNowAPI web server - date : str - The date of the data to be acquired. Format is YYYY-MM-DD - zipcode : str - The zipcode of the location for the data request. - If zipcode is not defined then a latlon coordinate must be defined. - latlon : array - The latlon coordinate of the loaction for the data request. - If latlon is not defined then a zipcode must be defined. - distance : int - If no reporting are is associated with the specified zipcode or latlon, - return a forcast from a nearby reporting area with this distance (in miles). - Default is 25 miles - - Returns - ------- - ds : xarray.Dataset - Returns an Xarray dataset object - - Example - ------- - act.discovery.get_AirNow_forecast(token='XXXXXX', zipcode='60440', date='2012-05-31') - - """ - message = 'API will be changing from act.discovery.get_airnow to act.discovery.airnow' - warnings.warn(message, DeprecationWarning, 2) - - # default beginning of the query url - query_url = ('https://airnowapi.org/aq/forecast/') - - # checking is either a zipcode or latlon coordinate is defined - # if neither is defined then error is raised - if (zipcode is None) and (latlon is None): - raise NameError("Zipcode or latlon must be defined") - - if zipcode: - url = (query_url + ('zipcode/?' + 'format=text/csv' + '&zipCode=' - + str(zipcode) + '&date=' + str(date) - + '&distance=' + str(distance) - + '&API_KEY=' + str(token))) - - if latlon: - url = (query_url + ('latLong/?' + 'format=text/csv' - + '&latitude=' + str(latlon[0]) + '&longitude=' - + str(latlon[1]) + '&date=' + str(date) - + '&distance=' + str(distance) - + '&API_KEY=' + str(token))) - - df = pd.read_csv(url) - - # converting to xarray dataset object - ds = df.to_xarray() - - return ds - - -def get_airnow_obs(token, date=None, zipcode=None, latlon=None, distance=25): - """ - This tool will get current or historical observed AQI values and categories for a - reporting area by either Zip code or Lat/Lon coordinate. - https://docs.airnowapi.org/ - - Parameters - ---------- - token : str - The access token for accesing the AirNowAPI web server - date : str - The date of the data to be acquired. Format is YYYY-MM-DD - Default is None which will pull most recent observations - zipcode : str - The zipcode of the location for the data request. - If zipcode is not defined then a latlon coordinate must be defined. - latlon : array - The latlon coordinate of the loaction for the data request. - If latlon is not defined then a zipcode must be defined. - distance : int - If no reporting are is associated with the specified zipcode or latlon, - return a forcast from a nearby reporting area with this distance (in miles). 
- Default is 25 miles - - Returns - ------- - ds : xarray.Dataset - Returns an xarray dataset object - - Example - ------- - act.discovery.get_AirNow_obs(token='XXXXXX', date='2021-12-01', zipcode='60440') - act.discovery.get_AirNow_obs(token='XXXXXX', latlon=[45,-87]) - - """ - - message = 'API will be changing from act.discovery.get_airnow to act.discovery.airnow' - warnings.warn(message, DeprecationWarning, 2) - - # default beginning of the query url - query_url = ('https://www.airnowapi.org/aq/observation/') - - # checking is either a zipcode or latlon coordinate is defined - # if neither is defined then error is raised - if (zipcode is None) and (latlon is None): - raise NameError("Zipcode or latlon must be defined") - - # setting the observation type to either current or historical based on the date - if date is None: - obs_type = 'current' - if zipcode: - url = (query_url + ('zipCode/' + str(obs_type) + '/?' + 'format=text/csv' - + '&zipCode=' + str(zipcode) + '&distance=' + str(distance) - + '&API_KEY=' + str(token))) - if latlon: - url = (query_url + ('latLong/' + str(obs_type) + '/?' + 'format=text/csv' - + '&latitude=' + str(latlon[0]) - + '&longitude=' + str(latlon[1]) + '&distance=' - + str(distance) + '&API_KEY=' + str(token))) - else: - obs_type = 'historical' - if zipcode: - url = (query_url + ('zipCode/' + str(obs_type) + '/?' + 'format=text/csv' - + '&zipCode=' + str(zipcode) + '&date=' + str(date) - + 'T00-0000&distance=' + str(distance) + '&API_KEY=' + str(token))) - if latlon: - url = (query_url + ('latLong/' + str(obs_type) + '/?' + 'format=text/csv' - + '&latitude=' + str(latlon[0]) - + '&longitude=' + str(latlon[1]) + '&date=' - + str(date) + 'T00-0000&distance=' + str(distance) - + '&API_KEY=' + str(token))) - - df = pd.read_csv(url) - - # converting to xarray - ds = df.to_xarray() - - return ds - - -def get_airnow_bounded_obs(token, start_date, end_date, latlon_bnds, parameters='OZONE,PM25', data_type='B', - mon_type=0): - """ - Get AQI values or data concentrations for a specific date and time range and set of - parameters within a geographic area of intrest - https://docs.airnowapi.org/ - - Parameters - ---------- - token : str - The access token for accesing the AirNowAPI web server - start_date : str - The start date and hour (in UTC) of the data request. - Format is YYYY-MM-DDTHH - end_date : str - The end date and hour (in UTC) of the data request. - Format is YYYY-MM-DDTHH - latlon_bnds : str - Lat/Lon bounding box of the area of intrest. - Format is 'minX,minY,maxX,maxY' - parameters : str - Parameters to return data for. Options are: - Ozone, PM25, PM10, CO, NO2, SO2 - Format is 'PM25,PM10' - mon_type : int - The type of monitor to be returned. Default is 0 - 0-Permanent, 1-Mobile onlt, 2-Permanent & Mobile - data_type : char - The type of data to be returned. 
- A-AQI, C-Concentrations, B-AQI & Concentrations - - Returns - ------- - ds : xarray.Dataset - Returns an xarray dataset object - - """ - - message = 'API will be changing from act.discovery.get_airnow to act.discovery.airnow' - warnings.warn(message, DeprecationWarning, 2) - - verbose = 1 - inc_raw_con = 1 - - url = ('https://www.airnowapi.org/aq/data/?startDate=' + str(start_date) - + '&endDate=' + str(end_date) + '¶meters=' + str(parameters) - + '&BBOX=' + str(latlon_bnds) + '&dataType=' + str(data_type) - + '&format=text/csv' + '&verbose=' + str(verbose) - + '&monitorType=' + str(mon_type) + '&includerawconcentrations=' - + str(inc_raw_con) + '&API_KEY=' + str(token)) - - # Set Column names - names = ['latitude', 'longitude', 'time', 'parameter', 'concentration', 'unit', - 'raw_concentration', 'AQI', 'category', 'site_name', 'site_agency', 'aqs_id', 'full_aqs_id'] - - # Read data into CSV - df = pd.read_csv(url, names=names) - - # Each line is a different time or site or variable so need to parse out - sites = df['site_name'].unique() - times = df['time'].unique() - variables = list(df['parameter'].unique()) + ['AQI', 'category', 'raw_concentration'] - latitude = [list(df['latitude'].loc[df['site_name'] == s])[0] for s in sites] - longitude = [list(df['longitude'].loc[df['site_name'] == s])[0] for s in sites] - aqs_id = [list(df['aqs_id'].loc[df['site_name'] == s])[0] for s in sites] - - # Set up the dataset ahead of time - ds = xr.Dataset( - data_vars={ - 'latitude': (['sites'], latitude), - 'longitude': (['sites'], longitude), - 'aqs_id': (['sites'], aqs_id) - }, - coords={ - 'time': (['time'], times), - 'sites': (['sites'], sites) - } - ) - - # Set up emtpy data with nans - data = np.empty((len(variables), len(times), len(sites))) - data[:] = np.nan - - # For each variable, pull out the data from specific sites and times - for v in range(len(variables)): - for t in range(len(times)): - for s in range(len(sites)): - if variables[v] in ['AQI', 'category', 'raw_concentration']: - result = df.loc[(df['time'] == times[t]) & (df['site_name'] == sites[s])] - if len(result[variables[v]]) > 0: - data[v, t, s] = list(result[variables[v]])[0] - atts = {'units': ''} - else: - result = df.loc[(df['time'] == times[t]) & (df['site_name'] == sites[s]) & (df['parameter'] == variables[v])] - if len(result['concentration']) > 0: - data[v, t, s] = list(result['concentration'])[0] - atts = {'units': list(result['unit'])[0]} - - # Add variables to the dataset - ds[variables[v]] = xr.DataArray(data=data[v, :, :], dims=['time', 'sites'], attrs=atts) - - times = pd.to_datetime(times) - ds = ds.assign_coords({'time': times}) - return ds diff --git a/act/discovery/get_arm.py b/act/discovery/get_arm.py deleted file mode 100644 index 9942ad8151..0000000000 --- a/act/discovery/get_arm.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -Script for downloading data from ARM's Live Data Webservice - -""" - -import argparse -import json -import os -import sys -from datetime import timedelta -import requests -import textwrap -import warnings - -try: - from urllib.request import urlopen -except ImportError: - from urllib import urlopen - -from act.utils import date_parser - - -def download_arm_data(username, token, datastream, startdate, enddate, time=None, output=None): - """ - This tool will help users utilize the ARM Live Data Webservice to download - ARM data. - - Parameters - ---------- - username : str - The username to use for logging into the ADC archive. - token : str - The access token for accessing the ADC archive. 
- datastream : str - The name of the datastream to acquire. - startdate : str - The start date of the data to acquire. Formats accepted are - YYYY-MM-DD, DD.MM.YYYY, DD/MM/YYYY, YYYYMMDD, YYYY/MM/DD or - any of the previous formats with THH:MM:SS added onto the end - (ex. 2020-09-15T12:00:00). - enddate : str - The end date of the data to acquire. Formats accepted are - YYYY-MM-DD, DD.MM.YYYY, DD/MM/YYYY, YYYYMMDD or YYYY/MM/DD, or - any of the previous formats with THH:MM:SS added onto the end - (ex. 2020-09-15T13:00:00). - time: str or None - The specific time. Format is HHMMSS. Set to None to download all files - in the given date interval. - output : str - The output directory for the data. Set to None to make a folder in the - current working directory with the same name as *datastream* to place - the files in. - - Returns - ------- - files : list - Returns list of files retrieved - - Notes - ----- - This programmatic interface allows users to query and automate - machine-to-machine downloads of ARM data. This tool uses a REST URL and - specific parameters (saveData, query), user ID and access token, a - datastream name, a start date, and an end date, and data files matching - the criteria will be returned to the user and downloaded. - - By using this web service, users can setup cron jobs and automatically - download data from /data/archive into their workspace. This will also - eliminate the manual step of following a link in an email to download data. - All other data files, which are not on the spinning - disk (on HPSS), will have to go through the regular ordering process. - More information about this REST API and tools can be found on `ARM Live - `_. - - To login/register for an access token click `here - `_. - - Author: Michael Giansiracusa - Email: giansiracumt@ornl.gov - - Examples - -------- - This code will download the netCDF files from the sgpmetE13.b1 datastream - and place them in a directory named sgpmetE13.b1. The data from 14 Jan to - 20 Jan 2017 will be downloaded. Replace *userName* and *XXXXXXXXXXXXXXXX* - with your username and token for ARM Data Discovery. See the Notes for - information on how to obtain a username and token. - - .. code-block:: python - - act.discovery.download_data( - "userName", "XXXXXXXXXXXXXXXX", "sgpmetE13.b1", "2017-01-14", "2017-01-20" - ) - - """ - message = 'API will be changing from act.discovery.get_arm to act.discovery.arm' - warnings.warn(message, DeprecationWarning, 2) - - # default start and end are empty - start, end = '', '' - # start and end strings for query_url are constructed - # if the arguments were provided - if startdate: - start_datetime = date_parser(startdate, return_datetime=True) - start = start_datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' - start = f'&start={start}' - if enddate: - end_datetime = date_parser(enddate, return_datetime=True) - # If the start and end date are the same, and a day to the end date - if start_datetime == end_datetime: - end_datetime += timedelta(hours=23, minutes=59, seconds=59) - end = end_datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' - end = f'&end={end}' - # build the url to query the web service using the arguments provided - query_url = ( - 'https://adc.arm.gov/armlive/livedata/query?' 
+ 'user={0}&ds={1}{2}{3}&wt=json' - ).format(':'.join([username, token]), datastream, start, end) - - # get url response, read the body of the message, - # and decode from bytes type to utf-8 string - response_body = urlopen(query_url).read().decode('utf-8') - # if the response is an html doc, then there was an error with the user - if response_body[1:14] == '!DOCTYPE html': - raise ConnectionRefusedError('Error with user. Check username or token.') - - # parse into json object - response_body_json = json.loads(response_body) - - # construct output directory - if output: - # output files to directory specified - output_dir = os.path.join(output) - else: - # if no folder given, add datastream folder - # to current working dir to prevent file mix-up - output_dir = os.path.join(os.getcwd(), datastream) - - # not testing, response is successful and files were returned - if response_body_json is None: - print('ARM Data Live Webservice does not appear to be functioning') - return [] - - num_files = len(response_body_json['files']) - file_names = [] - if response_body_json['status'] == 'success' and num_files > 0: - for fname in response_body_json['files']: - if time is not None: - if time not in fname: - continue - # construct link to web service saveData function - save_data_url = ( - 'https://adc.arm.gov/armlive/livedata/' + 'saveData?user={0}&file={1}' - ).format(':'.join([username, token]), fname) - output_file = os.path.join(output_dir, fname) - # make directory if it doesn't exist - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - # create file and write bytes to file - with open(output_file, 'wb') as open_bytes_file: - data = urlopen(save_data_url).read() - if 'This data file is not available' in str(data): - print(fname + ' is not available for download') - continue - else: - print(f'[DOWNLOADING] {fname}') - open_bytes_file.write(data) - file_names.append(output_file) - # Get ARM DOI and print it out - doi = get_arm_doi(datastream, start_datetime.strftime('%Y-%m-%d'), end_datetime.strftime('%Y-%m-%d')) - print('\nIf you use these data to prepare a publication, please cite:\n') - print(textwrap.fill(doi, width=80)) - print('') - else: - print( - 'No files returned or url status error.\n' 'Check datastream name, start, and end date.' - ) - - return file_names - - -def get_arm_doi(datastream, startdate, enddate): - """ - This function will return a citation with DOI, if available, for specified - datastream and date range - - Parameters - ---------- - datastream : str - The name of the datastream to get a DOI for. This must be ARM standard names - startdate : str - Start date for the citation in the format YY-MM-DD - enddate : str - End date for the citation in the format YY-MM-DD - - Returns - ------- - doi : str - Returns the citation as a string - - """ - - message = 'API will be changing from act.discovery.get_arm to act.discovery.arm' - warnings.warn(message, DeprecationWarning, 2) - - # Get the DOI information - doi_url = 'https://adc.arm.gov/citationservice/citation/datastream?id=' + datastream + '&citationType=apa' - doi_url += '&startDate=' + startdate - doi_url += '&endDate=' + enddate - try: - doi = requests.get(url=doi_url) - except ValueError as err: - return "Webservice potentially down or arguments are not valid: " + err - - if len(doi.text) > 0: - doi = doi.json()['citation'] - else: - doi = 'Please check your arguments. 
No DOI Found' - - return doi diff --git a/act/discovery/get_armfiles.py b/act/discovery/get_armfiles.py deleted file mode 100644 index a76b661d85..0000000000 --- a/act/discovery/get_armfiles.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -Script for downloading data from ARM's Live Data Webservice - -""" - -import argparse -import json -import os -import sys -from datetime import timedelta -import requests -import textwrap -import warnings - -try: - from urllib.request import urlopen -except ImportError: - from urllib import urlopen - -from act.utils import date_parser - - -def download_data(username, token, datastream, startdate, enddate, time=None, output=None): - """ - This tool will help users utilize the ARM Live Data Webservice to download - ARM data. - - Parameters - ---------- - username : str - The username to use for logging into the ADC archive. - token : str - The access token for accessing the ADC archive. - datastream : str - The name of the datastream to acquire. - startdate : str - The start date of the data to acquire. Formats accepted are - YYYY-MM-DD, DD.MM.YYYY, DD/MM/YYYY, YYYYMMDD, YYYY/MM/DD or - any of the previous formats with THH:MM:SS added onto the end - (ex. 2020-09-15T12:00:00). - enddate : str - The end date of the data to acquire. Formats accepted are - YYYY-MM-DD, DD.MM.YYYY, DD/MM/YYYY, YYYYMMDD or YYYY/MM/DD, or - any of the previous formats with THH:MM:SS added onto the end - (ex. 2020-09-15T13:00:00). - time: str or None - The specific time. Format is HHMMSS. Set to None to download all files - in the given date interval. - output : str - The output directory for the data. Set to None to make a folder in the - current working directory with the same name as *datastream* to place - the files in. - - Returns - ------- - files : list - Returns list of files retrieved - - Notes - ----- - This programmatic interface allows users to query and automate - machine-to-machine downloads of ARM data. This tool uses a REST URL and - specific parameters (saveData, query), user ID and access token, a - datastream name, a start date, and an end date, and data files matching - the criteria will be returned to the user and downloaded. - - By using this web service, users can setup cron jobs and automatically - download data from /data/archive into their workspace. This will also - eliminate the manual step of following a link in an email to download data. - All other data files, which are not on the spinning - disk (on HPSS), will have to go through the regular ordering process. - More information about this REST API and tools can be found on `ARM Live - `_. - - To login/register for an access token click `here - `_. - - Author: Michael Giansiracusa - Email: giansiracumt@ornl.gov - - Examples - -------- - This code will download the netCDF files from the sgpmetE13.b1 datastream - and place them in a directory named sgpmetE13.b1. The data from 14 Jan to - 20 Jan 2017 will be downloaded. Replace *userName* and *XXXXXXXXXXXXXXXX* - with your username and token for ARM Data Discovery. See the Notes for - information on how to obtain a username and token. - - .. code-block:: python - - act.discovery.download_data( - "userName", "XXXXXXXXXXXXXXXX", "sgpmetE13.b1", "2017-01-14", "2017-01-20" - ) - - """ - message = 'act.discovery.get_armfiles.download_data will be retired in version 2.0.0. Please use act.discovery.arm.download_arm_data instead.' 
- warnings.warn(message, DeprecationWarning, 2) - - # default start and end are empty - start, end = '', '' - # start and end strings for query_url are constructed - # if the arguments were provided - if startdate: - start_datetime = date_parser(startdate, return_datetime=True) - start = start_datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' - start = f'&start={start}' - if enddate: - end_datetime = date_parser(enddate, return_datetime=True) - # If the start and end date are the same, and a day to the end date - if start_datetime == end_datetime: - end_datetime += timedelta(hours=23, minutes=59, seconds=59) - end = end_datetime.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' - end = f'&end={end}' - # build the url to query the web service using the arguments provided - query_url = ( - 'https://adc.arm.gov/armlive/livedata/query?' + 'user={0}&ds={1}{2}{3}&wt=json' - ).format(':'.join([username, token]), datastream, start, end) - - # get url response, read the body of the message, - # and decode from bytes type to utf-8 string - response_body = urlopen(query_url).read().decode('utf-8') - # if the response is an html doc, then there was an error with the user - if response_body[1:14] == '!DOCTYPE html': - raise ConnectionRefusedError('Error with user. Check username or token.') - - # parse into json object - response_body_json = json.loads(response_body) - - # construct output directory - if output: - # output files to directory specified - output_dir = os.path.join(output) - else: - # if no folder given, add datastream folder - # to current working dir to prevent file mix-up - output_dir = os.path.join(os.getcwd(), datastream) - - # not testing, response is successful and files were returned - if response_body_json is None: - print('ARM Data Live Webservice does not appear to be functioning') - return [] - - num_files = len(response_body_json['files']) - file_names = [] - if response_body_json['status'] == 'success' and num_files > 0: - for fname in response_body_json['files']: - if time is not None: - if time not in fname: - continue - # construct link to web service saveData function - save_data_url = ( - 'https://adc.arm.gov/armlive/livedata/' + 'saveData?user={0}&file={1}' - ).format(':'.join([username, token]), fname) - output_file = os.path.join(output_dir, fname) - # make directory if it doesn't exist - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - # create file and write bytes to file - with open(output_file, 'wb') as open_bytes_file: - data = urlopen(save_data_url).read() - if 'This data file is not available' in str(data): - print(fname + ' is not available for download') - continue - else: - print(f'[DOWNLOADING] {fname}') - open_bytes_file.write(data) - file_names.append(output_file) - # Get ARM DOI and print it out - doi = get_arm_doi(datastream, start_datetime.strftime('%Y-%m-%d'), end_datetime.strftime('%Y-%m-%d')) - print('\nIf you use these data to prepare a publication, please cite:\n') - print(textwrap.fill(doi, width=80)) - print('') - else: - print( - 'No files returned or url status error.\n' 'Check datastream name, start, and end date.' - ) - - return file_names - - -def get_arm_doi(datastream, startdate, enddate): - """ - This function will return a citation with DOI, if available, for specified - datastream and date range - - Parameters - ---------- - datastream : str - The name of the datastream to get a DOI for. 
This must be ARM standard names - startdate : str - Start date for the citation in the format YY-MM-DD - enddate : str - End date for the citation in the format YY-MM-DD - - Returns - ------- - doi : str - Returns the citation as a string - - """ - - message = 'API will be changing from act.discovery.get_armfiles to act.discovery.arm' - warnings.warn(message, DeprecationWarning, 2) - - # Get the DOI information - doi_url = 'https://adc.arm.gov/citationservice/citation/datastream?id=' + datastream + '&citationType=apa' - doi_url += '&startDate=' + startdate - doi_url += '&endDate=' + enddate - try: - doi = requests.get(url=doi_url) - except ValueError as err: - return "Webservice potentially down or arguments are not valid: " + err - - if len(doi.text) > 0: - doi = doi.json()['citation'] - else: - doi = 'Please check your arguments. No DOI Found' - - return doi diff --git a/act/discovery/get_asos.py b/act/discovery/get_asos.py deleted file mode 100644 index 4cad5def04..0000000000 --- a/act/discovery/get_asos.py +++ /dev/null @@ -1,288 +0,0 @@ -""" -Script for downloading ASOS data from the Iowa Mesonet API - -""" - -import json -import time -import warnings -from datetime import datetime - -import numpy as np -import pandas as pd -import xarray as xr -from six import StringIO - -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen - - -def get_asos(time_window, lat_range=None, lon_range=None, station=None): - """ - Returns all of the station observations from the Iowa Mesonet from either - a given latitude and longitude window or a given station code. - - Parameters - ---------- - time_window: tuple - A 2 member list or tuple containing the start and end times. The - times must be python datetimes. - lat_range: tuple - The latitude window to grab all of the ASOS observations from. - lon_range: tuple - The longitude window to grab all of the ASOS observations from. - station: str - The station ID to grab the ASOS observations from. - - Returns - ------- - asos_ds: dict of xarray.Datasets - A dictionary of ACT datasets whose keys are the ASOS station IDs. 
- - Examples - -------- - If you want to obtain timeseries of ASOS observations for Chicago O'Hare - Airport, simply do:: - - $ time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 10, 10, 0)] - $ station = "KORD" - $ my_asoses = act.discovery.get_asos(time_window, station="ORD") - """ - - message = 'API will be changing from act.discovery.get_asos.get_asos to act.discovery.asos.get_asos_data' - warnings.warn(message, DeprecationWarning, 2) - - # First query the database for all of the JSON info for every station - # Only add stations whose lat/lon are within the Grid's boundaries - regions = """AF AL_ AI_ AQ_ AG_ AR_ AK AL AM_ - AO_ AS_ AR AW_ AU_ AT_ - AZ_ BA_ BE_ BB_ BG_ BO_ BR_ BF_ - BT_ BS_ BI_ BM_ BB_ BY_ BZ_ BJ_ BW_ AZ CA CA_AB - CA_BC CD_ CK_ CF_ CG_ CL_ CM_ CO CO_ CN_ CR_ CT - CU_ CV_ CY_ CZ_ DE DK_ DJ_ DM_ DO_ - DZ EE_ ET_ FK_ FM_ FJ_ FI_ FR_ GF_ PF_ - GA_ GM_ GE_ DE_ GH_ GI_ KY_ GB_ GR_ GL_ GD_ - GU_ GT_ GN_ GW_ GY_ HT_ HN_ HK_ HU_ IS_ IN_ - ID_ IR_ IQ_ IE_ IL_ IT_ CI_ JM_ JP_ - JO_ KZ_ KE_ KI_ KW_ LA_ LV_ LB_ LS_ LR_ LY_ - LT_ LU_ MK_ MG_ MW_ MY_ MV_ ML_ CA_MB - MH_ MR_ MU_ YT_ MX_ MD_ MC_ MA_ MZ_ MM_ NA_ NP_ - AN_ NL_ CA_NB NC_ CA_NF NF_ NI_ - NE_ NG_ MP_ KP_ CA_NT NO_ CA_NS CA_NU OM_ - CA_ON PK_ PA_ PG_ PY_ PE_ PH_ PN_ PL_ - PT_ CA_PE PR_ QA_ CA_QC RO_ RU_RW_ SH_ KN_ - LC_ VC_ WS_ ST_ CA_SK SA_ SN_ RS_ SC_ - SL_ SG_ SK_ SI_ SB_ SO_ ZA_ KR_ ES_ LK_ SD_ SR_ - SZ_ SE_ CH_ SY_ TW_ TJ_ TZ_ TH_ - TG_ TO_ TT_ TU TN_ TR_ TM_ UG_ UA_ AE_ UN_ UY_ - UZ_ VU_ VE_ VN_ VI_ YE_ CA_YT ZM_ ZW_ - EC_ EG_ FL GA GQ_ HI HR_ IA ID IL IO_ IN KS - KH_ KY KM_ LA MA MD ME - MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK - OR PA RI SC SV_ SD TD_ TN TX UT VA VT VG_ - WA WI WV WY""" - - networks = ['AWOS'] - metadata_list = {} - if lat_range is not None and lon_range is not None: - lon_min, lon_max = lon_range - lat_min, lat_max = lat_range - for region in regions.split(): - networks.append(f'{region}_ASOS') - - site_list = [] - for network in networks: - # Get metadata - uri = ('https://mesonet.agron.iastate.edu/' 'geojson/network/%s.geojson') % (network,) - data = urlopen(uri) - jdict = json.load(data) - for site in jdict['features']: - lat = site['geometry']['coordinates'][1] - lon = site['geometry']['coordinates'][0] - if lat >= lat_min and lat <= lat_max: - if lon >= lon_min and lon <= lon_max: - station_metadata_dict = {} - station_metadata_dict['site_latitude'] = lat - station_metadata_dict['site_longitude'] = lat - for my_keys in site['properties']: - station_metadata_dict[my_keys] = site['properties'][my_keys] - metadata_list[site['properties']['sid']] = station_metadata_dict - site_list.append(site['properties']['sid']) - elif station is not None: - site_list = [station] - for region in regions.split(): - networks.append(f'{region}_ASOS') - for network in networks: - # Get metadata - uri = ('https://mesonet.agron.iastate.edu/' 'geojson/network/%s.geojson') % (network,) - data = urlopen(uri) - jdict = json.load(data) - for site in jdict['features']: - lat = site['geometry']['coordinates'][1] - lon = site['geometry']['coordinates'][0] - if site['properties']['sid'] == station: - station_metadata_dict = {} - station_metadata_dict['site_latitude'] = lat - station_metadata_dict['site_longitude'] = lon - for my_keys in site['properties']: - if my_keys == 'elevation': - station_metadata_dict['elevation'] = ( - '%f meter' % site['properties'][my_keys] - ) - else: - station_metadata_dict[my_keys] = site['properties'][my_keys] - metadata_list[station] = station_metadata_dict - - # Get station 
metadata - else: - raise ValueError('Either both lat_range and lon_range or station must ' + 'be specified!') - - # Get the timestamp for each request - start_time = time_window[0] - end_time = time_window[1] - - SERVICE = 'http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?' - service = SERVICE + 'data=all&tz=Etc/UTC&format=comma&latlon=yes&' - - service += start_time.strftime('year1=%Y&month1=%m&day1=%d&hour1=%H&minute1=%M&') - service += end_time.strftime('year2=%Y&month2=%m&day2=%d&hour2=%H&minute2=%M') - asos_ds = {} - for stations in site_list: - uri = f'{service}&station={stations}' - print(f'Downloading: {stations}') - data = _download_data(uri) - buf = StringIO() - buf.write(data) - buf.seek(0) - - my_df = pd.read_csv(buf, skiprows=5, na_values='M') - - if len(my_df['lat'].values) == 0: - warnings.warn( - 'No data available at station %s between time %s and %s' - % ( - stations, - start_time.strftime('%Y-%m-%d %H:%M:%S'), - end_time.strftime('%Y-%m-%d %H:%M:%S'), - ) - ) - else: - - def to_datetime(x): - return datetime.strptime(x, '%Y-%m-%d %H:%M') - - my_df['time'] = my_df['valid'].apply(to_datetime) - my_df = my_df.set_index('time') - my_df = my_df.drop('valid', axis=1) - my_df = my_df.drop('station', axis=1) - my_df = my_df.to_xarray() - - my_df.attrs = metadata_list[stations] - my_df['lon'].attrs['units'] = 'degree' - my_df['lon'].attrs['long_name'] = 'Longitude' - my_df['lat'].attrs['units'] = 'degree' - my_df['lat'].attrs['long_name'] = 'Latitude' - - my_df['tmpf'].attrs['units'] = 'degrees Fahrenheit' - my_df['tmpf'].attrs['long_name'] = 'Temperature in degrees Fahrenheit' - - # Fahrenheit to Celsius - my_df['temp'] = (5.0 / 9.0 * my_df['tmpf']) - 32.0 - my_df['temp'].attrs['units'] = 'degrees Celsius' - my_df['temp'].attrs['long_name'] = 'Temperature in degrees Celsius' - my_df['dwpf'].attrs['units'] = 'degrees Fahrenheit' - my_df['dwpf'].attrs['long_name'] = 'Dewpoint temperature in degrees Fahrenheit' - - # Fahrenheit to Celsius - my_df['dwpc'] = (5.0 / 9.0 * my_df['tmpf']) - 32.0 - my_df['dwpc'].attrs['units'] = 'degrees Celsius' - my_df['dwpc'].attrs['long_name'] = 'Dewpoint temperature in degrees Celsius' - my_df['relh'].attrs['units'] = 'percent' - my_df['relh'].attrs['long_name'] = 'Relative humidity' - my_df['drct'].attrs['units'] = 'degrees' - my_df['drct'].attrs['long_name'] = 'Wind speed in degrees' - my_df['sknt'].attrs['units'] = 'knots' - my_df['sknt'].attrs['long_name'] = 'Wind speed in knots' - my_df['spdms'] = my_df['sknt'] * 0.514444 - my_df['spdms'].attrs['units'] = 'm s-1' - my_df['spdms'].attrs['long_name'] = 'Wind speed in meters per second' - my_df['u'] = -np.sin(np.deg2rad(my_df['drct'])) * my_df['spdms'] - my_df['u'].attrs['units'] = 'm s-1' - my_df['u'].attrs['long_name'] = 'Zonal component of surface wind' - my_df['v'] = -np.cos(np.deg2rad(my_df['drct'])) * my_df['spdms'] - my_df['v'].attrs['units'] = 'm s-1' - my_df['v'].attrs['long_name'] = 'Meridional component of surface wind' - my_df['mslp'].attrs['units'] = 'mb' - my_df['mslp'].attrs['long_name'] = 'Mean Sea Level Pressure' - my_df['alti'].attrs['units'] = 'in Hg' - my_df['alti'].attrs['long_name'] = 'Atmospheric pressure in inches of Mercury' - my_df['vsby'].attrs['units'] = 'mi' - my_df['vsby'].attrs['long_name'] = 'Visibility' - my_df['vsbykm'] = my_df['vsby'] * 1.60934 - my_df['vsbykm'].attrs['units'] = 'km' - my_df['vsbykm'].attrs['long_name'] = 'Visibility' - my_df['gust'] = my_df['gust'] * 0.514444 - my_df['gust'].attrs['units'] = 'm s-1' - my_df['gust'].attrs['long_name'] = 
'Wind gust speed' - my_df['skyc1'].attrs['long_name'] = 'Sky level 1 coverage' - my_df['skyc2'].attrs['long_name'] = 'Sky level 2 coverage' - my_df['skyc3'].attrs['long_name'] = 'Sky level 3 coverage' - my_df['skyc4'].attrs['long_name'] = 'Sky level 4 coverage' - my_df['skyl1'] = my_df['skyl1'] * 0.3048 - my_df['skyl2'] = my_df['skyl2'] * 0.3048 - my_df['skyl3'] = my_df['skyl3'] * 0.3048 - my_df['skyl4'] = my_df['skyl4'] * 0.3048 - my_df['skyl1'].attrs['long_name'] = 'Sky level 1 altitude' - my_df['skyl2'].attrs['long_name'] = 'Sky level 2 altitude' - my_df['skyl3'].attrs['long_name'] = 'Sky level 3 altitude' - my_df['skyl4'].attrs['long_name'] = 'Sky level 4 altitude' - my_df['skyl1'].attrs['long_name'] = 'meter' - my_df['skyl2'].attrs['long_name'] = 'meter' - my_df['skyl3'].attrs['long_name'] = 'meter' - my_df['skyl4'].attrs['long_name'] = 'meter' - - my_df['wxcodes'].attrs['long_name'] = 'Weather code' - my_df['ice_accretion_1hr'] = my_df['ice_accretion_1hr'] * 2.54 - my_df['ice_accretion_1hr'].attrs['units'] = 'cm' - my_df['ice_accretion_1hr'].attrs['long_name'] = '1 hour ice accretion' - my_df['ice_accretion_3hr'] = my_df['ice_accretion_3hr'] * 2.54 - my_df['ice_accretion_3hr'].attrs['units'] = 'cm' - my_df['ice_accretion_3hr'].attrs['long_name'] = '3 hour ice accretion' - my_df['ice_accretion_6hr'] = my_df['ice_accretion_3hr'] * 2.54 - my_df['ice_accretion_6hr'].attrs['units'] = 'cm' - my_df['ice_accretion_6hr'].attrs['long_name'] = '6 hour ice accretion' - my_df['peak_wind_gust'] = my_df['peak_wind_gust'] * 0.514444 - my_df['peak_wind_gust'].attrs['units'] = 'm s-1' - my_df['peak_wind_gust'].attrs['long_name'] = 'Peak wind gust speed' - my_df['peak_wind_drct'].attrs['drct'] = 'degree' - my_df['peak_wind_drct'].attrs['long_name'] = 'Peak wind gust direction' - my_df['u_peak'] = -np.sin(np.deg2rad(my_df['peak_wind_drct'])) * my_df['peak_wind_gust'] - my_df['u_peak'].attrs['units'] = 'm s-1' - my_df['u_peak'].attrs['long_name'] = 'Zonal component of surface wind' - my_df['v_peak'] = -np.cos(np.deg2rad(my_df['peak_wind_drct'])) * my_df['peak_wind_gust'] - my_df['v_peak'].attrs['units'] = 'm s-1' - my_df['v_peak'].attrs['long_name'] = 'Meridional component of surface wind' - my_df['metar'].attrs['long_name'] = 'Raw METAR code' - my_df.attrs['_datastream'] = stations - buf.close() - - asos_ds[stations] = my_df - return asos_ds - - -def _download_data(uri): - attempt = 0 - while attempt < 6: - try: - data = urlopen(uri, timeout=300).read().decode('utf-8') - if data is not None and not data.startswith('ERROR'): - return data - except Exception as exp: - print(f'download_data({uri}) failed with {exp}') - time.sleep(5) - attempt += 1 - - print('Exhausted attempts to download, returning empty data') - return '' diff --git a/act/discovery/get_cropscape.py b/act/discovery/get_cropscape.py deleted file mode 100644 index 8c7e6ce6e1..0000000000 --- a/act/discovery/get_cropscape.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -Function for getting CropScape data based on an entered lat/lon. - -""" - -import datetime -import requests -import warnings - -try: - from pyproj import Transformer -except ImportError: - from pyproj.transformer import Transformer - - -def croptype(lat=None, lon=None, year=None): - """ - Function for working with the CropScape API to get a crop type based on - the lat,lon, and year entered. The lat/lon is converted to the projection - used by CropScape before pased to the API. 
CropScape - Copyright © Center - For Spatial Information Science and Systems 2009 - 2018 - - Parameters - ---------- - lat : float - Latitude of point to retrieve. - lon : float - Longitude of point to retrieve. - year : int - Year to get croptype for. - - Returns - ------- - category : string - String of the crop type at that specific lat/lon for the given year. - - References - ---------- - USDA National Agricultural Statistics Service Cropland Data Layer. {YEAR}. - Published crop-specific data layer [Online]. Available at https://nassgeodata.gmu.edu/CropScape/ - (accessed {DATE}; verified {DATE}). USDA-NASS, Washington, DC. - - Examples - -------- - To get the crop type, simply do: - - .. code-block :: python - - type = act.discovery.get_cropscape.croptype(36.8172,-97.1709,'2018') - - """ - - message = 'API will be changing from act.discovery.get_cropscape.get_croptype to act.discovery.croptype.get_crop_type' - warnings.warn(message, DeprecationWarning, 2) - - # Return if lat/lon are not passed in - if lat is None or lon is None: - raise RuntimeError('Lat and Lon need to be provided') - - # Set the CropScape Projection - outproj = ( - 'PROJCS["NAD_1983_Albers",' - 'GEOGCS["NAD83",' - 'DATUM["North_American_Datum_1983",' - 'SPHEROID["GRS 1980",6378137,298.257222101,' - 'AUTHORITY["EPSG","7019"]],' - 'TOWGS84[0,0,0,0,0,0,0],' - 'AUTHORITY["EPSG","6269"]],' - 'PRIMEM["Greenwich",0,' - 'AUTHORITY["EPSG","8901"]],' - 'UNIT["degree",0.0174532925199433,' - 'AUTHORITY["EPSG","9108"]],' - 'AUTHORITY["EPSG","4269"]],' - 'PROJECTION["Albers_Conic_Equal_Area"],' - 'PARAMETER["standard_parallel_1",29.5],' - 'PARAMETER["standard_parallel_2",45.5],' - 'PARAMETER["latitude_of_center",23],' - 'PARAMETER["longitude_of_center",-96],' - 'PARAMETER["false_easting",0],' - 'PARAMETER["false_northing",0],' - 'UNIT["meters",1]]' - ) - - # Set the input projection to be lat/lon - inproj = 'EPSG:4326' - - # Get the x/y coordinates for CropScape - transformer = Transformer.from_crs(inproj, outproj) - x, y = transformer.transform(lat, lon) - - # Build URL - url = 'https://nassgeodata.gmu.edu/axis2/services/CDLService/GetCDLValue?' - if year is None: - now = datetime.datetime.now() - year = now.year - 1 - - # Add year, lat, and lon as parameters - params = {'year': str(year), 'x': str(x), 'y': str(y)} - - try: - req = requests.get(url, params=params, timeout=1) - except Exception: - return - - # Return from the webservice is not convertable to json - # So we need to do some text mining - text = req.text - text = text.split(',') - category = [t for t in text if 'category' in t] - category = category[0].split(': ')[-1][1:-1] - - return category diff --git a/act/discovery/get_neon.py b/act/discovery/get_neon.py deleted file mode 100644 index 91dfb838e4..0000000000 --- a/act/discovery/get_neon.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -Function for downloading data from NSF NEON program -using their API. - -NEON sites can be found through the NEON website -https://www.neonscience.org/field-sites/explore-field-sites - -""" - -import json -import requests -import os -import shutil -import pandas as pd -import warnings - - -def get_site_products(site_code, print_to_screen=False): - """ - Returns a list of data products available for a NEON site - NEON sites can be found through the NEON website - https://www.neonscience.org/field-sites/explore-field-sites - - Parameters - ---------- - site : str - NEON site identifier. 
Required variable - print_to_screen : boolean - If set to True will print to screen - - Returns - ------- - products : list - Returns 2D list of data product code and title - - """ - - message = 'API will be changing from act.discovery.get_neon.get_site_products to act.discovery.neon.get_neon_site_products' - warnings.warn(message, DeprecationWarning, 2) - - # Every request begins with the server's URL - server = 'http://data.neonscience.org/api/v0/' - - # Make request, using the sites/ endpoint - site_request = requests.get(server + 'sites/' + site_code) - - # Convert to Python JSON object - site_json = site_request.json() - - products = {} - # View product code and name for every available data product - for product in site_json['data']['dataProducts']: - if print_to_screen: - print(product['dataProductCode'], product['dataProductTitle']) - products[product['dataProductCode']] = product['dataProductTitle'] - - return products - - -def get_product_avail(site_code, product_code, print_to_screen=False): - """ - Returns a list of data products available for a NEON site - NEON sites can be found through the NEON website - https://www.neonscience.org/field-sites/explore-field-sites - - Parameters - ---------- - site : str - NEON site identifier. Required variable - product_code : str - NEON product code. Required variable - print_to_screen : boolean - If set to True will print to screen - - Returns - ------- - dates : list - Returns list of available months of data - - """ - - message = 'API will be changing from act.discovery.get_neon.get_product_avail to act.discovery.neon.get_neon_product_avail' - warnings.warn(message, DeprecationWarning, 2) - - # Every request begins with the server's URL - server = 'http://data.neonscience.org/api/v0/' - - # Make request, using the sites/ endpoint - site_request = requests.get(server + 'sites/' + site_code) - - # Convert to Python JSON object - site_json = site_request.json() - - # View product code and name for every available data product - for product in site_json['data']['dataProducts']: - if product['dataProductCode'] != product_code: - continue - if print_to_screen: - print(product['availableMonths']) - dates = product['availableMonths'] - - return dates - - -def download_neon_data(site_code, product_code, start_date, end_date=None, output_dir=None): - """ - Returns a list of data products available for a NEON site. Please be sure to view the - readme files that are downloaded as well as there may be a number of different products. - - If you want more information on the NEON file formats, please see: - https://www.neonscience.org/data-samples/data-management/data-formats-conventions - - NEON sites can be found through the NEON website - https://www.neonscience.org/field-sites/explore-field-sites - - Please be sure to acknowledge and cite the NEON program and data products appropriately: - https://www.neonscience.org/data-samples/data-policies-citation - - Parameters - ---------- - site : str - NEON site identifier. Required variable - product_code : str - NEON product code. Required variable - start_date : str - Start date of the range to download in YYYY-MM format - end_date : str - End date of the range to download in YYYY-MM format. - If None, will just download data for start_date - output_dir : str - Local directory to store the data. 
If None, will default to - [current working directory]/[site_code]_[product_code] - - Returns - ------- - files : list - Returns a list of files that were downloaded - - """ - - message = 'API will be changing from act.discovery.get_neon to act.discovery.neon' - warnings.warn(message, DeprecationWarning, 2) - - # Every request begins with the server's URL - server = 'http://data.neonscience.org/api/v0/' - - # Get dates to pass in - if end_date is not None: - date_range = pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m').tolist() - else: - date_range = [start_date] - - # For each month, download data for specified site/product - files = [] - for date in date_range: - # Make Request - data_request = requests.get(server + 'data/' + product_code + '/' + site_code + '/' + date) - data_json = data_request.json() - - if output_dir is None: - output_dir = os.path.join(os.getcwd(), site_code + '_' + product_code) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - for file in data_json['data']['files']: - print('[DOWNLOADING] ', file['name']) - output_filename = os.path.join(output_dir, file['name']) - with requests.get(file['url'], stream=True) as r: - with open(output_filename, 'wb') as f: - shutil.copyfileobj(r.raw, f) - files.append(output_filename) - - return files diff --git a/act/discovery/get_noaapsl.py b/act/discovery/get_noaapsl.py deleted file mode 100644 index a76280e235..0000000000 --- a/act/discovery/get_noaapsl.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -Function for downloading data from NOAA PSL Profiler Network - -""" -import json -from datetime import datetime -import pandas as pd -import numpy as np -import os -import warnings - -try: - from urllib.request import urlopen -except ImportError: - from urllib import urlopen - - -def download_noaa_psl_data(site=None, instrument=None, startdate=None, enddate=None, - hour=None, output=None): - """ - Function to download data from the NOAA PSL Profiler Network Data Library - https://psl.noaa.gov/data/obs/datadisplay/ - - Parameters - ---------- - site : str - 3 letter NOAA site identifier. Required variable - instrument : str - Name of the dataset to download. Options currently include (name prior to -). Required variable - 'Parsivel' - Parsivel disdrometer data - 'Pressure', 'Datalogger', 'Net Radiation', 'Temp/RH', 'Solar Radiation' - Surface meteorology/radiation data - 'Tipping Bucket', 'TBRG', 'Wind Speed', 'Wind Direction' - Surface meteorology/radiation data - 'Wind Speed and Direction' - Surface meteorology/radiation data - 'GpsTrimble' - GPS Trimble water vapor data - 'Radar S-band Moment' - 3 GHz Precipitation Profiler moment data - 'Radar S-band Bright Band' - 3 GHz Precipitation Profiler bright band data - '449RWP Bright Band' - 449 MHz Wind Profiler bright band data - '449RWP Wind' - 449 MHz Wind Profiler wind data - '449RWP Sub-Hour Wind' - 449 MHz Wind Profiler sub-hourly wind data - '449RWP Sub-Hour Temp' - 449 MHz Wind Profiler sub-hourly temperature data - '915RWP Wind' - 915 MHz Wind Profiler wind data - '915RWP Temp' - 915 MHz Wind Profiler temperature data - '915RWP Sub-Hour Wind' - 915 MHz Wind Profiler sub-hourly wind data - '915WP Sub-Hour Temp' - 915 MHz Wind Profiler sub-hourly temperature data - 'Radar FMCW Moment' - FMCW Radar moments data - 'Radar FMCW Bright Band' - FMCW Radar bright band data - startdate : str - The start date of the data to acquire. Format is YYYYMMDD. Required variable - enddate : str - The end date of the data to acquire. 
Format is YYYYMMDD - hour : str - Two digit hour of file to dowload if wanting a specific time - output : str - The output directory for the data. Set to None to make a folder in the - current working directory with the same name as *datastream* to place - the files in. - - Returns - ------- - files : list - Returns list of files retrieved - - """ - - message = 'API will be changing from act.discovery.get_noaapsl to act.discovery.noaapsl' - warnings.warn(message, DeprecationWarning, 2) - - if (site is None) or (instrument is None) or (startdate is None): - raise ValueError('site, instrument, and startdate need to be set') - - datastream = site + '_' + instrument.replace(' ', '_') - # Convert dates to day of year (doy) for NOAA folder structure - s_doy = datetime.strptime(startdate, '%Y%m%d').timetuple().tm_yday - year = datetime.strptime(startdate, '%Y%m%d').year - if enddate is None: - enddate = startdate - e_doy = datetime.strptime(enddate, '%Y%m%d').timetuple().tm_yday - - # Set base URL - url = 'https://downloads.psl.noaa.gov/psd2/data/realtime/' - - # Set list of strings that all point to the surface meteorology dataset - met_ds = ['Pressure', 'Datalogger', 'Net Radiation', 'Temp/RH', - 'Solar Radiation', 'Tipping Bucket', 'TBRG', 'Wind Speed', - 'Wind Direction', 'Wind Speed and Direction'] - - # Add to the url depending on which instrument is requested - if 'Parsivel' in instrument: - url += 'DisdrometerParsivel/Stats/' - elif any([d in instrument for d in met_ds]): - url += 'CsiDatalogger/SurfaceMet/' - elif 'GpsTrimble' in instrument: - url += 'GpsTrimble/WaterVapor/' - elif 'Radar S-band Moment' in instrument: - url += 'Radar3000/PopMoments/' - elif 'Radar S-band Bright Band' in instrument: - url += 'Radar3000/BrightBand/' - elif '449RWP Bright Band' in instrument: - url += 'Radar449/BrightBand/' - elif '449RWP Wind' in instrument: - url += 'Radar449/WwWind/' - elif '449RWP Sub-Hour Wind' in instrument: - url += 'Radar449/WwWindSubHourly/' - elif '449RWP Sub-Hour Temp' in instrument: - url += 'Radar449/WwTempSubHourly/' - elif '915RWP Wind' in instrument: - url += 'Radar915/WwWind/' - elif '915RWP Temp' in instrument: - url += 'Radar915/WwTemp/' - elif '915RWP Sub-Hour Wind' in instrument: - url += 'Radar915/WwWindSubHourly/' - elif '915RWP Sub-Hour Temp' in instrument: - url += 'Radar915/WwTempSubHourly/' - elif 'Radar FMCW Moment' in instrument: - url += 'RadarFMCW/PopMoments/' - elif 'Radar FMCW Bright Band' in instrument: - url += 'RadarFMCW/BrightBand/' - else: - raise ValueError('Instrument not supported') - - # Construct output directory - if output: - # Output files to directory specified - output_dir = os.path.join(output) - else: - # If no folder given, add datastream folder - # to current working dir to prevent file mix-up - output_dir = os.path.join(os.getcwd(), datastream) - - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - - # Set up doy ranges, taking into account changes for a new year - prev_doy = 0 - if e_doy < s_doy: - r = list(range(s_doy, 366)) + list(range(1, e_doy + 1)) - else: - r = list(range(s_doy, e_doy + 1)) - - # Set filename variable to return - filenames = [] - - # Loop through each doy in range - for doy in r: - # if the previous day is greater than current, assume a new year - # i.e. 
365 -> 001 - if prev_doy > doy: - year += 1 - # Add site, year, and 3-digit day to url - new_url = url + site + '/' + str(year) + '/' + str(doy).zfill(3) + '/' - - # User pandas to get a list of filenames to download - # Exclude the first and last records which are "parent directory" and "nan" - files = pd.read_html(new_url, skiprows=[1])[0]['Name'] - files = list(files[1:-1]) - - # Write each file out to a file with same name as online - for f in files: - if hour is not None: - if (str(doy).zfill(3) + str(hour)) not in f and\ - (str(doy).zfill(3) + '.' + str(hour)) not in f: - continue - output_file = os.path.join(output_dir, f) - try: - print('Downloading ' + f) - with open(output_file, 'wb') as open_bytes_file: - open_bytes_file.write(urlopen(new_url + f).read()) - filenames.append(output_file) - except Exception: - pass - prev_doy = doy - - return filenames diff --git a/act/discovery/get_surfrad.py b/act/discovery/get_surfrad.py deleted file mode 100644 index 1f08925b6e..0000000000 --- a/act/discovery/get_surfrad.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Function for downloading data from -NOAA Surface Radiation Budget network - -""" -import json -from datetime import datetime -import pandas as pd -import numpy as np -import os -import re -import requests -import warnings - -try: - from urllib.request import urlopen -except ImportError: - from urllib import urlopen - - -def download_surfrad(site=None, startdate=None, enddate=None, output=None): - """ - Function to download data from the NOAA Surface Radiation Budget network. - https://gml.noaa.gov/grad/surfrad/ - - Parameters - ---------- - site : str - 3 letter NOAA site identifier. Required variable - List of sites can be found at https://gml.noaa.gov/grad/surfrad/sitepage.html - startdate : str - The start date of the data to acquire. Format is YYYYMMDD. Required variable - enddate : str - The end date of the data to acquire. Format is YYYYMMDD - output : str - The output directory for the data. Set to None to make a folder in the - current working directory with the same name as *datastream* to place - the files in. 
- - Returns - ------- - files : list - Returns list of files retrieved - - """ - - message = 'API will be changing from act.discovery.get_surfrad.download_surfrad to act.discovery.surfrad.download_surfrad_data' - warnings.warn(message, DeprecationWarning, 2) - - if (site is None) or (startdate is None): - raise ValueError('site and startdate need to be set') - - site = site.lower() - site_dict = { - 'bnd': 'Bondville_IL', - 'tbl': 'Boulder_CO', - 'dra': 'Desert_Rock_NV', - 'fpk': 'Fort_Peck_MT', - 'gwn': 'Goodwin_Creek_MS', - 'psu': 'Penn_State_PA', - 'sxf': 'Sioux_Falls_SD', - } - site_name = site_dict[site] - - # Convert dates to day of year (doy) for NOAA folder structure - s_doy = datetime.strptime(startdate, '%Y%m%d').timetuple().tm_yday - year = datetime.strptime(startdate, '%Y%m%d').year - if enddate is None: - enddate = startdate - e_doy = datetime.strptime(enddate, '%Y%m%d').timetuple().tm_yday - - # Set base URL - url = 'https://gml.noaa.gov/aftp/data/radiation/surfrad/' - - # Construct output directory - if output: - # Output files to directory specified - output_dir = os.path.join(output) - else: - # If no folder given, add datastream folder - # to current working dir to prevent file mix-up - output_dir = os.path.join(os.getcwd(), site_name + '_surfrad') - - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - - # Set up doy ranges, taking into account changes for a new year - prev_doy = 0 - if e_doy < s_doy: - r = list(range(s_doy, 366)) + list(range(1, e_doy + 1)) - else: - r = list(range(s_doy, e_doy + 1)) - - # Set filename variable to return - filenames = [] - - # Loop through each doy in range - for doy in r: - # if the previous day is greater than current, assume a new year - # i.e. 365 -> 001 - if prev_doy > doy: - year += 1 - - # Add filename to url - file = site + str(year)[2:4] + str(doy) + '.dat' - new_url = url + site_name + '/' + str(year) + '/' + file - - # Write each file out to a file with same name as online - output_file = os.path.join(output_dir, file) - try: - print('Downloading ' + file) - with open(output_file, 'wb') as open_bytes_file: - open_bytes_file.write(urlopen(new_url).read()) - filenames.append(output_file) - except Exception: - pass - prev_doy = doy - - return filenames diff --git a/act/tests/test_discovery.py b/act/tests/test_discovery.py index 75b06b1b32..97fc028984 100644 --- a/act/tests/test_discovery.py +++ b/act/tests/test_discovery.py @@ -1,15 +1,10 @@ import glob import os from datetime import datetime - import numpy as np import requests from requests.packages.urllib3.exceptions import InsecureRequestWarning - import act -from act.discovery import get_asos - -requests.packages.urllib3.disable_warnings(InsecureRequestWarning) def test_cropType(): @@ -18,8 +13,8 @@ def test_cropType(): lon = -98.362 # Try for when the cropscape API is not working try: - crop = act.discovery.get_cropscape.croptype(lat, lon, year) - crop2 = act.discovery.get_cropscape.croptype(lat, lon) + crop = act.discovery.cropscape.get_crop_type(lat, lon, year) + crop2 = act.discovery.cropscape.get_crop_type(lat, lon) except Exception: return @@ -33,7 +28,7 @@ def test_cropType(): def test_get_ord(): time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)] - my_asoses = get_asos(time_window, station='ORD') + my_asoses = act.discovery.get_asos_data(time_window, station='ORD') assert 'ORD' in my_asoses.keys() assert np.all( np.equal( @@ -48,7 +43,7 @@ def test_get_region(): time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)] 
     lat_window = (41.8781 - 0.5, 41.8781 + 0.5)
     lon_window = (-87.6298 - 0.5, -87.6298 + 0.5)
-    my_asoses = get_asos(time_window, lat_range=lat_window, lon_range=lon_window)
+    my_asoses = act.discovery.get_asos_data(time_window, lat_range=lat_window, lon_range=lon_window)
     asos_keys = [x for x in my_asoses.keys()]
     assert asos_keys == my_keys

@@ -69,7 +64,7 @@ def test_get_armfile():
     enddate = startdate
     outdir = os.getcwd() + '/data/'

-    results = act.discovery.get_armfiles.download_data(
+    results = act.discovery.arm.download_arm_data(
         username, token, datastream, startdate, enddate, output=outdir
     )
     files = glob.glob(outdir + datastream + '*20200101*cdf')
@@ -82,19 +77,19 @@ def test_get_armfile():
         os.remove(files[0])

     datastream = 'sgpmeetE13.b1'
-    act.discovery.get_armfiles.download_data(
+    act.discovery.arm.download_arm_data(
         username, token, datastream, startdate, enddate, output=outdir
     )
     files = glob.glob(outdir + datastream + '*20200101*cdf')
     assert len(files) == 0

     with np.testing.assert_raises(ConnectionRefusedError):
-        act.discovery.get_armfiles.download_data(
+        act.discovery.arm.download_arm_data(
             username, token + '1234', datastream, startdate, enddate, output=outdir
         )

     datastream = 'sgpmetE13.b1'
-    results = act.discovery.get_armfiles.download_data(
+    results = act.discovery.arm.download_arm_data(
         username, token, datastream, startdate, enddate
     )
     assert len(results) == 1
@@ -116,7 +111,7 @@ def test_get_armfile_hourly():
     enddate = '2020-01-01T12:00:00'
     outdir = os.getcwd() + '/data/'

-    results = act.discovery.get_armfiles.download_data(
+    results = act.discovery.arm.download_arm_data(
         username, token, datastream, startdate, enddate, output=outdir
     )
     files = glob.glob(outdir + datastream + '*20200101*cdf')
@@ -129,19 +124,19 @@ def test_get_armfile_hourly():
         os.remove(files[0])

     datastream = 'sgpmeetE13.b1'
-    act.discovery.get_armfiles.download_data(
+    act.discovery.arm.download_arm_data(
         username, token, datastream, startdate, enddate, output=outdir
     )
     files = glob.glob(outdir + datastream + '*20200101*cdf')
     assert len(files) == 0

     with np.testing.assert_raises(ConnectionRefusedError):
-        act.discovery.get_armfiles.download_data(
+        act.discovery.arm.download_arm_data(
             username, token + '1234', datastream, startdate, enddate, output=outdir
         )

     datastream = 'sgpmetE13.b1'
-    results = act.discovery.get_armfiles.download_data(
+    results = act.discovery.arm.download_arm_data(
         username, token, datastream, startdate, enddate
     )
     assert len(results) == 1
@@ -253,23 +248,23 @@ def test_noaa_psl():

 def test_neon():
     site_code = 'BARR'
-    result = act.discovery.get_neon.get_site_products(site_code, print_to_screen=True)
+    result = act.discovery.get_neon_site_products(site_code, print_to_screen=True)
     assert 'DP1.00002.001' in result
     assert result['DP1.00003.001'] == 'Triple aspirated air temperature'

     product_code = 'DP1.00002.001'
-    result = act.discovery.get_neon.get_product_avail(site_code, product_code, print_to_screen=True)
+    result = act.discovery.get_neon_product_avail(site_code, product_code, print_to_screen=True)
     assert '2017-09' in result
     assert '2022-11' in result

     output_dir = os.path.join(os.getcwd(), site_code + '_' + product_code)
-    result = act.discovery.get_neon.download_neon_data(site_code, product_code, '2022-10', output_dir=output_dir)
+    result = act.discovery.download_neon_data(site_code, product_code, '2022-10', output_dir=output_dir)
     assert len(result) == 20
     assert any('readme' in r for r in result)
     assert any('sensor_position' in r for r in result)

-    result = act.discovery.get_neon.download_neon_data(site_code, product_code, '2022-09',
-                                                        end_date='2022-10', output_dir=output_dir)
+    result = act.discovery.download_neon_data(site_code, product_code, '2022-09',
+                                               end_date='2022-10', output_dir=output_dir)
     assert len(result) == 40
     assert any('readme' in r for r in result)
     assert any('sensor_position' in r for r in result)
@@ -291,6 +286,6 @@ def test_arm_doi():


 def test_download_surfrad():
-    results = act.discovery.download_surfrad(site='tbl', startdate='20230601', enddate='20230602')
+    results = act.discovery.download_surfrad_data(site='tbl', startdate='20230601', enddate='20230602')
     assert len(results) == 2
     assert 'tbl23152.dat' in results[0]
diff --git a/codecov.yml b/codecov.yml
index edde4fd145..37d633bc41 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -7,14 +7,6 @@ comment: false
 ignore:
   - 'act/tests/*.py'
   - 'act/plotting/histogramdisplay.py'
-  - 'act/discovery/get_arm.py'
-  - 'act/discovery/arm.py'
-  - 'act/discovery/airnow.py'
-  - 'act/discovery/asos.py'
-  - 'act/discovery/cropscape.py'
-  - 'act/discovery/neon.py'
-  - 'act/discovery/noaapsl.py'
-  - 'act/discovery/surfrad.py'
   - 'act/*version*py'
   - 'setup.py'
   - 'versioneer.py'
diff --git a/examples/discovery/plot_airnow.py b/examples/discovery/plot_airnow.py
index 8bacae79b5..24ded44078 100644
--- a/examples/discovery/plot_airnow.py
+++ b/examples/discovery/plot_airnow.py
@@ -9,9 +9,7 @@
 """
 import os
-
 import matplotlib.pyplot as plt
-
 import act


 # You need an account and token from https://docs.airnowapi.org/ first
diff --git a/examples/discovery/plot_asos_temp.py b/examples/discovery/plot_asos_temp.py
index ef1235e4b7..d145d1212d 100644
--- a/examples/discovery/plot_asos_temp.py
+++ b/examples/discovery/plot_asos_temp.py
@@ -7,14 +7,12 @@
 """
 from datetime import datetime
-
 import matplotlib.pyplot as plt
-
 import act


 time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 10, 10, 0)]
 station = 'KORD'
-my_asoses = act.discovery.get_asos(time_window, station='ORD')
+my_asoses = act.discovery.get_asos_data(time_window, station='ORD')

 display = act.plotting.TimeSeriesDisplay(my_asoses['ORD'], subplot_shape=(2,), figsize=(15, 10))
 display.plot('temp', subplot_index=(0,))
diff --git a/examples/discovery/plot_neon.py b/examples/discovery/plot_neon.py
index 43d9d7a61b..610d3a006d 100644
--- a/examples/discovery/plot_neon.py
+++ b/examples/discovery/plot_neon.py
@@ -21,7 +21,7 @@

 if token is not None and len(token) > 0:
     # Download ARM data if a username/token are set
-    files = act.discovery.download_data(username, token, 'nsametC1.b1', '2022-10-01', '2022-10-07')
+    files = act.discovery.download_arm_data(username, token, 'nsametC1.b1', '2022-10-01', '2022-10-07')
     ds = act.io.armfiles.read_netcdf(files)

     # Download NEON Data
@@ -29,7 +29,7 @@
     # https://www.neonscience.org/field-sites/explore-field-sites
     site_code = 'BARR'
     product_code = 'DP1.00002.001'
-    result = act.discovery.get_neon.download_neon_data(site_code, product_code, '2022-10')
+    result = act.discovery.neon.download_neon_data(site_code, product_code, '2022-10')

     # A number of files are downloaded and further explained in the readme file that's downloaded.
     # These are the files we will need for reading 1 minute NEON data
diff --git a/examples/io/plot_surfrad.py b/examples/io/plot_surfrad.py
index 3adccdbb48..473185f24d 100644
--- a/examples/io/plot_surfrad.py
+++ b/examples/io/plot_surfrad.py
@@ -13,7 +13,7 @@
 import matplotlib.pyplot as plt

 # Easily download data from SURFRAD
-results = act.discovery.download_surfrad('tbl', startdate='20230601', enddate='20230602')
+results = act.discovery.download_surfrad_data('tbl', startdate='20230601', enddate='20230602')
 print(results)

 # But it's easy enough to read form the URLs as well
diff --git a/examples/plotting/plot_ceil.py b/examples/plotting/plot_ceil.py
index ef84721782..d8c03f3a86 100644
--- a/examples/plotting/plot_ceil.py
+++ b/examples/plotting/plot_ceil.py
@@ -21,7 +21,7 @@
     ceil_ds = act.io.armfiles.read_netcdf(act.tests.sample_files.EXAMPLE_CEIL1, engine='netcdf4')
 else:
     # Example to show how easy it is to download ARM data if a username/token are set
-    results = act.discovery.download_data(username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19')
+    results = act.discovery.download_arm_data(username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19')
     ceil_ds = act.io.armfiles.read_netcdf(results)

 # Adjust ceilometer data for plotting
diff --git a/examples/qc/plot_arm_qc.py b/examples/qc/plot_arm_qc.py
index c775fec1ec..8a4f8de373 100644
--- a/examples/qc/plot_arm_qc.py
+++ b/examples/qc/plot_arm_qc.py
@@ -26,7 +26,7 @@
 if username is None or token is None or len(username) == 0 or len(token) == 0:
     results = act.tests.sample_files.EXAMPLE_MFRSR
 else:
-    results = act.discovery.download_data(
+    results = act.discovery.download_arm_data(
         username, token, 'sgpmfrsr7nchE11.b1', '2021-03-29', '2021-03-29'
     )
 print(results)
diff --git a/examples/templates/example_template.py b/examples/templates/example_template.py
index dc15a3337f..6c594ac40e 100644
--- a/examples/templates/example_template.py
+++ b/examples/templates/example_template.py
@@ -10,7 +10,7 @@

 # Download and read file or files with the IO and discovery functions
 # within ACT, example:
-results = act.discovery.download_data(
+results = act.discovery.download_arm_data(
     username, token, 'sgpceilC1.b1', '2022-01-14', '2022-01-19')
 ceil_ds = act.io.armfiles.read_netcdf(results)

diff --git a/examples/workflows/plot_aerioe_with_cbh.py b/examples/workflows/plot_aerioe_with_cbh.py
index f9240334de..a697a4c967 100644
--- a/examples/workflows/plot_aerioe_with_cbh.py
+++ b/examples/workflows/plot_aerioe_with_cbh.py
@@ -22,9 +22,9 @@
 if username is None or token is None or len(username) == 0 or len(token) == 0:
     pass
 else:
-    results = act.discovery.download_data(username, token, 'sgpaerioe1turnC1.c1', '2022-02-11', '2022-02-11')
+    results = act.discovery.download_arm_data(username, token, 'sgpaerioe1turnC1.c1', '2022-02-11', '2022-02-11')
     aerioe_ds = act.io.armfiles.read_netcdf(results)
-    results = act.discovery.download_data(username, token, 'sgpceilC1.b1', '2022-02-11', '2022-02-11')
+    results = act.discovery.download_arm_data(username, token, 'sgpceilC1.b1', '2022-02-11', '2022-02-11')
     ceil_ds = act.io.armfiles.read_netcdf(results)

 # There isn't information content from the AERI above 3 km
diff --git a/examples/workflows/plot_multiple_dataset.py b/examples/workflows/plot_multiple_dataset.py
index 9a0d9fbf01..fc7c0c3f12 100644
--- a/examples/workflows/plot_multiple_dataset.py
+++ b/examples/workflows/plot_multiple_dataset.py
@@ -24,9 +24,9 @@
     met_ds = act.io.armfiles.read_netcdf(act.tests.sample_files.EXAMPLE_MET1)
 else:
     # Download and read data
-    results = act.discovery.download_data(username, token, 'sgpceilC1.b1', '2022-01-01', '2022-01-07')
+    results = act.discovery.download_arm_data(username, token, 'sgpceilC1.b1', '2022-01-01', '2022-01-07')
     ceil_ds = act.io.armfiles.read_netcdf(results)
-    results = act.discovery.download_data(username, token, 'sgpmetE13.b1', '2022-01-01', '2022-01-07')
+    results = act.discovery.download_arm_data(username, token, 'sgpmetE13.b1', '2022-01-01', '2022-01-07')
     met_ds = act.io.armfiles.read_netcdf(results)

 # Read in CEIL data and correct it
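The renamed discovery entry points exercised throughout these tests and examples can be combined as in the sketch below. This is a minimal usage sketch based only on the call signatures shown in this patch; the ARM username and token are placeholder values that must be replaced with real ARM Live credentials before running.

from datetime import datetime

import act

# Placeholder ARM Live credentials; substitute real values before running
username = 'my_arm_username'
token = 'my_arm_token'

# ARM data: act.discovery.download_data is now act.discovery.download_arm_data
results = act.discovery.download_arm_data(username, token, 'sgpmetE13.b1', '2022-01-01', '2022-01-07')
met_ds = act.io.armfiles.read_netcdf(results)

# SURFRAD data: act.discovery.download_surfrad is now act.discovery.download_surfrad_data
surfrad_files = act.discovery.download_surfrad_data(site='tbl', startdate='20230601', enddate='20230602')

# ASOS observations: act.discovery.get_asos is now act.discovery.get_asos_data
time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 10, 10, 0)]
my_asoses = act.discovery.get_asos_data(time_window, station='ORD')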