Skip to content

Commit

Permalink
ADD: MPLNET Discovery Module (#855)
Browse files Browse the repository at this point in the history
* ADD: MPLNET Discovery Module

* ENH: Removal of conflicting test

* ENH: Fixed linting issues

* ENH: Changed MPLNET example data request processing level
  • Loading branch information
jrobrien91 authored Sep 18, 2024
1 parent 8c15563 commit a4c01b8
Show file tree
Hide file tree
Showing 4 changed files with 305 additions and 1 deletion.
13 changes: 12 additions & 1 deletion act/discovery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@

__getattr__, __dir__, __all__ = lazy.attach(
__name__,
submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad'],
submodules=[
'arm',
'asos',
'airnow',
'cropscape',
'improve',
'noaapsl',
'neon',
'surfrad',
'nasa',
],
submod_attrs={
'arm': ['download_arm_data', 'get_arm_doi'],
'asos': ['get_asos_data'],
Expand All @@ -18,5 +28,6 @@
'noaapsl': ['download_noaa_psl_data'],
'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'],
'surfrad': ['download_surfrad_data'],
'nasa': ["download_mplnet_data", "get_mplnet_meta"],
},
)
234 changes: 234 additions & 0 deletions act/discovery/nasa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
"""
Function for downloading data from the NASA Atmospheric Science Data Center
(ASDC), which hosts data including the Atmospheric Composition
Ground Observation Network.
"""
import os
import requests
import re
import shutil


def _mplnet_filename_from_header(content_disposition):
    """Return the bare file name named in a Content-Disposition header, or None."""
    match = re.search(r'filename="?([^";]+)"?', content_disposition)
    if match is None:
        return None
    # basename() keeps a server-supplied name from escaping the output directory.
    return os.path.basename(match.group(1).strip())


def download_mplnet_data(
    version=None,
    level=None,
    product=None,
    site=None,
    year=None,
    month=None,
    day=None,
    outdir=None,
):
    """
    Function to download data from the NASA MPL Network Data
    https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?download

    Downloaded Products are contained within NETCDF-4, CF compliant files.

    Parameters
    ----------
    version : int
        MPLNet Dataset Version Number (2 or 3).
        All data from 2000 have been processed to Version 3.
        Information on the MPLNet Dataset Version can be found here:
        https://mplnet.gsfc.nasa.gov/versions.htm
    level : int
        MPLNet Product Levels (1, 15, 2); 15 presumably denotes Level 1.5.
        MPLNet Levels used to differentiate quality assurance screens.
        Information on the MPLNet Product levels can be found here:
        https://mplnet.gsfc.nasa.gov/product-info/
        Level 1 data should never be used for publication.
    product : str
        MPLNet Product (NRB, CLD, PBL, AER).
            NRB - Lidar signals; volume depolarization ratios, diagnostics
            CLD - Cloud Heights, thin cloud extinction and optical depths,
                  cloud phase
            AER - Aerosol heights; extinction, backscatter, and aerosol
                  depolarization ratio profiles; lidar ratio
            PBL - Surface-Attached Mixed Layer Top and estimated mixed
                  layer AOD
        Information on the MPLNet Products can be found here:
        https://mplnet.gsfc.nasa.gov/product-info/
    site : str
        MPLNet four letter site identifier.
    year : str
        Four digit Year for desired product download (YYYY).
        Note Level 1 and 1.5 products are available for
        download the day after automated collection.
        Information on the MPLNet naming convention can be found here:
        https://mplnet.gsfc.nasa.gov/product-info/mplnet_file_name.htm
    month : str
        Two digit month for desired product download (MM).
    day : str
        Two digit desired day for product download (DD).
        If day not supplied, will download all data for month supplied
        in a zip file.
    outdir : str
        The output directory for the data. Set to None to place the files
        in a folder named ``MPLNET`` inside the current working directory.
        The directory is created if it does not exist.

    Returns
    -------
    files : list
        Returns list of files retrieved.

    Raises
    ------
    ValueError
        If any of `version`, `level`, `product`, `site`, `year` or `month`
        is None, if the service reports that no file matches the request,
        or if the response does not name a file to save.
    requests.HTTPError
        If the response carries no file name and has an HTTP error status.

    """

    # Generate the data policy agreement information
    print("\nPlease Review the MPLNET Data Policy Prior to Use of MPLNET Data")
    print("The MPLNET Data Policy can be found at:\n\thttps://mplnet.gsfc.nasa.gov/data-policy\n")

    # Generate the data acknowledgement statement, might require site information.
    print(
        "Please Include the Following Acknowledgements in Any Publication \nor"
        " presentation of MPLNET data, regardless of co-authorship status:"
    )
    print(
        "\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program"
        " \n\tand Earth Observing System."
    )
    print(
        "\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing"
        " \n\tand maintaining sites.\n"
    )

    # Validate the mandatory inputs, in order, before touching the network or disk.
    required = (
        (version, "Please provide a MPLNet Product Version"),
        (level, "Please provide a MPLNet Product Level"),
        (product, "Please provide a specific MPLNet Product identifer"),
        (site, "Please provide a specific MPLNet site"),
        (year, "Year of desired data download is required to download MPLNET data"),
        (month, "Month of desired data download is required to download MPLNet data"),
    )
    for value, message in required:
        if value is None:
            raise ValueError(message)

    # Passing the query as `params` lets requests URL-encode every value.
    base_url = "https://mplnet.gsfc.nasa.gov/download"
    params = {
        "version": f"V{version}",
        "level": f"L{level}",
        "product": str(product),
        "site": str(site),
        "year": str(year),
        "month": str(month),
    }
    if day:
        # Note: Day is not required for the MPLNet download
        params["day"] = str(day)

    # If no folder is given, use an MPLNET folder in the current working
    # directory to prevent file mix-up.
    if outdir:
        output_dir = os.fspath(outdir)
    else:
        output_dir = os.path.join(os.getcwd(), "MPLNET")
    os.makedirs(output_dir, exist_ok=True)

    # Make a Request
    files = []
    with requests.get(base_url, params=params, stream=True) as r:
        disposition = r.headers.get('Content-Disposition')
        if disposition is None:
            # Without a file name there is nothing to save; surface an HTTP
            # error if there is one, otherwise report the malformed reply.
            r.raise_for_status()
            raise ValueError(
                "Failed MPLNET Download\n"
                + " MPLNET Download API response did not include a file name"
            )

        fname = _mplnet_filename_from_header(disposition)
        # The service signals "no data" by returning a placeholder text file.
        if not fname or fname == "MPLNET_download_fail.txt":
            raise ValueError(
                "Failed MPLNET Download\n"
                + " File could not be found for the desired input parameters"
                + " for MPLNET Download API"
            )

        output_filename = os.path.join(output_dir, fname)
        print("[DOWNLOADING] ", fname)
        # Ask urllib3 to undo any Content-Encoding so the file on disk is the
        # actual payload rather than a compressed transfer representation.
        r.raw.decode_content = True
        with open(output_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        files.append(output_filename)

    return files


def get_mplnet_meta(
    sites=None, method=None, year=None, month=None, day=None, print_to_screen=False
):
    """
    Returns a list of meta data from the NASA MPL Network Data
    https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?metadata

    Parameters
    ----------
    sites : str
        How to return MPLNET Site Information
            all - produces output on all sites (active and inactive)
            active - produces output file containing only active sites
                     (if year, month, or day are not set then uses today's date)
            inactive - produces output file containing only inactive sites
                     (if year, month, or day are not set then uses today's date)
            planned - produces output file containing only planned sites
            site_name - produces output file containing only requested site
            collection - produces output file containing sites in pre-defined
                         collections (e.g. field campaigns or regions)
    method : str
        Method for returning JSON list of MPLNET GALION format parameters.
            station - returns GALION JSON with only station and PI contact info
            data - return GALION JSON with data elements, station, date and PI
                   contact information
    year : str
        Four digit year (YYYY) of the date the request applies to.
    month : str
        Two digit month (MM) of the date the request applies to.
    day : str
        Two digit day (DD) of the date the request applies to.
    print_to_screen : Boolean
        If true, print MPLNET site identifiers to screen

    Returns
    -------
    site_request : list
        Decoded JSON reply from the service; each entry is indexed by its
        'id' key when `print_to_screen` is set.

    Raises
    ------
    ValueError
        If `sites` is not provided. Any error raised while decoding the
        reply as JSON is propagated unchanged.

    """
    if sites is None:
        raise ValueError("Site Parameter is required to download MPLNET Meta Data")

    # The fixed part of the query stays in the URL; requests appends the
    # URL-encoded `params` after it.
    base_url = "https://mplnet.gsfc.nasa.gov/operations/sites?api&format=galion"

    params = {"sites": str(sites)}
    optional = (("method", method), ("year", year), ("month", month), ("day", day))
    for key, value in optional:
        if value:
            params[key] = str(value)

    with requests.get(base_url, params=params) as r:
        # Convert to JSON
        site_request = r.json()

    if print_to_screen:
        for entry in site_request:
            print(entry['id'])

    return site_request
18 changes: 18 additions & 0 deletions examples/discovery/download_mplnet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
NASA MPLNET
-----------
This example shows how to download data from
NASA's Micro-Pulse Lidar Network (MPLNET)
"""

import act

# Query the metadata service for the active MPLNET sites; with
# print_to_screen=True the site identifiers are printed so one can be chosen.
site_metadata = act.discovery.get_mplnet_meta(
    sites="active",
    method="data",
    print_to_screen=True,
)

# Fetch a single day of Version 3, Level 1 NRB data for the chosen site.
downloaded_files = act.discovery.download_mplnet_data(
    version=3,
    level=1,
    product="NRB",
    site="GSFC",
    year="2022",
    month="09",
    day="01",
)
41 changes: 41 additions & 0 deletions tests/discovery/test_nasa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import numpy as np
import act


def test_get_mplnet_meta():
    """Check the metadata reply for one site and the error paths of the query."""
    request = dict(sites="GSFC", method="data", year="2024", month="09", day="12")
    first_site = act.discovery.get_mplnet_meta(**request)[0]

    # The first returned entry must expose its identifier and station details.
    for key in ('id', 'station'):
        assert key in first_site
    assert first_site['station']['latitude_unit'] == "deg"

    # A missing site and a site given as the integer 10 must both raise ValueError.
    np.testing.assert_raises(ValueError, act.discovery.get_mplnet_meta)
    np.testing.assert_raises(ValueError, act.discovery.get_mplnet_meta, sites=10)


def test_download_mplnet_data():
    """Download one daily file, then confirm every incomplete request is rejected."""
    files = act.discovery.download_mplnet_data(
        version=3, level=1, product="NRB", site="GSFC", year="2020", month="09", day="01"
    )

    assert len(files) == 1
    assert files[0].endswith("nc4")

    # Required arguments in the order the function checks them; `month` is
    # never supplied below, so every call is missing at least one of them.
    required = [
        ("version", 3),
        ("level", 1),
        ("product", 'NRB'),
        ("site", "GSFC"),
        ("year", "2020"),
    ]
    for supplied in range(len(required) + 1):
        partial_request = dict(required[:supplied])
        with np.testing.assert_raises(ValueError):
            act.discovery.download_mplnet_data(**partial_request)

0 comments on commit a4c01b8

Please sign in to comment.