diff --git a/act/discovery/asos.py b/act/discovery/asos.py
index 59ce415159..5f5ae27534 100644
--- a/act/discovery/asos.py
+++ b/act/discovery/asos.py
@@ -50,9 +50,6 @@ def get_asos_data(time_window, lat_range=None, lon_range=None, station=None):
     $ station = "KORD"
     $ my_asoses = act.discovery.get_asos(time_window, station="ORD")
     """
-    message = 'act.discovery.get_asos will be renamed in version 2.0.0 to act.discovery.asos'
-    warnings.warn(message, DeprecationWarning, 2)
-
     # First query the database for all of the JSON info for every station
     # Only add stations whose lat/lon are within the Grid's boundaries
     regions = """AF AL_ AI_ AQ_ AG_ AR_ AK AL AM_
diff --git a/act/io/__init__.py b/act/io/__init__.py
index d50c05623a..8227cf89da 100644
--- a/act/io/__init__.py
+++ b/act/io/__init__.py
@@ -36,6 +36,7 @@
             'read_psl_wind_profiler_temperature',
             'read_psl_parsivel',
             'read_psl_radar_fmcw_moment',
+            'read_psl_surface_met',
         ],
         'pysp2': ['read_hk_file', 'read_sp2', 'read_sp2_dat'],
diff --git a/act/retrievals/__init__.py b/act/retrievals/__init__.py
index d33a211175..a1849d92c6 100644
--- a/act/retrievals/__init__.py
+++ b/act/retrievals/__init__.py
@@ -23,6 +23,7 @@
         'calculate_pbl_liu_liang',
         'calculate_precipitable_water',
         'calculate_stability_indicies',
+        'calculate_pbl_heffter',
         ],
         'sp2': ['calc_sp2_diams_masses', 'process_sp2_psds'],
     },
diff --git a/act/tests/corrections/test_ceil.py b/act/tests/corrections/test_ceil.py
new file mode 100644
index 0000000000..b50fd85e2e
--- /dev/null
+++ b/act/tests/corrections/test_ceil.py
@@ -0,0 +1,20 @@
+import numpy as np
+import xarray as xr
+
+import act
+
+
+def test_correct_ceil():
+    # Make a fake ARM dataset to test with: an array of 10s with -1 in half
+    # of it, which the correction replaces with 1e-7 before taking log10
+    fake_data = 10 * np.ones((300, 20))
+    fake_data[:, 10:] = -1
+    arm_ds = {}
+    arm_ds['backscatter'] = xr.DataArray(fake_data)
+    arm_ds = act.corrections.ceil.correct_ceil(arm_ds)
+    assert np.all(arm_ds['backscatter'].data[:, 10:] == -7)
+    assert np.all(arm_ds['backscatter'].data[:, 1:10] == 1)
+
+    arm_ds['backscatter'].attrs['units'] = 'dummy'
+    arm_ds = act.corrections.ceil.correct_ceil(arm_ds)
+    assert arm_ds['backscatter'].units == 'log(dummy)'
diff --git a/act/tests/corrections/test_doppler_lidar.py b/act/tests/corrections/test_doppler_lidar.py
new file mode 100644
index 0000000000..6a7cb12f11
--- /dev/null
+++ b/act/tests/corrections/test_doppler_lidar.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+import act
+
+
+def test_correct_dl():
+    # Test the DL correction script on a PPI dataset even though it will
+    # most likely be used on FPT scans. Doing this saves space by keeping
+    # only one data file in the repo.
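+    # Note (an assumption inferred from the assertions below, not from the
+    # correct_dl docstring): the correction converts attenuated_backscatter
+    # to a log10 scale and, unless range_normalize=False, range-normalizes
+    # it first, which is why the two sums checked below differ.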
+    files = act.tests.sample_files.EXAMPLE_DLPPI
+    ds = act.io.arm.read_arm_netcdf(files)
+
+    new_ds = act.corrections.doppler_lidar.correct_dl(ds, fill_value=np.nan)
+    data = new_ds['attenuated_backscatter'].values
+    np.testing.assert_almost_equal(np.nansum(data), -186479.83, decimal=0.1)
+
+    new_ds = act.corrections.doppler_lidar.correct_dl(ds, range_normalize=False)
+    data = new_ds['attenuated_backscatter'].values
+    np.testing.assert_almost_equal(np.nansum(data), -200886.0, decimal=0.1)
diff --git a/act/tests/corrections/test_mpl_corrections.py b/act/tests/corrections/test_mpl_corrections.py
new file mode 100644
index 0000000000..a6ab821fe7
--- /dev/null
+++ b/act/tests/corrections/test_mpl_corrections.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+import act
+
+
+def test_correct_mpl():
+    # Read in sample MPL data and apply the corrections, then spot-check
+    # the overlap, signal, and height values below
+    test_data = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MPL_1SAMPLE)
+    ds = act.corrections.mpl.correct_mpl(test_data)
+    sig_cross_pol = ds['signal_return_cross_pol'].values[1, 10:15]
+    sig_co_pol = ds['signal_return_co_pol'].values[1, 10:15]
+    height = ds['height'].values[0:10]
+    overlap0 = ds['overlap_correction'].values[1, 0, 0:5]
+    overlap1 = ds['overlap_correction'].values[1, 1, 0:5]
+    overlap2 = ds['overlap_correction'].values[1, 2, 0:5]
+    np.testing.assert_allclose(overlap0, [0.0, 0.0, 0.0, 0.0, 0.0])
+    np.testing.assert_allclose(overlap1, [754.338, 754.338, 754.338, 754.338, 754.338])
+    np.testing.assert_allclose(overlap2, [181.9355, 181.9355, 181.9355, 181.9355, 181.9355])
+    np.testing.assert_allclose(
+        sig_cross_pol,
+        [-0.5823283, -1.6066532, -1.7153032, -2.520143, -2.275405],
+        rtol=4e-06,
+    )
+    np.testing.assert_allclose(
+        sig_co_pol, [12.5631485, 11.035495, 11.999875, 11.09393, 11.388968], rtol=1e-6
+    )
+    np.testing.assert_allclose(
+        height,
+        [
+            0.00749012,
+            0.02247084,
+            0.03745109,
+            0.05243181,
+            0.06741206,
+            0.08239277,
+            0.09737302,
+            0.11235374,
+            0.12733398,
+            0.14231472,
+        ],
+        rtol=1e-6,
+    )
+    assert ds['signal_return_co_pol'].attrs['units'] == '10 * log10(count/us)'
+    assert ds['signal_return_cross_pol'].attrs['units'] == '10 * log10(count/us)'
+    assert ds['cross_co_ratio'].attrs['long_name'] == 'Cross-pol / Co-pol ratio * 100'
+    assert ds['cross_co_ratio'].attrs['units'] == '1'
+    assert 'description' not in ds['cross_co_ratio'].attrs.keys()
+    assert 'ancillary_variables' not in ds['cross_co_ratio'].attrs.keys()
+    assert np.all(np.round(ds['cross_co_ratio'].data[0, 500]) == 34.0)
+    assert np.all(np.round(ds['signal_return_co_pol'].data[0, 11]) == 11)
+    assert np.all(np.round(ds['signal_return_co_pol'].data[0, 500]) == -6)
+    test_data.close()
+    ds.close()
diff --git a/act/tests/corrections/test_raman_lidar.py b/act/tests/corrections/test_raman_lidar.py
new file mode 100644
index 0000000000..389dd83b7b
--- /dev/null
+++ b/act/tests/corrections/test_raman_lidar.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+import act
+
+
+def test_correct_rl():
+    # Using ceil data in place of RL data to save memory
+    files = act.tests.sample_files.EXAMPLE_RL1
+    ds = act.io.arm.read_arm_netcdf(files)
+
+    ds = act.corrections.raman_lidar.correct_rl(ds, range_normalize_log_values=True)
+    np.testing.assert_almost_equal(
+        np.max(ds['depolarization_counts_high'].values), 9.91, decimal=2
+    )
+    np.testing.assert_almost_equal(
+        np.min(ds['depolarization_counts_high'].values), -7.00, decimal=2
+    )
+    np.testing.assert_almost_equal(
+        np.mean(ds['depolarization_counts_high'].values), -1.45, decimal=2
+    )
diff --git
a/act/tests/corrections/test_ship.py b/act/tests/corrections/test_ship.py new file mode 100644 index 0000000000..fb50b779be --- /dev/null +++ b/act/tests/corrections/test_ship.py @@ -0,0 +1,16 @@ +import xarray as xr + +import act + + +def test_correct_wind(): + nav = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_NAV) + nav = act.utils.ship_utils.calc_cog_sog(nav) + + aosmet = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AOSMET) + + ds = xr.merge([nav, aosmet], compat='override') + ds = act.corrections.ship.correct_wind(ds) + + assert round(ds['wind_speed_corrected'].values[800]) == 5.0 + assert round(ds['wind_direction_corrected'].values[800]) == 92.0 diff --git a/act/tests/discovery/test_airnow.py b/act/tests/discovery/test_airnow.py new file mode 100644 index 0000000000..eabc454f36 --- /dev/null +++ b/act/tests/discovery/test_airnow.py @@ -0,0 +1,53 @@ +import os + +import numpy as np + +import act + + +def test_get_airnow(): + token = os.getenv('AIRNOW_API') + if token is not None: + if len(token) == 0: + return + results = act.discovery.get_airnow_forecast(token, '2022-05-01', zipcode=60108, distance=50) + assert results['CategoryName'].values[0] == 'Good' + assert results['AQI'].values[2] == -1 + assert results['ReportingArea'].values[3] == 'Aurora and Elgin' + + results = act.discovery.get_airnow_forecast( + token, '2022-05-01', distance=50, latlon=[41.958, -88.12] + ) + assert results['CategoryName'].values[3] == 'Good' + assert results['AQI'].values[2] == -1 + assert results['ReportingArea'][3] == 'Aurora and Elgin' + + results = act.discovery.get_airnow_obs(token, date='2022-05-01', zipcode=60108, distance=50) + assert results['AQI'].values[0] == 26 + assert results['ParameterName'].values[1] == 'PM2.5' + assert results['CategoryName'].values[0] == 'Good' + + results = act.discovery.get_airnow_obs(token, zipcode=60108, distance=50) + assert results['ReportingArea'].values[0] == 'Aurora and Elgin' + results = act.discovery.get_airnow_obs(token, latlon=[41.958, -88.12], distance=50) + assert results['StateCode'].values[0] == 'IL' + + with np.testing.assert_raises(NameError): + results = act.discovery.get_airnow_obs(token) + with np.testing.assert_raises(NameError): + results = act.discovery.get_airnow_forecast(token, '2022-05-01') + + results = act.discovery.get_airnow_obs( + token, date='2022-05-01', distance=50, latlon=[41.958, -88.12] + ) + assert results['AQI'].values[0] == 26 + assert results['ParameterName'].values[1] == 'PM2.5' + assert results['CategoryName'].values[0] == 'Good' + + lat_lon = '-88.245401,41.871346,-87.685099,42.234359' + results = act.discovery.get_airnow_bounded_obs( + token, '2022-05-01T00', '2022-05-01T12', lat_lon, 'OZONE,PM25', data_type='B' + ) + assert results['PM2.5'].values[-1, 0] == 1.8 + assert results['OZONE'].values[0, 0] == 37.0 + assert len(results['time'].values) == 13 diff --git a/act/tests/discovery/test_arm_discovery.py b/act/tests/discovery/test_arm_discovery.py new file mode 100644 index 0000000000..c689edcf11 --- /dev/null +++ b/act/tests/discovery/test_arm_discovery.py @@ -0,0 +1,115 @@ +import glob +import os + +import numpy as np + +import act + + +def test_download_armdata(): + if not os.path.isdir(os.getcwd() + '/data/'): + os.makedirs(os.getcwd() + '/data/') + + # Place your username and token here + username = os.getenv('ARM_USERNAME') + token = os.getenv('ARM_PASSWORD') + + if username is not None and token is not None: + if len(username) == 0 and len(token) == 0: + return + datastream = 'sgpmetE13.b1' 
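+        # The ARM Live credentials are read from the ARM_USERNAME and
+        # ARM_PASSWORD environment variables above; when they are unset or
+        # empty the test returns early, so CI runs without secrets still pass.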
+ startdate = '2020-01-01' + enddate = startdate + outdir = os.getcwd() + '/data/' + + results = act.discovery.arm.download_arm_data( + username, token, datastream, startdate, enddate, output=outdir + ) + files = glob.glob(outdir + datastream + '*20200101*cdf') + if len(results) > 0: + assert files is not None + assert 'sgpmetE13' in files[0] + + if files is not None: + if len(files) > 0: + os.remove(files[0]) + + datastream = 'sgpmeetE13.b1' + act.discovery.arm.download_arm_data( + username, token, datastream, startdate, enddate, output=outdir + ) + files = glob.glob(outdir + datastream + '*20200101*cdf') + assert len(files) == 0 + + with np.testing.assert_raises(ConnectionRefusedError): + act.discovery.arm.download_arm_data( + username, token + '1234', datastream, startdate, enddate, output=outdir + ) + + datastream = 'sgpmetE13.b1' + results = act.discovery.arm.download_arm_data( + username, token, datastream, startdate, enddate + ) + assert len(results) == 1 + + +def test_download_armdata_hourly(): + if not os.path.isdir(os.getcwd() + '/data/'): + os.makedirs(os.getcwd() + '/data/') + + # Place your username and token here + username = os.getenv('ARM_USERNAME') + token = os.getenv('ARM_PASSWORD') + + if username is not None and token is not None: + if len(username) == 0 and len(token) == 0: + return + datastream = 'sgpmetE13.b1' + startdate = '2020-01-01T00:00:00' + enddate = '2020-01-01T12:00:00' + outdir = os.getcwd() + '/data/' + + results = act.discovery.arm.download_arm_data( + username, token, datastream, startdate, enddate, output=outdir + ) + files = glob.glob(outdir + datastream + '*20200101*cdf') + if len(results) > 0: + assert files is not None + assert 'sgpmetE13' in files[0] + + if files is not None: + if len(files) > 0: + os.remove(files[0]) + + datastream = 'sgpmeetE13.b1' + act.discovery.arm.download_arm_data( + username, token, datastream, startdate, enddate, output=outdir + ) + files = glob.glob(outdir + datastream + '*20200101*cdf') + assert len(files) == 0 + + with np.testing.assert_raises(ConnectionRefusedError): + act.discovery.arm.download_arm_data( + username, token + '1234', datastream, startdate, enddate, output=outdir + ) + + datastream = 'sgpmetE13.b1' + results = act.discovery.arm.download_arm_data( + username, token, datastream, startdate, enddate + ) + assert len(results) == 1 + + +def test_arm_doi(): + datastream = 'sgpmetE13.b1' + startdate = '2022-01-01' + enddate = '2022-12-31' + doi = act.discovery.get_arm_doi(datastream, startdate, enddate) + + assert len(doi) > 10 + assert isinstance(doi, str) + assert 'doi' in doi + assert 'Kyrouac' in doi + + doi = act.discovery.get_arm_doi('test', startdate, enddate) + assert "No DOI Found" in doi diff --git a/act/tests/discovery/test_asos.py b/act/tests/discovery/test_asos.py new file mode 100644 index 0000000000..e0f44842df --- /dev/null +++ b/act/tests/discovery/test_asos.py @@ -0,0 +1,27 @@ +from datetime import datetime + +import numpy as np + +import act + + +def test_get_ord(): + time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)] + my_asoses = act.discovery.get_asos_data(time_window, station='ORD') + assert 'ORD' in my_asoses.keys() + assert np.all( + np.equal( + my_asoses['ORD']['sknt'].values[:10], + np.array([13.0, 11.0, 14.0, 14.0, 13.0, 11.0, 14.0, 13.0, 13.0, 13.0]), + ) + ) + + +def test_get_region(): + my_keys = ['MDW', 'IGQ', 'ORD', '06C', 'PWK', 'LOT', 'GYY'] + time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)] + lat_window = (41.8781 - 0.5, 41.8781 + 
0.5) + lon_window = (-87.6298 - 0.5, -87.6298 + 0.5) + my_asoses = act.discovery.get_asos_data(time_window, lat_range=lat_window, lon_range=lon_window) + asos_keys = list(my_asoses.keys()) + assert asos_keys == my_keys diff --git a/act/tests/discovery/test_cropscape.py b/act/tests/discovery/test_cropscape.py new file mode 100644 index 0000000000..3d594cc35d --- /dev/null +++ b/act/tests/discovery/test_cropscape.py @@ -0,0 +1,20 @@ +import act + + +def test_croptype(): + year = 2018 + lat = 37.15 + lon = -98.362 + # Try for when the cropscape API is not working + try: + crop = act.discovery.cropscape.get_crop_type(lat, lon, year) + crop2 = act.discovery.cropscape.get_crop_type(lat, lon) + except Exception: + return + + # print(crop, crop2) + if crop is not None: + assert crop == 'Dbl Crop WinWht/Sorghum' + if crop2 is not None: + # assert crop2 == 'Sorghum' + assert crop2 == 'Soybeans' diff --git a/act/tests/discovery/test_neon_discovery.py b/act/tests/discovery/test_neon_discovery.py new file mode 100644 index 0000000000..762036a340 --- /dev/null +++ b/act/tests/discovery/test_neon_discovery.py @@ -0,0 +1,27 @@ +import os + +import act + + +def test_neon(): + site_code = 'BARR' + result = act.discovery.get_neon_site_products(site_code, print_to_screen=True) + assert 'DP1.00002.001' in result + assert result['DP1.00003.001'] == 'Triple aspirated air temperature' + + product_code = 'DP1.00002.001' + result = act.discovery.get_neon_product_avail(site_code, product_code, print_to_screen=True) + assert '2017-09' in result + assert '2022-11' in result + + output_dir = os.path.join(os.getcwd(), site_code + '_' + product_code) + result = act.discovery.download_neon_data(site_code, product_code, '2022-10', output_dir=output_dir) + assert len(result) == 20 + assert any('readme' in r for r in result) + assert any('sensor_position' in r for r in result) + + result = act.discovery.download_neon_data(site_code, product_code, '2022-09', + end_date='2022-10', output_dir=output_dir) + assert len(result) == 40 + assert any('readme' in r for r in result) + assert any('sensor_position' in r for r in result) diff --git a/act/tests/discovery/test_noaapsl_discovery.py b/act/tests/discovery/test_noaapsl_discovery.py new file mode 100644 index 0000000000..3369371f91 --- /dev/null +++ b/act/tests/discovery/test_noaapsl_discovery.py @@ -0,0 +1,59 @@ +import numpy as np + +import act + + +def test_noaa_psl(): + result = act.discovery.download_noaa_psl_data( + site='ctd', + instrument='Parsivel', + startdate='20211231', + enddate='20220101', + output='./data/', + ) + assert len(result) == 48 + + result = act.discovery.download_noaa_psl_data( + site='ctd', instrument='Pressure', startdate='20220101', hour='00' + ) + assert len(result) == 1 + + result = act.discovery.download_noaa_psl_data( + site='ctd', instrument='GpsTrimble', startdate='20220104', hour='00' + ) + assert len(result) == 6 + + types = [ + 'Radar S-band Moment', + 'Radar S-band Bright Band', + '449RWP Bright Band', + '449RWP Wind', + '449RWP Sub-Hour Wind', + '449RWP Sub-Hour Temp', + '915RWP Wind', + '915RWP Temp', + '915RWP Sub-Hour Wind', + '915RWP Sub-Hour Temp', + ] + for t in types: + result = act.discovery.download_noaa_psl_data( + site='ctd', instrument=t, startdate='20220601', hour='01' + ) + assert len(result) == 1 + + types = ['Radar FMCW Moment', 'Radar FMCW Bright Band'] + files = [3, 1] + for i, t in enumerate(types): + result = act.discovery.download_noaa_psl_data( + site='bck', instrument=t, startdate='20220101', hour='01' + ) + assert 
len(result) == files[i]
+
+    with np.testing.assert_raises(ValueError):
+        result = act.discovery.download_noaa_psl_data(
+            instrument='Parsivel', startdate='20220601', hour='01'
+        )
+    with np.testing.assert_raises(ValueError):
+        result = act.discovery.download_noaa_psl_data(
+            site='ctd', instrument='dongle', startdate='20220601', hour='01'
+        )
diff --git a/act/tests/discovery/test_surfrad.py b/act/tests/discovery/test_surfrad.py
new file mode 100644
index 0000000000..a7d478b6c7
--- /dev/null
+++ b/act/tests/discovery/test_surfrad.py
@@ -0,0 +1,7 @@
+import act
+
+
+def test_download_surfrad():
+    results = act.discovery.download_surfrad_data(site='tbl', startdate='20230601', enddate='20230602')
+    assert len(results) == 2
+    assert 'tbl23152.dat' in results[0]
diff --git a/act/tests/io/test_arm.py b/act/tests/io/test_arm.py
new file mode 100644
index 0000000000..a50fea323b
--- /dev/null
+++ b/act/tests/io/test_arm.py
@@ -0,0 +1,294 @@
+from pathlib import Path
+import tempfile
+
+import numpy as np
+
+import act
+from act.tests import sample_files
+
+
+def test_read_arm_netcdf():
+    ds = act.io.arm.read_arm_netcdf([act.tests.EXAMPLE_MET1])
+    assert 'temp_mean' in ds.variables.keys()
+    assert 'rh_mean' in ds.variables.keys()
+    assert ds.attrs['_arm_standards_flag'] == (1 << 0)
+
+    with np.testing.assert_raises(OSError):
+        ds = act.io.arm.read_arm_netcdf([])
+
+    ds = act.io.arm.read_arm_netcdf([], return_None=True)
+    assert ds is None
+    ds = act.io.arm.read_arm_netcdf(['./randomfile.nc'], return_None=True)
+    assert ds is None
+
+    ds = act.io.arm.read_arm_netcdf([act.tests.EXAMPLE_MET_TEST1])
+    assert 'time' in ds
+
+    ds = act.io.arm.read_arm_netcdf([act.tests.EXAMPLE_MET_TEST2])
+    assert ds['time'].values[10].astype('datetime64[ms]') == np.datetime64('2019-01-01T00:10:00', 'ms')
+
+    ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET1, use_base_time=True, drop_variables='time')
+    assert 'time' in ds
+    assert np.issubdtype(ds['time'].dtype, np.datetime64)
+    assert ds['time'].values[10].astype('datetime64[ms]') == np.datetime64('2019-01-01T00:10:00', 'ms')
+
+    del ds
+
+
+def test_keep_variables():
+
+    var_names = [
+        'temp_mean',
+        'rh_mean',
+        'wdir_vec_mean',
+        'tbrg_precip_total_corr',
+        'atmos_pressure',
+        'wspd_vec_mean',
+        'pwd_pw_code_inst',
+        'pwd_pw_code_15min',
+        'pwd_mean_vis_10min',
+        'logger_temp',
+        'pwd_precip_rate_mean_1min',
+        'pwd_cumul_snow',
+        'pwd_mean_vis_1min',
+        'pwd_pw_code_1hr',
+        'org_precip_rate_mean',
+        'tbrg_precip_total',
+        'pwd_cumul_rain',
+    ]
+    var_names = var_names + ['qc_' + ii for ii in var_names]
+    drop_variables = act.io.arm.keep_variables_to_drop_variables(
+        act.tests.EXAMPLE_MET1, var_names
+    )
+
+    expected_drop_variables = [
+        'wdir_vec_std',
+        'base_time',
+        'alt',
+        'qc_wspd_arith_mean',
+        'pwd_err_code',
+        'logger_volt',
+        'temp_std',
+        'lon',
+        'qc_logger_volt',
+        'time_offset',
+        'wspd_arith_mean',
+        'lat',
+        'vapor_pressure_std',
+        'vapor_pressure_mean',
+        'rh_std',
+        'qc_vapor_pressure_mean',
+    ]
+    # Compare sorted copies; list.sort() returns None, so comparing its
+    # return values would always pass.
+    assert sorted(drop_variables) == sorted(expected_drop_variables)
+
+    ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET1, keep_variables='temp_mean')
+    assert list(ds.data_vars) == ['temp_mean']
+    del ds
+
+    var_names = ['temp_mean', 'qc_temp_mean']
+    ds = act.io.arm.read_arm_netcdf(
+        act.tests.EXAMPLE_MET1, keep_variables=var_names, drop_variables='nonsense'
+    )
+    assert sorted(list(ds.data_vars)) == sorted(var_names)
+    del ds
+
+    var_names = ['temp_mean', 'qc_temp_mean', 'alt', 'lat', 'lon']
+    ds = act.io.arm.read_arm_netcdf(
+        act.tests.EXAMPLE_MET_WILDCARD, keep_variables=var_names, drop_variables=['lon']
+    )
+    var_names = list(set(var_names) - {'lon'})
+    assert sorted(list(ds.data_vars)) == sorted(var_names)
+    del ds
+
+    filenames = list(Path(file) for file in act.tests.EXAMPLE_MET_WILDCARD)
+    var_names = ['temp_mean', 'qc_temp_mean', 'alt', 'lat', 'lon']
+    ds = act.io.arm.read_arm_netcdf(filenames, keep_variables=var_names)
+    assert sorted(list(ds.data_vars)) == sorted(var_names)
+    del ds
+
+
+def test_read_arm_netcdf_mfdataset():
+    met_ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD)
+    met_ds.load()
+    assert 'temp_mean' in met_ds.variables.keys()
+    assert 'rh_mean' in met_ds.variables.keys()
+    assert len(met_ds.attrs['_file_times']) == 7
+    assert met_ds.attrs['_arm_standards_flag'] == (1 << 0)
+    met_ds.close()
+    del met_ds
+
+    met_ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD, cleanup_qc=True)
+    met_ds.load()
+    var_name = 'temp_mean'
+    qc_var_name = 'qc_' + var_name
+    attr_names = [
+        'long_name',
+        'units',
+        'flag_masks',
+        'flag_meanings',
+        'flag_assessments',
+        'fail_min',
+        'fail_max',
+        'fail_delta',
+        'standard_name',
+    ]
+    assert var_name in met_ds.variables.keys()
+    assert qc_var_name in met_ds.variables.keys()
+    assert sorted(attr_names) == sorted(list(met_ds[qc_var_name].attrs.keys()))
+    assert met_ds[qc_var_name].attrs['flag_masks'] == [1, 2, 4, 8]
+    assert met_ds[qc_var_name].attrs['flag_assessments'] == ['Bad', 'Bad', 'Bad', 'Indeterminate']
+    met_ds.close()
+    del met_ds
+
+
+def test_io_dod():
+    dims = {'time': 1440, 'drop_diameter': 50}
+
+    try:
+        ds = act.io.arm.create_ds_from_arm_dod(
+            'vdis.b1', dims, version='1.2', scalar_fill_dim='time'
+        )
+        assert 'moment1' in ds
+        assert len(ds['base_time'].values) == 1440
+        assert len(ds['drop_diameter'].values) == 50
+        with np.testing.assert_warns(UserWarning):
+            ds2 = act.io.arm.create_ds_from_arm_dod('vdis.b1', dims, scalar_fill_dim='time')
+        assert 'moment1' in ds2
+        assert len(ds2['base_time'].values) == 1440
+        assert len(ds2['drop_diameter'].values) == 50
+        with np.testing.assert_raises(ValueError):
+            ds = act.io.arm.create_ds_from_arm_dod('vdis.b1', {}, version='1.2')
+        ds = act.io.arm.create_ds_from_arm_dod(
+            sample_files.EXAMPLE_DOD, dims, version=1.2, scalar_fill_dim='time',
+            local_file=True)
+        assert 'moment1' in ds
+        assert len(ds['base_time'].values) == 1440
+        assert len(ds['drop_diameter'].values) == 50
+    except Exception:
+        return
+    ds.close()
+    ds2.close()
+
+
+def test_io_write():
+    sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1)
+    sonde_ds.clean.cleanup()
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        write_file = Path(tmpdirname, Path(sample_files.EXAMPLE_SONDE1).name)
+        keep_vars = ['tdry', 'qc_tdry', 'dp', 'qc_dp']
+        for var_name in list(sonde_ds.data_vars):
+            if var_name not in keep_vars:
+                del sonde_ds[var_name]
+        sonde_ds.write.write_netcdf(path=write_file, FillValue=-9999)
+
+        sonde_ds_read = act.io.arm.read_arm_netcdf(str(write_file))
+        assert list(sonde_ds_read.data_vars) == keep_vars
+        assert isinstance(sonde_ds_read['qc_tdry'].attrs['flag_meanings'], str)
+        assert sonde_ds_read['qc_tdry'].attrs['flag_meanings'].count('__') == 21
+        for attr in ['qc_standards_version', 'qc_method', 'qc_comment']:
+            assert attr not in list(sonde_ds_read.attrs)
+        sonde_ds_read.close()
+        del sonde_ds_read
+
+    sonde_ds.close()
+
+    sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_EBBR1)
+    sonde_ds.clean.cleanup()
+    assert 'fail_min' in sonde_ds['qc_home_signal_15'].attrs
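+    # clean.cleanup() translates ARM-style QC to CF conventions, so limit
+    # attributes such as fail_min should coexist with CF attributes like
+    # standard_name and flag_masks, as the next assertions check.
+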
assert 'standard_name' in sonde_ds['qc_home_signal_15'].attrs + assert 'flag_masks' in sonde_ds['qc_home_signal_15'].attrs + + with tempfile.TemporaryDirectory() as tmpdirname: + cf_convention = 'CF-1.8' + write_file = Path(tmpdirname, Path(sample_files.EXAMPLE_EBBR1).name) + sonde_ds.write.write_netcdf( + path=write_file, + make_copy=False, + join_char='_', + cf_compliant=True, + cf_convention=cf_convention, + ) + + sonde_ds_read = act.io.arm.read_arm_netcdf(str(write_file)) + + assert cf_convention in sonde_ds_read.attrs['Conventions'].split() + assert sonde_ds_read.attrs['FeatureType'] == 'timeSeries' + global_att_keys = [ii for ii in sonde_ds_read.attrs.keys() if not ii.startswith('_')] + assert global_att_keys[-1] == 'history' + assert sonde_ds_read['alt'].attrs['axis'] == 'Z' + assert sonde_ds_read['alt'].attrs['positive'] == 'up' + + sonde_ds_read.close() + del sonde_ds_read + + sonde_ds.close() + + ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) + with tempfile.TemporaryDirectory() as tmpdirname: + cf_convention = 'CF-1.8' + write_file = Path(tmpdirname, Path(sample_files.EXAMPLE_CEIL1).name) + ds.write.write_netcdf( + path=write_file, + make_copy=False, + join_char='_', + cf_compliant=True, + cf_convention=cf_convention, + ) + + ds_read = act.io.arm.read_arm_netcdf(str(write_file)) + + assert cf_convention in ds_read.attrs['Conventions'].split() + assert ds_read.attrs['FeatureType'] == 'timeSeriesProfile' + assert len(ds_read.dims) > 1 + + ds_read.close() + del ds_read + + +def test_clean_cf_qc(): + with tempfile.TemporaryDirectory() as tmpdirname: + ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1, cleanup_qc=True) + ds.load() + var_name = 'temp_mean' + qc_var_name = 'qc_' + var_name + ds.qcfilter.remove_test(var_name, test_number=4) + ds.qcfilter.remove_test(var_name, test_number=3) + ds.qcfilter.remove_test(var_name, test_number=2) + ds[qc_var_name].attrs['flag_masks'] = ds[qc_var_name].attrs['flag_masks'][0] + flag_meanings = ds[qc_var_name].attrs['flag_meanings'][0] + ds[qc_var_name].attrs['flag_meanings'] = flag_meanings.replace(' ', '__') + flag_meanings = ds[qc_var_name].attrs['flag_assessments'][0] + ds[qc_var_name].attrs['flag_assessments'] = flag_meanings.replace(' ', '__') + + write_file = str(Path(tmpdirname, Path(sample_files.EXAMPLE_MET1).name)) + ds.write.write_netcdf(path=write_file, cf_compliant=True) + ds.close() + del ds + + read_ds = act.io.arm.read_arm_netcdf(write_file, cleanup_qc=True) + read_ds.load() + + assert type(read_ds[qc_var_name].attrs['flag_masks']).__module__ == 'numpy' + assert read_ds[qc_var_name].attrs['flag_masks'].size == 1 + assert read_ds[qc_var_name].attrs['flag_masks'][0] == 1 + assert isinstance(read_ds[qc_var_name].attrs['flag_meanings'], list) + assert len(read_ds[qc_var_name].attrs['flag_meanings']) == 1 + assert isinstance(read_ds[qc_var_name].attrs['flag_assessments'], list) + assert len(read_ds[qc_var_name].attrs['flag_assessments']) == 1 + assert read_ds[qc_var_name].attrs['flag_assessments'] == ['Bad'] + assert read_ds[qc_var_name].attrs['flag_meanings'] == ['Value is equal to missing_value.'] + + read_ds.close() + del read_ds + + +def test_read_mmcr(): + results = act.tests.EXAMPLE_MMCR + ds = act.io.arm.read_arm_mmcr(results) + assert 'MeanDopplerVelocity_PR' in ds + assert 'SpectralWidth_BL' in ds + np.testing.assert_almost_equal( + ds['Reflectivity_GE'].mean(), -34.62, decimal=2) + np.testing.assert_almost_equal( + ds['MeanDopplerVelocity_Receiver1'].max(), 9.98, decimal=2) diff --git 
a/act/tests/io/test_csv.py b/act/tests/io/test_csv.py new file mode 100644 index 0000000000..aff5315112 --- /dev/null +++ b/act/tests/io/test_csv.py @@ -0,0 +1,42 @@ +import glob + +import act + + +def test_io_csv(): + headers = [ + 'day', + 'month', + 'year', + 'time', + 'pasquill', + 'wdir_60m', + 'wspd_60m', + 'wdir_60m_std', + 'temp_60m', + 'wdir_10m', + 'wspd_10m', + 'wdir_10m_std', + 'temp_10m', + 'temp_dp', + 'rh', + 'avg_temp_diff', + 'total_precip', + 'solar_rad', + 'net_rad', + 'atmos_press', + 'wv_pressure', + 'temp_soil_10cm', + 'temp_soil_100cm', + 'temp_soil_10ft', + ] + anl_ds = act.io.csv.read_csv(act.tests.EXAMPLE_ANL_CSV, sep=r'\s+', column_names=headers) + assert 'temp_60m' in anl_ds.variables.keys() + assert 'rh' in anl_ds.variables.keys() + assert anl_ds['temp_60m'].values[10] == -1.7 + anl_ds.close() + + files = glob.glob(act.tests.EXAMPLE_MET_CSV) + ds = act.io.csv.read_csv(files[0]) + assert 'date_time' in ds + assert '_datastream' in ds.attrs diff --git a/act/tests/io/test_icartt.py b/act/tests/io/test_icartt.py new file mode 100644 index 0000000000..1dacb39f2a --- /dev/null +++ b/act/tests/io/test_icartt.py @@ -0,0 +1,17 @@ + +import numpy as np +import pytest + +import act + + +@pytest.mark.skipif(not act.io.icartt._ICARTT_AVAILABLE, + reason="ICARTT is not installed.") +def test_read_icartt(): + result = act.io.icartt.read_icartt(act.tests.EXAMPLE_AAF_ICARTT) + assert 'pitch' in result + assert len(result['time'].values) == 14087 + assert result['true_airspeed'].units == 'm/s' + assert 'Revision' in result.attrs + np.testing.assert_almost_equal( + result['static_pressure'].mean(), 708.75, decimal=2) diff --git a/act/tests/io/test_mpl.py b/act/tests/io/test_mpl.py new file mode 100644 index 0000000000..115070adc0 --- /dev/null +++ b/act/tests/io/test_mpl.py @@ -0,0 +1,24 @@ +import act + + +def test_io_mpldataset(): + try: + mpl_ds = act.io.mpl.read_sigma_mplv5(act.tests.EXAMPLE_SIGMA_MPLV5) + except Exception: + return + + # Tests fields + assert 'channel_1' in mpl_ds.variables.keys() + assert 'temp_0' in mpl_ds.variables.keys() + assert mpl_ds.channel_1.values.shape == (102, 1000) + + # Tests coordinates + assert 'time' in mpl_ds.coords.keys() + assert 'range' in mpl_ds.coords.keys() + assert mpl_ds.coords['time'].values.shape == (102,) + assert mpl_ds.coords['range'].values.shape == (1000,) + assert '_arm_standards_flag' in mpl_ds.attrs.keys() + + # Tests attributes + assert '_datastream' in mpl_ds.attrs.keys() + mpl_ds.close() diff --git a/act/tests/io/test_neon.py b/act/tests/io/test_neon.py new file mode 100644 index 0000000000..7ec87edaf1 --- /dev/null +++ b/act/tests/io/test_neon.py @@ -0,0 +1,21 @@ +import glob + +import act + + +def test_read_neon(): + data_file = glob.glob(act.tests.EXAMPLE_NEON) + variable_file = glob.glob(act.tests.EXAMPLE_NEON_VARIABLE) + position_file = glob.glob(act.tests.EXAMPLE_NEON_POSITION) + + ds = act.io.neon.read_neon_csv(data_file) + assert len(ds['time'].values) == 17280 + assert 'time' in ds + assert 'tempSingleMean' in ds + assert ds['tempSingleMean'].values[0] == -0.6003 + + ds = act.io.neon.read_neon_csv(data_file, variable_files=variable_file, position_files=position_file) + assert ds['northOffset'].values == -5.79 + assert ds['tempSingleMean'].attrs['units'] == 'celsius' + assert 'lat' in ds + assert ds['lat'].values == 71.282425 diff --git a/act/tests/io/test_noaagml.py b/act/tests/io/test_noaagml.py new file mode 100644 index 0000000000..dbea952652 --- /dev/null +++ b/act/tests/io/test_noaagml.py @@ -0,0 +1,130 @@ 
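+# read_gml is assumed to dispatch to a product-specific parser based on the
+# datatype argument and to infer the product from the filename when datatype
+# is omitted; each product below is read both ways to cover both paths, e.g.:
+#     ds = read_gml(sample_files.EXAMPLE_GML_MET)  # datatype inferred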
+import numpy as np + +import act +from act.tests import sample_files +from act.io import read_gml + + +def test_read_gml(): + # Test Radiation + ds = read_gml(sample_files.EXAMPLE_GML_RADIATION, datatype='RADIATION') + assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1725.28) + assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4431.88) + assert ( + ds['upwelling_infrared_case_temp'].attrs['ancillary_variables'] + == 'qc_upwelling_infrared_case_temp' + ) + assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_values'] == [0, 1, 2] + assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_meanings'] == [ + 'Not failing any tests', + 'Knowingly bad value', + 'Should be used with scrutiny', + ] + assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_assessments'] == [ + 'Good', + 'Bad', + 'Indeterminate', + ] + assert ds['time'].values[-1] == np.datetime64('2021-01-01T00:17:00') + + ds = read_gml(sample_files.EXAMPLE_GML_RADIATION, convert_missing=False) + assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1725.28) + assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4431.88) + assert ( + ds['upwelling_infrared_case_temp'].attrs['ancillary_variables'] + == 'qc_upwelling_infrared_case_temp' + ) + assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_values'] == [0, 1, 2] + assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_meanings'] == [ + 'Not failing any tests', + 'Knowingly bad value', + 'Should be used with scrutiny', + ] + assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_assessments'] == [ + 'Good', + 'Bad', + 'Indeterminate', + ] + assert ds['time'].values[-1] == np.datetime64('2021-01-01T00:17:00') + + # Test MET + ds = read_gml(sample_files.EXAMPLE_GML_MET, datatype='MET') + assert np.isclose(np.nansum(ds['wind_speed'].values), 148.1) + assert ds['wind_speed'].attrs['units'] == 'm/s' + assert np.isnan(ds['wind_speed'].attrs['_FillValue']) + assert np.sum(np.isnan(ds['preciptation_intensity'].values)) == 20 + assert ds['preciptation_intensity'].attrs['units'] == 'mm/hour' + assert ds['time'].values[0] == np.datetime64('2020-01-01T00:00:00') + + ds = read_gml(sample_files.EXAMPLE_GML_MET, convert_missing=False) + assert np.isclose(np.nansum(ds['wind_speed'].values), 148.1) + assert ds['wind_speed'].attrs['units'] == 'm/s' + assert np.isclose(ds['wind_speed'].attrs['_FillValue'], -999.9) + assert np.sum(ds['preciptation_intensity'].values) == -1980 + assert ds['preciptation_intensity'].attrs['units'] == 'mm/hour' + assert ds['time'].values[0] == np.datetime64('2020-01-01T00:00:00') + + # Test Ozone + ds = read_gml(sample_files.EXAMPLE_GML_OZONE, datatype='OZONE') + assert np.isclose(np.nansum(ds['ozone'].values), 582.76) + assert ds['ozone'].attrs['long_name'] == 'Ozone' + assert ds['ozone'].attrs['units'] == 'ppb' + assert np.isnan(ds['ozone'].attrs['_FillValue']) + assert ds['time'].values[0] == np.datetime64('2020-12-01T00:00:00') + + ds = read_gml(sample_files.EXAMPLE_GML_OZONE) + assert np.isclose(np.nansum(ds['ozone'].values), 582.76) + assert ds['ozone'].attrs['long_name'] == 'Ozone' + assert ds['ozone'].attrs['units'] == 'ppb' + assert np.isnan(ds['ozone'].attrs['_FillValue']) + assert ds['time'].values[0] == np.datetime64('2020-12-01T00:00:00') + + # Test Carbon Dioxide + ds = read_gml(sample_files.EXAMPLE_GML_CO2, datatype='co2') + assert np.isclose(np.nansum(ds['co2'].values), 2307.630) + assert ( + ds['qc_co2'].values == np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int) + ).all() + assert 
ds['co2'].attrs['units'] == 'ppm' + assert np.isnan(ds['co2'].attrs['_FillValue']) + assert ds['qc_co2'].attrs['flag_assessments'] == ['Bad', 'Indeterminate'] + assert ds['latitude'].attrs['standard_name'] == 'latitude' + + ds = read_gml(sample_files.EXAMPLE_GML_CO2, convert_missing=False) + assert np.isclose(np.nansum(ds['co2'].values), -3692.3098) + assert ds['co2'].attrs['_FillValue'] == -999.99 + assert ( + ds['qc_co2'].values == np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int) + ).all() + assert ds['co2'].attrs['units'] == 'ppm' + assert np.isclose(ds['co2'].attrs['_FillValue'], -999.99) + assert ds['qc_co2'].attrs['flag_assessments'] == ['Bad', 'Indeterminate'] + assert ds['latitude'].attrs['standard_name'] == 'latitude' + + # Test Halocarbon + ds = read_gml(sample_files.EXAMPLE_GML_HALO, datatype='HALO') + assert np.isclose(np.nansum(ds['CCl4'].values), 1342.65) + assert ds['CCl4'].attrs['units'] == 'ppt' + assert ds['CCl4'].attrs['long_name'] == 'Carbon Tetrachloride (CCl4) daily median' + assert np.isnan(ds['CCl4'].attrs['_FillValue']) + assert ds['time'].values[0] == np.datetime64('1998-06-16T00:00:00') + + ds = read_gml(sample_files.EXAMPLE_GML_HALO) + assert np.isclose(np.nansum(ds['CCl4'].values), 1342.65) + assert ds['CCl4'].attrs['units'] == 'ppt' + assert ds['CCl4'].attrs['long_name'] == 'Carbon Tetrachloride (CCl4) daily median' + assert np.isnan(ds['CCl4'].attrs['_FillValue']) + assert ds['time'].values[0] == np.datetime64('1998-06-16T00:00:00') + + +def test_read_surfrad(): + url = ['https://gml.noaa.gov/aftp/data/radiation/surfrad/Boulder_CO/2023/tbl23008.dat'] + ds = act.io.noaagml.read_surfrad(url) + + assert 'qc_pressure' in ds + assert 'time' in ds + assert ds['wind_speed'].attrs['units'] == 'ms^-1' + assert len(ds) == 48 + assert ds['temperature'].values[0] == 2.0 + assert 'standard_name' in ds['temperature'].attrs + assert ds['temperature'].attrs['standard_name'] == 'air_temperature' diff --git a/act/tests/io/test_noaapsl.py b/act/tests/io/test_noaapsl.py new file mode 100644 index 0000000000..1e0c74f4c7 --- /dev/null +++ b/act/tests/io/test_noaapsl.py @@ -0,0 +1,149 @@ +import numpy as np +import pytest + +import act +from act.tests import sample_files +from act.io import read_psl_surface_met, read_psl_wind_profiler_temperature + + +def test_read_psl_wind_profiler(): + test_ds_low, test_ds_hi = act.io.noaapsl.read_psl_wind_profiler( + act.tests.EXAMPLE_NOAA_PSL, transpose=False + ) + # test dimensions + assert 'time' and 'HT' in test_ds_low.dims.keys() + assert 'time' and 'HT' in test_ds_hi.dims.keys() + assert test_ds_low.dims['time'] == 4 + assert test_ds_hi.dims['time'] == 4 + assert test_ds_low.dims['HT'] == 49 + assert test_ds_hi.dims['HT'] == 50 + + # test coordinates + assert ( + test_ds_low.coords['HT'][0:5] == np.array([0.151, 0.254, 0.356, 0.458, 0.561]) + ).all() + assert ( + test_ds_low.coords['time'][0:2] + == np.array( + ['2021-05-05T15:00:01.000000000', '2021-05-05T15:15:49.000000000'], + dtype='datetime64[ns]', + ) + ).all() + + # test attributes + assert test_ds_low.attrs['site_identifier'] == 'CTD' + assert test_ds_low.attrs['data_type'] == 'WINDS' + assert test_ds_low.attrs['revision_number'] == '5.1' + assert test_ds_low.attrs['latitude'] == 34.66 + assert test_ds_low.attrs['longitude'] == -87.35 + assert test_ds_low.attrs['elevation'] == 187.0 + assert (test_ds_low.attrs['beam_azimuth'] == np.array( + [38.0, 38.0, 308.0], dtype='float32')).all() + assert (test_ds_low.attrs['beam_elevation'] == np.array( + [90.0, 74.7, 74.7], 
dtype='float32')).all()
+    assert test_ds_low.attrs['consensus_average_time'] == 24
+    assert test_ds_low.attrs['oblique-beam_vertical_correction'] == 0
+    assert test_ds_low.attrs['number_of_beams'] == 3
+    assert test_ds_low.attrs['number_of_range_gates'] == 49
+    assert test_ds_low.attrs['number_of_gates_oblique'] == 49
+    assert test_ds_low.attrs['number_of_gates_vertical'] == 49
+    assert test_ds_low.attrs['number_spectral_averages_oblique'] == 50
+    assert test_ds_low.attrs['number_spectral_averages_vertical'] == 50
+    assert test_ds_low.attrs['pulse_width_oblique'] == 708
+    assert test_ds_low.attrs['pulse_width_vertical'] == 708
+    assert test_ds_low.attrs['inner_pulse_period_oblique'] == 50
+    assert test_ds_low.attrs['inner_pulse_period_vertical'] == 50
+    assert test_ds_low.attrs['full_scale_doppler_value_oblique'] == 20.9
+    assert test_ds_low.attrs['full_scale_doppler_value_vertical'] == 20.9
+    assert test_ds_low.attrs['delay_to_first_gate_oblique'] == 4000
+    assert test_ds_low.attrs['delay_to_first_gate_vertical'] == 4000
+    assert test_ds_low.attrs['spacing_of_gates_oblique'] == 708
+    assert test_ds_low.attrs['spacing_of_gates_vertical'] == 708
+
+    # test fields
+    assert test_ds_low['RAD1'].shape == (4, 49)
+    assert test_ds_hi['RAD1'].shape == (4, 50)
+    assert (test_ds_low['RAD1'][0, 0:5] == np.array(
+        [0.2, 0.1, 0.1, 0.0, -0.1])).all()
+    assert (test_ds_hi['RAD1'][0, 0:5] == np.array(
+        [0.1, 0.1, -0.1, 0.0, -0.2])).all()
+
+    assert test_ds_low['SPD'].shape == (4, 49)
+    assert test_ds_hi['SPD'].shape == (4, 50)
+    assert (test_ds_low['SPD'][0, 0:5] == np.array(
+        [2.5, 3.3, 4.3, 4.3, 4.8])).all()
+    assert (test_ds_hi['SPD'][0, 0:5] == np.array(
+        [3.7, 4.6, 6.3, 5.2, 6.8])).all()
+
+    # test transpose
+    test_ds_low, test_ds_hi = act.io.noaapsl.read_psl_wind_profiler(
+        act.tests.EXAMPLE_NOAA_PSL, transpose=True
+    )
+    assert test_ds_low['RAD1'].shape == (49, 4)
+    assert test_ds_hi['RAD1'].shape == (50, 4)
+    assert test_ds_low['SPD'].shape == (49, 4)
+    assert test_ds_hi['SPD'].shape == (50, 4)
+    test_ds_low.close()
+
+
+def test_read_psl_wind_profiler_temperature():
+    ds = read_psl_wind_profiler_temperature(
+        act.tests.EXAMPLE_NOAA_PSL_TEMPERATURE)
+
+    assert ds.attrs['site_identifier'] == 'CTD'
+    assert ds.attrs['elevation'] == 600.0
+    assert ds.T.values[0] == 33.2
+
+
+def test_read_psl_surface_met():
+    ds = read_psl_surface_met(sample_files.EXAMPLE_NOAA_PSL_SURFACEMET)
+    assert ds.time.size == 2
+    assert np.isclose(np.sum(ds['Pressure'].values), 1446.9)
+    assert np.isclose(ds['lat'].values, 38.972425)
+    assert ds['lat'].attrs['units'] == 'degree_N'
+    assert ds['Upward_Longwave_Irradiance'].attrs['long_name'] == 'Upward Longwave Irradiance'
+    assert ds['Upward_Longwave_Irradiance'].dtype.str == '<f8'
diff --git a/act/tests/plotting/test_timeseriesdisplay.py b/act/tests/plotting/test_timeseriesdisplay.py
new file mode 100644
--- /dev/null
+++ b/act/tests/plotting/test_timeseriesdisplay.py
+from datetime import datetime
+
+import matplotlib
+import numpy as np
+import pandas as pd
+import pytest
+
+import act
+from act.plotting import TimeSeriesDisplay
+from act.tests import sample_files
+
+matplotlib.use('Agg')
+
+
+@pytest.mark.mpl_image_compare(tolerance=30)
+def test_add_nan_line():
+    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
+
+    index = (ds.time.values <= np.datetime64('2019-01-01 04:00:00')) | (
+        ds.time.values >= np.datetime64('2019-01-01 06:00:00')
+    )
+    ds = ds.sel({'time': index})
+
+    index = (ds.time.values <= np.datetime64('2019-01-01 18:34:00')) | (
+        ds.time.values >= np.datetime64('2019-01-01 19:06:00')
+    )
+    ds = ds.sel({'time': index})
+
+    index = (ds.time.values <= np.datetime64('2019-01-01 12:30:00')) | (
+        ds.time.values >= np.datetime64('2019-01-01 12:40:00')
+    )
+    ds = ds.sel({'time': index})
+
+    display = TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(1,))
+    display.plot('temp_mean', subplot_index=(0,), add_nan=True, day_night_background=True)
+    ds.close()
+
+    try:
+        return display.fig
+    finally:
+        matplotlib.pyplot.close(display.fig)
+
+
+@pytest.mark.mpl_image_compare(tolerance=30)
+def test_timeseries_invert():
+    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_IRT25m20s)
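+    # invert_y_axis=True is expected to flip the y-axis limits after
+    # plotting; this test exercises that option against a baseline image.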
+    display = TimeSeriesDisplay(ds, figsize=(10, 8))
+    display.plot('inst_sfc_ir_temp', invert_y_axis=True)
+    ds.close()
+    return display.fig
+
+
+def test_plot_time_rng():
+    # Test that the xrange can be set with pandas or datetime datatypes even
+    # though the data is numpy. Checks that xrange values are correctly
+    # converted before being set and that no exception is raised.
+    met = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
+
+    # Plot data
+    xrng = [datetime(2019, 1, 1, 0, 0), datetime(2019, 1, 2, 0, 0)]
+    display = TimeSeriesDisplay(met)
+    display.plot('temp_mean', time_rng=xrng)
+
+    xrng = [pd.to_datetime('2019-01-01'), pd.to_datetime('2019-01-02')]
+    display = TimeSeriesDisplay(met)
+    display.plot('temp_mean', time_rng=xrng)
+
+
+@pytest.mark.mpl_image_compare(tolerance=30)
+def test_match_ylimits_plot():
+    files = sample_files.EXAMPLE_MET_WILDCARD
+    ds = act.io.arm.read_arm_netcdf(files)
+    display = act.plotting.TimeSeriesDisplay(ds, figsize=(10, 8), subplot_shape=(2, 2))
+    groupby = display.group_by('day')
+    groupby.plot_group('plot', None, field='temp_mean', marker=' ')
+    groupby.display.set_yrng([-20, 20], match_axes_ylimits=True)
+    ds.close()
+    return display.fig
+
+
+@pytest.mark.mpl_image_compare(tolerance=30)
+def test_xlim_correction_plot():
+    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
+
+    # Plot data
+    xrng = [datetime(2019, 1, 1, 0, 0, 0), datetime(2019, 1, 1, 0, 0, 0)]
+    display = TimeSeriesDisplay(ds)
+    display.plot('temp_mean', time_rng=xrng)
+
+    ds.close()
+
+    return display.fig
diff --git a/act/tests/plotting/test_windrosedisplay.py b/act/tests/plotting/test_windrosedisplay.py
new file mode 100644
index 0000000000..086423208c
--- /dev/null
+++ b/act/tests/plotting/test_windrosedisplay.py
@@ -0,0 +1,156 @@
+import matplotlib
+import numpy as np
+import pytest
+
+import act
+from act.tests import sample_files
+from act.plotting import WindRoseDisplay
+
+matplotlib.use('Agg')
+
+
+@pytest.mark.mpl_image_compare(tolerance=30)
+def test_wind_rose():
+    sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_TWP_SONDE_WILDCARD)
+
+    WindDisplay = WindRoseDisplay(sonde_ds, figsize=(10, 10))
+    WindDisplay.plot(
+        'deg',
+        'wspd',
+        spd_bins=np.linspace(0, 20, 10),
+        num_dirs=30,
+        tick_interval=2,
+        cmap='viridis',
+    )
+    WindDisplay.set_thetarng(trng=(0.0, 360.0))
+    WindDisplay.set_rrng((0.0, 14))
+
+    sonde_ds.close()
+
+    try:
+        return WindDisplay.fig
+    finally:
+        matplotlib.pyplot.close(WindDisplay.fig)
+
+
+@pytest.mark.mpl_image_compare(tolerance=30)
+def test_plot_datarose():
+    files = sample_files.EXAMPLE_MET_WILDCARD
+    ds = act.io.arm.read_arm_netcdf(files)
+    display = act.plotting.WindRoseDisplay(ds, subplot_shape=(2, 3), figsize=(16, 10))
+    display.plot_data(
+        'wdir_vec_mean',
+        'wspd_vec_mean',
+        'temp_mean',
+        num_dirs=12,
+        plot_type='line',
+        subplot_index=(0, 0),
+    )
+    display.plot_data(
+        'wdir_vec_mean',
+        'wspd_vec_mean',
+        'temp_mean',
+        num_dirs=12,
+        plot_type='line',
+        subplot_index=(0, 1),
+        line_plot_calc='median',
+    )
+    display.plot_data(
+        'wdir_vec_mean',
+        'wspd_vec_mean',
+        'temp_mean',
+        num_dirs=12,
+        plot_type='line',
+        subplot_index=(0, 2),
+        line_plot_calc='stdev',
+    )
+    display.plot_data(
+        'wdir_vec_mean',
+        'wspd_vec_mean',
+        'temp_mean',
+        num_dirs=12,
+        plot_type='contour',
+        subplot_index=(1, 0),
+    )
+    display.plot_data(
+        'wdir_vec_mean',
+        'wspd_vec_mean',
+        'temp_mean',
+        num_dirs=12,
+        plot_type='contour',
+        contour_type='mean',
+        num_data_bins=10,
+        clevels=21,
+        cmap='rainbow',
+        vmin=-5,
+        vmax=20,
+
subplot_index=(1, 1), + ) + display.plot_data( + 'wdir_vec_mean', + 'wspd_vec_mean', + 'temp_mean', + num_dirs=12, + plot_type='boxplot', + subplot_index=(1, 2), + ) + + display2 = act.plotting.WindRoseDisplay( + {'ds1': ds, 'ds2': ds}, subplot_shape=(2, 3), figsize=(16, 10) + ) + with np.testing.assert_raises(ValueError): + display2.plot_data( + 'wdir_vec_mean', + 'wspd_vec_mean', + 'temp_mean', + dsname='ds1', + num_dirs=12, + plot_type='line', + line_plot_calc='T', + subplot_index=(0, 0), + ) + with np.testing.assert_raises(ValueError): + display2.plot_data( + 'wdir_vec_mean', + 'wspd_vec_mean', + 'temp_mean', + num_dirs=12, + plot_type='line', + subplot_index=(0, 0), + ) + with np.testing.assert_raises(ValueError): + display2.plot_data( + 'wdir_vec_mean', + 'wspd_vec_mean', + 'temp_mean', + num_dirs=12, + plot_type='groovy', + subplot_index=(0, 0), + ) + + return display.fig + + +@pytest.mark.mpl_image_compare(tolerance=30) +def test_groupby_plot(): + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD) + + # Create Plot Display + display = WindRoseDisplay(ds, figsize=(15, 15), subplot_shape=(3, 3)) + groupby = display.group_by('day') + groupby.plot_group( + 'plot_data', + None, + dir_field='wdir_vec_mean', + spd_field='wspd_vec_mean', + data_field='temp_mean', + num_dirs=12, + plot_type='line', + ) + + # Set theta tick markers for each axis inside display to be inside the polar axes + for i in range(3): + for j in range(3): + display.axes[i, j].tick_params(pad=-20) + ds.close() + return display.fig diff --git a/act/tests/plotting/test_xsectiondisplay.py b/act/tests/plotting/test_xsectiondisplay.py new file mode 100644 index 0000000000..b687da5977 --- /dev/null +++ b/act/tests/plotting/test_xsectiondisplay.py @@ -0,0 +1,75 @@ +import matplotlib +import numpy as np +import pytest + +import act +from act.tests import sample_files +from act.plotting import XSectionDisplay + +try: + import cartopy + + CARTOPY_AVAILABLE = True +except ImportError: + CARTOPY_AVAILABLE = False + +matplotlib.use('Agg') + + +def test_xsection_errors(): + ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) + + display = XSectionDisplay(ds, figsize=(10, 8), subplot_shape=(2,)) + display.axes = None + with np.testing.assert_raises(RuntimeError): + display.set_yrng([0, 10]) + with np.testing.assert_raises(RuntimeError): + display.set_xrng([-40, 40]) + + display = XSectionDisplay(ds, figsize=(10, 8), subplot_shape=(1,)) + with np.testing.assert_raises(RuntimeError): + display.plot_xsection(None, 'backscatter', x='time', cmap='HomeyerRainbow') + + ds.close() + matplotlib.pyplot.close(fig=display.fig) + + +@pytest.mark.mpl_image_compare(tolerance=31) +def test_xsection_plot(): + visst_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1) + + xsection = XSectionDisplay(visst_ds, figsize=(10, 8)) + xsection.plot_xsection( + None, 'backscatter', x='time', y='range', cmap='coolwarm', vmin=0, vmax=320 + ) + visst_ds.close() + + try: + return xsection.fig + finally: + matplotlib.pyplot.close(xsection.fig) + + +@pytest.mark.skipif(not CARTOPY_AVAILABLE, reason='Cartopy is not installed.') +@pytest.mark.mpl_image_compare(tolerance=30) +def test_xsection_plot_map(): + radar_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_VISST, combine='nested', concat_dim='time') + try: + xsection = XSectionDisplay(radar_ds, figsize=(15, 8)) + xsection.plot_xsection_map( + None, + 'ir_temperature', + vmin=220, + vmax=300, + cmap='Greys', + x='longitude', + y='latitude', + isel_kwargs={'time': 0}, + ) + 
radar_ds.close() + try: + return xsection.fig + finally: + matplotlib.pyplot.close(xsection.fig) + except Exception: + pass diff --git a/act/tests/qc/test_add_supplemental_qc.py b/act/tests/qc/test_add_supplemental_qc.py new file mode 100644 index 0000000000..d1c9883bac --- /dev/null +++ b/act/tests/qc/test_add_supplemental_qc.py @@ -0,0 +1,57 @@ +from pathlib import Path + +import numpy as np + +from act.io.arm import read_arm_netcdf +from act.tests import EXAMPLE_MET1, EXAMPLE_MET_YAML +from act.qc.add_supplemental_qc import read_yaml_supplemental_qc, apply_supplemental_qc + + +def test_read_yaml_supplemental_qc(): + ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=['temp_mean', 'qc_temp_mean'], cleanup_qc=True) + + result = read_yaml_supplemental_qc(ds, EXAMPLE_MET_YAML) + assert isinstance(result, dict) + assert len(result.keys()) == 3 + + result = read_yaml_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, variables='temp_mean', + assessments=['Bad', 'Incorrect', 'Suspect']) + assert len(result.keys()) == 2 + assert sorted(result['temp_mean'].keys()) == ['Bad', 'Suspect'] + + result = read_yaml_supplemental_qc(ds, 'sgpmetE13.b1.yaml', quiet=True) + assert result is None + + apply_supplemental_qc(ds, EXAMPLE_MET_YAML) + assert ds['qc_temp_mean'].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32, 64, 128, 256] + assert ds['qc_temp_mean'].attrs['flag_assessments'] == [ + 'Bad', 'Bad', 'Bad', 'Indeterminate', 'Bad', 'Bad', 'Suspect', 'Good', 'Bad'] + assert ds['qc_temp_mean'].attrs['flag_meanings'][0] == 'Value is equal to missing_value.' + assert ds['qc_temp_mean'].attrs['flag_meanings'][-1] == 'Values are bad for all' + assert ds['qc_temp_mean'].attrs['flag_meanings'][-2] == 'Values are good' + assert np.sum(ds['qc_temp_mean'].values) == 81344 + assert np.count_nonzero(ds['qc_temp_mean'].values) == 1423 + + del ds + + ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=['temp_mean', 'qc_temp_mean'], cleanup_qc=True) + apply_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, apply_all=False) + assert ds['qc_temp_mean'].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32, 64, 128] + + ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True) + apply_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, exclude_all_variables='temp_mean') + assert ds['qc_rh_mean'].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32, 64, 128] + assert 'Values are bad for all' in ds['qc_rh_mean'].attrs['flag_meanings'] + assert 'Values are bad for all' not in ds['qc_temp_mean'].attrs['flag_meanings'] + + del ds + + ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=['temp_mean', 'rh_mean']) + apply_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, exclude_all_variables='temp_mean', + assessments='Bad', quiet=True) + assert ds['qc_rh_mean'].attrs['flag_assessments'] == ['Bad'] + assert ds['qc_temp_mean'].attrs['flag_assessments'] == ['Bad', 'Bad'] + assert np.sum(ds['qc_rh_mean'].values) == 124 + assert np.sum(ds['qc_temp_mean'].values) == 2840 + + del ds diff --git a/act/tests/qc/test_arm_qc.py b/act/tests/qc/test_arm_qc.py new file mode 100644 index 0000000000..e118648706 --- /dev/null +++ b/act/tests/qc/test_arm_qc.py @@ -0,0 +1,33 @@ +import numpy as np + +from act.io.arm import read_arm_netcdf +from act.qc.arm import add_dqr_to_qc +from act.tests import EXAMPLE_ENA_MET, EXAMPLE_OLD_QC + + +def test_scalar_dqr(): + # Test DQR Webservice using known DQR + ds = read_arm_netcdf(EXAMPLE_ENA_MET) + + # DQR webservice does go down, so ensure it + # properly runs first before testing + try: + ds = add_dqr_to_qc(ds) + ran = True + except 
ValueError: + ran = False + + if ran: + assert 'qc_lat' in ds + assert np.size(ds['qc_lon'].values) == 1 + assert np.size(ds['qc_lat'].values) == 1 + assert np.size(ds['qc_alt'].values) == 1 + assert np.size(ds['base_time'].values) == 1 + + +def test_get_attr_info(): + ds = read_arm_netcdf(EXAMPLE_OLD_QC, cleanup_qc=True) + assert 'flag_assessments' in ds['qc_lv'].attrs + assert 'fail_min' in ds['qc_lv'].attrs + assert ds['qc_lv'].attrs['flag_assessments'][0] == 'Bad' + assert ds['qc_lv'].attrs['flag_masks'][-1] == 4 diff --git a/act/tests/qc/test_bsrn_tests.py b/act/tests/qc/test_bsrn_tests.py new file mode 100644 index 0000000000..c54773e705 --- /dev/null +++ b/act/tests/qc/test_bsrn_tests.py @@ -0,0 +1,251 @@ +import copy +import dask.array as da +import numpy as np +import xarray as xr + +from act.io.arm import read_arm_netcdf +from act.tests import EXAMPLE_BRS +from act.qc.bsrn_tests import _calculate_solar_parameters + + +def test_bsrn_limits_test(): + for use_dask in [False, True]: + ds = read_arm_netcdf(EXAMPLE_BRS) + var_names = list(ds.data_vars) + # Remove QC variables to make testing easier + for var_name in var_names: + if var_name.startswith('qc_'): + del ds[var_name] + + # Add atmospheric temperature fake data + ds['temp_mean'] = xr.DataArray( + data=np.full(ds.time.size, 13.5), dims=['time'], + attrs={'long_name': 'Atmospheric air temperature', 'units': 'degC'}) + + # Make a short direct variable since BRS does not have one + ds['short_direct'] = copy.deepcopy(ds['short_direct_normal']) + ds['short_direct'].attrs['ancillary_variables'] = 'qc_short_direct' + ds['short_direct'].attrs['long_name'] = 'Shortwave direct irradiance, pyrheliometer' + _, _ = _calculate_solar_parameters(ds, 'lat', 'lon', 1360.8) + ds['short_direct'].data = ds['short_direct'].data * .5 + + # Make up long variable since BRS does not have values + ds['up_long_hemisp'].data = copy.deepcopy(ds['down_long_hemisp_shaded'].data) + data = copy.deepcopy(ds['down_short_hemisp'].data) + ds['up_short_hemisp'].data = data + + # Test that nothing happens when no variable names are provided + ds.qcfilter.bsrn_limits_test() + + # Mess with data to get tests to trip + data = ds['down_short_hemisp'].values + data[200:300] -= 10 + data[800:850] += 330 + data[1340:1380] += 600 + ds['down_short_hemisp'].data = da.from_array(data) + + data = ds['down_short_diffuse_hemisp'].values + data[200:250] = data[200:250] - 1.9 + data[250:300] = data[250:300] - 3.9 + data[800:850] += 330 + data[1340:1380] += 600 + ds['down_short_diffuse_hemisp'].data = da.from_array(data) + + data = ds['short_direct_normal'].values + data[200:250] = data[200:250] - 1.9 + data[250:300] = data[250:300] - 3.9 + data[800:850] += 600 + data[1340:1380] += 800 + ds['short_direct_normal'].data = da.from_array(data) + + data = ds['short_direct'].values + data[200:250] = data[200:250] - 1.9 + data[250:300] = data[250:300] - 3.9 + data[800:850] += 300 + data[1340:1380] += 800 + ds['short_direct'].data = da.from_array(data) + + data = ds['down_long_hemisp_shaded'].values + data[200:250] = data[200:250] - 355 + data[250:300] = data[250:300] - 400 + data[800:850] += 200 + data[1340:1380] += 400 + ds['down_long_hemisp_shaded'].data = da.from_array(data) + + data = ds['up_long_hemisp'].values + data[200:250] = data[200:250] - 355 + data[250:300] = data[250:300] - 400 + data[800:850] += 300 + data[1340:1380] += 500 + ds['up_long_hemisp'].data = da.from_array(data) + + ds.qcfilter.bsrn_limits_test( + gbl_SW_dn_name='down_short_hemisp', + 
glb_diffuse_SW_dn_name='down_short_diffuse_hemisp', + direct_normal_SW_dn_name='short_direct_normal', + glb_SW_up_name='up_short_hemisp', + glb_LW_dn_name='down_long_hemisp_shaded', + glb_LW_up_name='up_long_hemisp', + direct_SW_dn_name='short_direct', + use_dask=use_dask) + + assert ds['qc_down_short_hemisp'].attrs['flag_masks'] == [1, 2] + assert ds['qc_down_short_hemisp'].attrs['flag_meanings'][-2] == \ + 'Value less than BSRN physically possible limit of -4.0 W/m^2' + assert ds['qc_down_short_hemisp'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN physically possible limit' + + assert ds['qc_down_short_diffuse_hemisp'].attrs['flag_masks'] == [1, 2] + assert ds['qc_down_short_diffuse_hemisp'].attrs['flag_assessments'] == ['Bad', 'Bad'] + + assert ds['qc_short_direct'].attrs['flag_masks'] == [1, 2] + assert ds['qc_short_direct'].attrs['flag_assessments'] == ['Bad', 'Bad'] + assert ds['qc_short_direct'].attrs['flag_meanings'] == \ + ['Value less than BSRN physically possible limit of -4.0 W/m^2', + 'Value greater than BSRN physically possible limit'] + + assert ds['qc_short_direct_normal'].attrs['flag_masks'] == [1, 2] + assert ds['qc_short_direct_normal'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN physically possible limit' + + assert ds['qc_down_short_hemisp'].attrs['flag_masks'] == [1, 2] + assert ds['qc_down_short_hemisp'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN physically possible limit' + + assert ds['qc_up_short_hemisp'].attrs['flag_masks'] == [1, 2] + assert ds['qc_up_short_hemisp'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN physically possible limit' + + assert ds['qc_up_long_hemisp'].attrs['flag_masks'] == [1, 2] + assert ds['qc_up_long_hemisp'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN physically possible limit of 900.0 W/m^2' + + ds.qcfilter.bsrn_limits_test( + test="Extremely Rare", + gbl_SW_dn_name='down_short_hemisp', + glb_diffuse_SW_dn_name='down_short_diffuse_hemisp', + direct_normal_SW_dn_name='short_direct_normal', + glb_SW_up_name='up_short_hemisp', + glb_LW_dn_name='down_long_hemisp_shaded', + glb_LW_up_name='up_long_hemisp', + direct_SW_dn_name='short_direct', + use_dask=use_dask) + + assert ds['qc_down_short_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds['qc_down_short_diffuse_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds['qc_short_direct'].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds['qc_short_direct_normal'].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds['qc_up_short_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8] + assert ds['qc_up_long_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8] + + assert ds['qc_up_long_hemisp'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN extremely rare limit of 700.0 W/m^2' + + assert ds['qc_down_long_hemisp_shaded'].attrs['flag_meanings'][-1] == \ + 'Value greater than BSRN extremely rare limit of 500.0 W/m^2' + + # down_short_hemisp + result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=1) + assert np.sum(result) == 100 + result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=2) + assert np.sum(result) == 26 + result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=3) + assert np.sum(result) == 337 + result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=4) + assert np.sum(result) == 66 + + # down_short_diffuse_hemisp + result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=1) + assert np.sum(result) == 50 + result = 
ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=2)
+        assert np.sum(result) == 56
+        result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=3)
+        assert np.sum(result) == 100
+        result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=4)
+        assert np.sum(result) == 90
+
+        # short_direct_normal
+        result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=1)
+        assert np.sum(result) == 46
+        result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=2)
+        assert np.sum(result) == 26
+        result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=3)
+        assert np.sum(result) == 94
+        result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=4)
+        assert np.sum(result) == 38
+
+        # short_direct
+        result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=1)
+        assert np.sum(result) == 41
+        result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=2)
+        assert np.sum(result) == 607
+        result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=3)
+        assert np.sum(result) == 89
+        result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=4)
+        assert np.sum(result) == 79
+
+        # down_long_hemisp_shaded
+        result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=1)
+        assert np.sum(result) == 50
+        result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=2)
+        assert np.sum(result) == 40
+        result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=3)
+        assert np.sum(result) == 89
+        result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=4)
+        assert np.sum(result) == 90
+
+        # up_long_hemisp
+        result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=1)
+        assert np.sum(result) == 50
+        result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=2)
+        assert np.sum(result) == 40
+        result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=3)
+        assert np.sum(result) == 89
+        result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=4)
+        assert np.sum(result) == 90
+
+        # Change data values to trip tests
+        ds['down_short_diffuse_hemisp'].values[0:100] = \
+            ds['down_short_diffuse_hemisp'].values[0:100] + 100
+        ds['up_long_hemisp'].values[0:100] = \
+            ds['up_long_hemisp'].values[0:100] - 200
+
+        ds.qcfilter.bsrn_comparison_tests(
+            ['Global over Sum SW Ratio', 'Diffuse Ratio', 'SW up', 'LW down to air temp',
+             'LW up to air temp', 'LW down to LW up'],
+            gbl_SW_dn_name='down_short_hemisp',
+            glb_diffuse_SW_dn_name='down_short_diffuse_hemisp',
+            direct_normal_SW_dn_name='short_direct_normal',
+            glb_SW_up_name='up_short_hemisp',
+            glb_LW_dn_name='down_long_hemisp_shaded',
+            glb_LW_up_name='up_long_hemisp',
+            air_temp_name='temp_mean',
+            test_assessment='Indeterminate',
+            lat_name='lat',
+            lon_name='lon',
+            use_dask=use_dask
+        )
+
+        # Ratio of Global over Sum SW
+        result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=5)
+        assert np.sum(result) == 190
+
+        # Diffuse Ratio
+        result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=6)
+        assert np.sum(result) == 47
+
+        # Shortwave up comparison
+        result = ds.qcfilter.get_qc_test_mask('up_short_hemisp', test_number=5)
+        assert np.sum(result) == 226
+
+        # Longwave up to air temperature comparison
+        result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=5)
+        assert np.sum(result) == 290
+
+        # Longwave down to air temperature comparison
+        result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=5)
+        assert np.sum(result) == 976
+
+        # Longwave down to longwave up comparison
+        result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=6)
+        assert np.sum(result) == 100
diff --git a/act/tests/qc/test_clean.py b/act/tests/qc/test_clean.py
new file mode 100644
index 0000000000..cdbde35966
--- /dev/null
+++ b/act/tests/qc/test_clean.py
@@ -0,0 +1,160 @@
+import numpy as np
+
+from act.io.arm import read_arm_netcdf
+from act.tests import (
+    EXAMPLE_CEIL1,
+    EXAMPLE_CO2FLX4M,
+    EXAMPLE_MET1,
+)
+
+
+def test_global_qc_cleanup():
+    ds = read_arm_netcdf(EXAMPLE_MET1)
+    ds.load()
+    ds.clean.cleanup()
+
+    assert ds['qc_wdir_vec_mean'].attrs['flag_meanings'] == [
+        'Value is equal to missing_value.',
+        'Value is less than the fail_min.',
+        'Value is greater than the fail_max.',
+    ]
+    assert ds['qc_wdir_vec_mean'].attrs['flag_masks'] == [1, 2, 4]
+    assert ds['qc_wdir_vec_mean'].attrs['flag_assessments'] == [
+        'Bad',
+        'Bad',
+        'Bad',
+    ]
+
+    assert ds['qc_temp_mean'].attrs['flag_meanings'] == [
+        'Value is equal to missing_value.',
+        'Value is less than the fail_min.',
+        'Value is greater than the fail_max.',
+        'Difference between current and previous values exceeds fail_delta.',
+    ]
+    assert ds['qc_temp_mean'].attrs['flag_masks'] == [1, 2, 4, 8]
+    assert ds['qc_temp_mean'].attrs['flag_assessments'] == [
+        'Bad',
+        'Bad',
+        'Bad',
+        'Indeterminate',
+    ]
+
+    ds.close()
+    del ds
+
+
+def test_clean():
+    # Read test data
+    ceil_ds = read_arm_netcdf([EXAMPLE_CEIL1])
+    # Cleanup QC data
+    ceil_ds.clean.cleanup(clean_arm_state_vars=['detection_status'])
+
+    # Check that global attributes are removed
+    global_attributes = [
+        'qc_bit_comment',
+        'qc_bit_1_description',
+        'qc_bit_1_assessment',
+        'qc_bit_2_description',
+        'qc_bit_2_assessment',
+        'qc_bit_3_description',
+        'qc_bit_3_assessment',
+    ]
+
+    for glb_att in global_attributes:
+        assert glb_att not in ceil_ds.attrs.keys()
+
+    # Check that CF attributes are set including new flag_assessments
+    var_name = 'qc_first_cbh'
+    for attr_name in ['flag_masks', 'flag_meanings', 'flag_assessments']:
+        assert attr_name in ceil_ds[var_name].attrs.keys()
+        assert isinstance(ceil_ds[var_name].attrs[attr_name], list)
+
+    # Check that the flag_mask values are set correctly
+    assert ceil_ds['qc_first_cbh'].attrs['flag_masks'] == [1, 2, 4]
+
+    # Check that the flag_meanings values are set correctly
+    assert ceil_ds['qc_first_cbh'].attrs['flag_meanings'] == [
+        'Value is equal to missing_value.',
+        'Value is less than the fail_min.',
+        'Value is greater than the fail_max.',
+    ]
+
+    # Check the value of flag_assessments is as expected
+    assert ceil_ds['qc_first_cbh'].attrs['flag_assessments'] == ['Bad', 'Bad', 'Bad']
+
+    # Check that the ancillary_variables attribute is being added
+    assert 'qc_first_cbh' in ceil_ds['first_cbh'].attrs['ancillary_variables'].split()
+
+    # Check that state field is updated to CF
+    assert 'flag_values' in ceil_ds['detection_status'].attrs.keys()
+    assert isinstance(ceil_ds['detection_status'].attrs['flag_values'], list)
+    assert ceil_ds['detection_status'].attrs['flag_values'] == [0, 1, 2, 3, 4, 5]
+
+    assert 'flag_meanings' in ceil_ds['detection_status'].attrs.keys()
+    assert isinstance(ceil_ds['detection_status'].attrs['flag_meanings'], list)
+    assert ceil_ds['detection_status'].attrs['flag_meanings'] == [
+        'No significant backscatter',
+        'One cloud base detected',
+        'Two cloud bases detected',
+        'Three cloud bases detected',
+        'Full obscuration determined but no cloud base detected',
+        'Some obscuration detected but determined to be transparent',
+    ]
+
+    assert 'flag_0_description' not in ceil_ds['detection_status'].attrs.keys()
+    assert 'detection_status' in ceil_ds['first_cbh'].attrs['ancillary_variables'].split()
+
+    ceil_ds.close()
+
+
+def test_qc_remainder():
+    ds = read_arm_netcdf(EXAMPLE_MET1)
+    assert ds.clean.get_attr_info(variable='bad_name') is None
+    del ds.attrs['qc_bit_comment']
+    assert isinstance(ds.clean.get_attr_info(), dict)
+    ds.attrs['qc_flag_comment'] = 'testing'
+    ds.close()
+
+    ds = read_arm_netcdf(EXAMPLE_MET1)
+    ds.clean.cleanup(normalize_assessment=True)
+    ds['qc_atmos_pressure'].attrs['units'] = 'testing'
+    del ds['qc_temp_mean'].attrs['units']
+    del ds['qc_temp_mean'].attrs['flag_masks']
+    ds.clean.handle_missing_values()
+    ds.close()
+
+    ds = read_arm_netcdf(EXAMPLE_MET1)
+    ds.attrs['qc_bit_1_comment'] = 'testing'
+    data = ds['qc_atmos_pressure'].values.astype(np.int64)
+    data[0] = 2**32
+    ds['qc_atmos_pressure'].values = data
+    ds.clean.get_attr_info(variable='qc_atmos_pressure')
+    ds.clean.clean_arm_state_variables('testname')
+    ds.clean.cleanup()
+    ds['qc_atmos_pressure'].attrs['standard_name'] = 'wrong_name'
+    ds.clean.link_variables()
+    assert ds['qc_atmos_pressure'].attrs['standard_name'] == 'quality_flag'
+    ds.close()
+
+
+def test_qc_flag_description():
+    """
+    This will check if the cleanup() method will correctly convert
+    flag_#_description to CF flag_masks and flag_meanings.
+
+    """
+
+    ds = read_arm_netcdf(EXAMPLE_CO2FLX4M)
+    ds.clean.cleanup()
+    qc_var_name = ds.qcfilter.check_for_ancillary_qc(
+        'momentum_flux', add_if_missing=False, cleanup=False
+    )
+
+    assert isinstance(ds[qc_var_name].attrs['flag_masks'], list)
+    assert isinstance(ds[qc_var_name].attrs['flag_meanings'], list)
+    assert isinstance(ds[qc_var_name].attrs['flag_assessments'], list)
+    assert ds[qc_var_name].attrs['standard_name'] == 'quality_flag'
+
+    assert len(ds[qc_var_name].attrs['flag_masks']) == 9
+    unique_flag_assessments = list({'Acceptable', 'Indeterminate', 'Bad'})
+    for f in list(set(ds[qc_var_name].attrs['flag_assessments'])):
+        assert f in unique_flag_assessments
diff --git a/act/tests/qc/test_comparison_tests.py b/act/tests/qc/test_comparison_tests.py
new file mode 100644
index 0000000000..98b8149420
--- /dev/null
+++ b/act/tests/qc/test_comparison_tests.py
@@ -0,0 +1,97 @@
+import copy
+
+import numpy as np
+
+from act.io.arm import read_arm_netcdf
+from act.tests import EXAMPLE_MET1
+
+
+def test_compare_time_series_trends():
+    drop_vars = [
+        'base_time',
+        'time_offset',
+        'atmos_pressure',
+        'qc_atmos_pressure',
+        'temp_std',
+        'rh_mean',
+        'qc_rh_mean',
+        'rh_std',
+        'vapor_pressure_mean',
+        'qc_vapor_pressure_mean',
+        'vapor_pressure_std',
+        'wspd_arith_mean',
+        'qc_wspd_arith_mean',
+        'wspd_vec_mean',
+        'qc_wspd_vec_mean',
+        'wdir_vec_mean',
+        'qc_wdir_vec_mean',
+        'wdir_vec_std',
+        'tbrg_precip_total',
+        'qc_tbrg_precip_total',
+        'tbrg_precip_total_corr',
+        'qc_tbrg_precip_total_corr',
+        'org_precip_rate_mean',
+        'qc_org_precip_rate_mean',
+        'pwd_err_code',
+        'pwd_mean_vis_1min',
+        'qc_pwd_mean_vis_1min',
+        'pwd_mean_vis_10min',
+        'qc_pwd_mean_vis_10min',
+        'pwd_pw_code_inst',
+        'qc_pwd_pw_code_inst',
+        'pwd_pw_code_15min',
+        'qc_pwd_pw_code_15min',
+        'pwd_pw_code_1hr',
+        'qc_pwd_pw_code_1hr',
+        'pwd_precip_rate_mean_1min',
+        'qc_pwd_precip_rate_mean_1min',
+        'pwd_cumul_rain',
+        'qc_pwd_cumul_rain',
+        'pwd_cumul_snow',
+        'qc_pwd_cumul_snow',
+        'logger_volt',
+        'qc_logger_volt',
+        'logger_temp',
+        'qc_logger_temp',
+        'lat',
+        'lon',
+        'alt',
+    ]
+    ds = read_arm_netcdf(EXAMPLE_MET1, drop_variables=drop_vars)
+    ds.clean.cleanup()
+    ds2 = copy.deepcopy(ds)
+
+    var_name = 'temp_mean'
+    qc_var_name = ds.qcfilter.check_for_ancillary_qc(
+        var_name, add_if_missing=False, cleanup=False, flag_type=False
+    )
+    ds.qcfilter.compare_time_series_trends(
+        var_name=var_name,
+        time_shift=60,
+        comp_var_name=var_name,
+        comp_dataset=ds2,
+        time_qc_threshold=60 * 10,
+    )
+
+    test_description = (
+        'Time shift detected with Minimum Difference test. Comparison of '
+        'temp_mean with temp_mean off by 0 seconds exceeding absolute '
+        'threshold of 600 seconds.'
+    )
+    assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_description
+
+    time = ds2['time'].values + np.timedelta64(1, 'h')
+    time_attrs = ds2['time'].attrs
+    ds2 = ds2.assign_coords({'time': time})
+    ds2['time'].attrs = time_attrs
+
+    ds.qcfilter.compare_time_series_trends(
+        var_name=var_name, comp_dataset=ds2, time_step=60, time_match_threshhold=50
+    )
+
+    test_description = (
+        'Time shift detected with Minimum Difference test. Comparison of '
+        'temp_mean with temp_mean off by 3600 seconds exceeding absolute '
+        'threshold of 900 seconds.'
+    )
+    assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_description
diff --git a/act/tests/qc/test_qcfilter.py b/act/tests/qc/test_qcfilter.py
new file mode 100644
index 0000000000..a62bcff04b
--- /dev/null
+++ b/act/tests/qc/test_qcfilter.py
@@ -0,0 +1,475 @@
+import copy
+from datetime import datetime
+
+import dask.array as da
+import numpy as np
+import pandas as pd
+import pytest
+import xarray as xr
+
+from act.io.arm import read_arm_netcdf
+from act.qc.arm import add_dqr_to_qc
+from act.qc.qcfilter import parse_bit, set_bit, unset_bit
+from act.tests import (
+    EXAMPLE_MET1,
+    EXAMPLE_METE40,
+    EXAMPLE_IRT25m20s,
+)
+
+try:
+    import scikit_posthocs
+    SCIKIT_POSTHOCS_AVAILABLE = True
+except ImportError:
+    SCIKIT_POSTHOCS_AVAILABLE = False
+
+
+def test_qc_test_errors():
+    ds = read_arm_netcdf(EXAMPLE_MET1)
+    var_name = 'temp_mean'
+
+    assert ds.qcfilter.add_less_test(var_name, None) is None
+    assert ds.qcfilter.add_greater_test(var_name, None) is None
+    assert ds.qcfilter.add_less_equal_test(var_name, None) is None
+    assert ds.qcfilter.add_equal_to_test(var_name, None) is None
+    assert ds.qcfilter.add_not_equal_to_test(var_name, None) is None
+
+
+def test_arm_qc():
+    # Test DQR Webservice using known DQR
+    variable = 'wspd_vec_mean'
+    ds = read_arm_netcdf(EXAMPLE_METE40)
+    ds_org = copy.deepcopy(ds)
+    qc_variable = ds.qcfilter.check_for_ancillary_qc(variable)
+
+    # The DQR webservice does go down, so ensure it properly runs first before testing
+    try:
+        ds = add_dqr_to_qc(ds)
+
+    except ValueError:
+        return
+
+    assert 'Suspect' not in ds[qc_variable].attrs['flag_assessments']
+    assert 'Incorrect' not in ds[qc_variable].attrs['flag_assessments']
+    assert 'Bad' in ds[qc_variable].attrs['flag_assessments']
+    assert 'Indeterminate' in ds[qc_variable].attrs['flag_assessments']
+
+    # Check that the default will update all variables in the DQR
+    for var_name in ['wdir_vec_mean', 'wdir_vec_std', 'wspd_arith_mean', 'wspd_vec_mean']:
+        qc_var = ds.qcfilter.check_for_ancillary_qc(var_name)
+        assert ds[qc_var].attrs['flag_meanings'][-1].startswith('D190529.4')
+
+    # Check that the variable keyword works as expected.
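+    # Restricting the call to a single variable should apply the DQR only to
+    # that variable; QC for the other variables named in the DQR is expected
+    # to stay empty, which the flag_masks length check below verifies.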
+ ds = copy.deepcopy(ds_org) + add_dqr_to_qc(ds, variable=variable) + qc_var = ds.qcfilter.check_for_ancillary_qc(variable) + assert ds[qc_var].attrs['flag_meanings'][-1].startswith('D190529.4') + qc_var = ds.qcfilter.check_for_ancillary_qc('wdir_vec_std') + assert len(ds[qc_var].attrs['flag_masks']) == 0 + + # Check that include and exclude keywords work as expected + ds = copy.deepcopy(ds_org) + add_dqr_to_qc(ds, variable=variable, exclude=['D190529.4']) + assert len(ds[qc_variable].attrs['flag_meanings']) == 4 + add_dqr_to_qc(ds, variable=variable, include=['D400101.1']) + assert len(ds[qc_variable].attrs['flag_meanings']) == 4 + add_dqr_to_qc(ds, variable=variable, include=['D190529.4']) + assert len(ds[qc_variable].attrs['flag_meanings']) == 5 + add_dqr_to_qc(ds, variable=variable, assessment='Incorrect') + assert len(ds[qc_variable].attrs['flag_meanings']) == 5 + + # Test additional keywords + add_dqr_to_qc(ds, variable=variable, assessment='Suspect', cleanup_qc=False, + dqr_link=True, skip_location_vars=True) + assert len(ds[qc_variable].attrs['flag_meanings']) == 6 + + # Default is to normalize assessment terms. Check that we can turn off. + add_dqr_to_qc(ds, variable=variable, normalize_assessment=False) + assert 'Suspect' in ds[qc_variable].attrs['flag_assessments'] + + # Test that an error is raised when no datastream global attributes + with np.testing.assert_raises(ValueError): + ds4 = copy.deepcopy(ds) + del ds4.attrs['datastream'] + del ds4.attrs['_datastream'] + add_dqr_to_qc(ds4, variable=variable) + + +def test_qcfilter(): + ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + var_name = 'inst_up_long_dome_resist' + expected_qc_var_name = 'qc_' + var_name + + ds.qcfilter.check_for_ancillary_qc( + var_name, add_if_missing=True, cleanup=False, flag_type=False + ) + assert expected_qc_var_name in list(ds.keys()) + del ds[expected_qc_var_name] + + # Perform adding of quality control variables to Xarray dataset + result = ds.qcfilter.add_test(var_name, test_meaning='Birds!') + assert isinstance(result, dict) + qc_var_name = result['qc_variable_name'] + assert qc_var_name == expected_qc_var_name + + # Check that new linking and describing attributes are set + assert ds[qc_var_name].attrs['standard_name'] == 'quality_flag' + assert ds[var_name].attrs['ancillary_variables'] == qc_var_name + + # Check that CF attributes are set including new flag_assessments + assert 'flag_masks' in ds[qc_var_name].attrs.keys() + assert 'flag_meanings' in ds[qc_var_name].attrs.keys() + assert 'flag_assessments' in ds[qc_var_name].attrs.keys() + + # Check that the values of the attributes are set correctly + assert ds[qc_var_name].attrs['flag_assessments'][0] == 'Bad' + assert ds[qc_var_name].attrs['flag_meanings'][0] == 'Birds!' + assert ds[qc_var_name].attrs['flag_masks'][0] == 1 + + # Set some test values + index = [0, 1, 2, 30] + ds.qcfilter.set_test(var_name, index=index, test_number=result['test_number']) + + # Add a new test and set values + index2 = [6, 7, 8, 50] + ds.qcfilter.add_test( + var_name, + index=index2, + test_number=9, + test_meaning='testing high number', + test_assessment='Suspect', + ) + + # Retrieve data from Xarray dataset as numpy masked array. Count number of masked + # elements and ensure equal to size of index array. 
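+    # A masked array marks flagged samples in data.mask and np.ma.count_masked()
+    # tallies them, e.g. np.ma.count_masked(np.ma.masked_values([1., -9999.], -9999.)) == 1,
+    # so the counts below should equal the number of indexes each test flagged.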
+ data = ds.qcfilter.get_masked_data(var_name, rm_assessments='Bad') + assert np.ma.count_masked(data) == len(index) + + data = ds.qcfilter.get_masked_data( + var_name, rm_assessments='Suspect', return_nan_array=True + ) + assert np.sum(np.isnan(data)) == len(index2) + + data = ds.qcfilter.get_masked_data( + var_name, rm_assessments=['Bad', 'Suspect'], ma_fill_value=np.nan + ) + assert np.ma.count_masked(data) == len(index + index2) + + # Test internal function for returning the index array of where the + # tests are set. + assert ( + np.sum( + ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + - np.array(index, dtype=int) + ) + == 0 + ) + + # Test adding QC for length-1 variables + ds['west'] = ('west', ['W']) + ds['avg_wind_speed'] = ('west', [20]) + + # Should not fail the test + ds.qcfilter.add_test( + 'avg_wind_speed', + index=ds.avg_wind_speed.data > 100, + test_meaning='testing bool flag: false', + test_assessment='Suspect', + ) + assert ds.qc_avg_wind_speed.data == 0 + + # Should fail the test + ds.qcfilter.add_test( + 'avg_wind_speed', + index=ds.avg_wind_speed.data < 100, + test_meaning='testing bool flag: true', + test_assessment='Suspect', + ) + assert ds.qc_avg_wind_speed.data == 2 + + # Should fail the test + ds.qcfilter.add_test( + 'avg_wind_speed', + index=[0], + test_meaning='testing idx flag: true', + test_assessment='Suspect', + ) + assert ds.qc_avg_wind_speed.data == 6 + + # Should not fail the test + ds.qcfilter.add_test( + 'avg_wind_speed', + test_meaning='testing idx flag: false', + test_assessment='Suspect', + ) + assert ds.qc_avg_wind_speed.data == 6 + + # Unset a test + ds.qcfilter.unset_test(var_name, index=0, test_number=result['test_number']) + # Remove the test + ds.qcfilter.remove_test(var_name, test_number=33) + + # Ensure removal works when flag_masks is a numpy array + ds['qc_' + var_name].attrs['flag_masks'] = np.array(ds['qc_' + var_name].attrs['flag_masks']) + ds.qcfilter.remove_test(var_name, test_number=result['test_number']) + pytest.raises(ValueError, ds.qcfilter.add_test, var_name) + pytest.raises(ValueError, ds.qcfilter.remove_test, var_name) + + ds.close() + + assert np.all(parse_bit([257]) == np.array([1, 9], dtype=np.int32)) + pytest.raises(ValueError, parse_bit, [1, 2]) + pytest.raises(ValueError, parse_bit, -1) + + assert set_bit(0, 16) == 32768 + data = range(0, 4) + assert isinstance(set_bit(list(data), 2), list) + assert isinstance(set_bit(tuple(data), 2), tuple) + assert isinstance(unset_bit(list(data), 2), list) + assert isinstance(unset_bit(tuple(data), 2), tuple) + + # Fill in missing tests + ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + del ds[var_name].attrs['long_name'] + # Test creating a qc variable + ds.qcfilter.create_qc_variable(var_name) + # Test creating a second qc variable and of flag type + ds.qcfilter.create_qc_variable(var_name, flag_type=True) + result = ds.qcfilter.add_test( + var_name, + index=[1, 2, 3], + test_number=9, + test_meaning='testing high number', + flag_value=True, + ) + ds.qcfilter.set_test(var_name, index=5, test_number=9, flag_value=True) + data = ds.qcfilter.get_masked_data(var_name) + assert np.isclose(np.sum(data), 42674.766, 0.01) + data = ds.qcfilter.get_masked_data(var_name, rm_assessments='Bad') + assert np.isclose(np.sum(data), 42643.195, 0.01) + + ds.qcfilter.unset_test(var_name, test_number=9, flag_value=True) + ds.qcfilter.unset_test(var_name, index=1, test_number=9, flag_value=True) + assert ds.qcfilter.available_bit(result['qc_variable_name']) == 10 + assert 
ds.qcfilter.available_bit(result['qc_variable_name'], recycle=True) == 1
+    ds.qcfilter.remove_test(var_name, test_number=9, flag_value=True)
+
+    ds.qcfilter.update_ancillary_variable(var_name)
+    # Test updating the ancillary variable when it does not exist
+    ds.qcfilter.update_ancillary_variable('not_a_variable_name')
+    # Change the ancillary_variables attribute to test that the correct qc variable is added
+    ds[var_name].attrs['ancillary_variables'] = 'a_different_name'
+    ds.qcfilter.update_ancillary_variable(var_name, qc_var_name=expected_qc_var_name)
+    assert expected_qc_var_name in ds[var_name].attrs['ancillary_variables']
+
+    # Test flag QC
+    var_name = 'inst_sfc_ir_temp'
+    qc_var_name = 'qc_' + var_name
+    ds.qcfilter.create_qc_variable(var_name, flag_type=True)
+    assert qc_var_name in list(ds.data_vars)
+    assert 'flag_values' in ds[qc_var_name].attrs.keys()
+    assert 'flag_masks' not in ds[qc_var_name].attrs.keys()
+    del ds[qc_var_name]
+
+    qc_var_name = ds.qcfilter.check_for_ancillary_qc(
+        var_name, add_if_missing=True, cleanup=False, flag_type=True
+    )
+    assert qc_var_name in list(ds.data_vars)
+    assert 'flag_values' in ds[qc_var_name].attrs.keys()
+    assert 'flag_masks' not in ds[qc_var_name].attrs.keys()
+    del ds[qc_var_name]
+
+    ds.qcfilter.add_missing_value_test(var_name, flag_value=True, prepend_text='arm')
+    ds.qcfilter.add_test(
+        var_name,
+        index=list(range(0, 20)),
+        test_number=2,
+        test_meaning='Testing flag',
+        flag_value=True,
+        test_assessment='Suspect',
+    )
+    assert qc_var_name in list(ds.data_vars)
+    assert 'flag_values' in ds[qc_var_name].attrs.keys()
+    assert 'flag_masks' not in ds[qc_var_name].attrs.keys()
+    assert 'standard_name' in ds[qc_var_name].attrs.keys()
+    assert ds[qc_var_name].attrs['flag_values'] == [1, 2]
+    assert ds[qc_var_name].attrs['flag_assessments'] == ['Bad', 'Suspect']
+
+    ds.close()
+
+
+@pytest.mark.skipif(not SCIKIT_POSTHOCS_AVAILABLE,
+                    reason="scikit_posthocs is not installed.")
+def test_qcfilter2():
+    ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
+    var_name = 'inst_up_long_dome_resist'
+    expected_qc_var_name = 'qc_' + var_name
+
+    data = ds[var_name].values
+    data[0:4] = data[0:4] + 30.0
+    data[1000:1024] = data[1000:1024] + 30.0
+    ds[var_name].values = data
+
+    coef = 1.4
+    ds.qcfilter.add_iqr_test(var_name, coef=coef, test_assessment='Bad', prepend_text='arm')
+    assert np.sum(ds[expected_qc_var_name].values) == 28
+    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1]
+    assert ds[expected_qc_var_name].attrs['flag_meanings'] == [
+        f'arm: Value outside of interquartile range test range with a coefficient of {coef}'
+    ]
+
+    ds.qcfilter.add_iqr_test(var_name, test_number=3, prepend_text='ACT')
+    assert np.sum(ds[expected_qc_var_name].values) == 140
+    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1, 4]
+    assert ds[expected_qc_var_name].attrs['flag_meanings'][-1] == (
+        'ACT: Value outside of interquartile range test range with a coefficient of 1.5'
+    )
+
+    ds.qcfilter.add_gesd_test(var_name, test_assessment='Bad')
+    assert np.sum(ds[expected_qc_var_name].values) == 204
+    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1, 4, 8]
+    assert ds[expected_qc_var_name].attrs['flag_meanings'][-1] == (
+        'Value failed generalized Extreme Studentized Deviate test with an alpha of 0.05'
+    )
+
+    ds.qcfilter.add_gesd_test(var_name, alpha=0.1)
+    assert np.sum(ds[expected_qc_var_name].values) == 332
+    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1, 4, 8, 16]
+    assert ds[expected_qc_var_name].attrs['flag_meanings'][-1] == (
+        'Value failed 
generalized Extreme Studentized Deviate test with an alpha of 0.1' + ) + assert ds[expected_qc_var_name].attrs['flag_assessments'] == [ + 'Bad', + 'Indeterminate', + 'Bad', + 'Indeterminate', + ] + + +def test_qcfilter3(): + ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + var_name = 'inst_up_long_dome_resist' + result = ds.qcfilter.add_test(var_name, index=range(0, 100), test_meaning='testing') + qc_var_name = result['qc_variable_name'] + assert ds[qc_var_name].values.dtype.kind in np.typecodes['AllInteger'] + + ds[qc_var_name].values = ds[qc_var_name].values.astype(np.float32) + assert ds[qc_var_name].values.dtype.kind not in np.typecodes['AllInteger'] + + result = ds.qcfilter.get_qc_test_mask( + var_name=var_name, test_number=1, return_index=False + ) + assert np.sum(result) == 100 + result = ds.qcfilter.get_qc_test_mask( + var_name=var_name, test_number=1, return_index=True + ) + assert np.sum(result) == 4950 + + # Test where QC variables are not integer type + ds = ds.resample(time='5min').mean(keep_attrs=True) + ds.qcfilter.add_test( + var_name, index=range(0, ds.time.size), test_meaning='Testing float' + ) + assert np.sum(ds[qc_var_name].values) == 582 + + ds[qc_var_name].values = ds[qc_var_name].values.astype(np.float32) + ds.qcfilter.remove_test(var_name, test_number=2) + assert np.sum(ds[qc_var_name].values) == 6 + + +def test_qc_speed(): + """ + This tests the speed of the QC module to ensure changes do not significantly + slow down the module's processing. + """ + + n_variables = 100 + n_samples = 100 + + time = pd.date_range(start='2022-02-17 00:00:00', end='2022-02-18 00:00:00', periods=n_samples) + + # Create data variables with random noise + np.random.seed(42) + noisy_data_mapping = {f'data_var_{i}': np.random.random(time.shape) for i in range(n_variables)} + + ds = xr.Dataset( + data_vars={name: ('time', data) for name, data in noisy_data_mapping.items()}, + coords={'time': time}, + ) + + start = datetime.utcnow() + for name, var in noisy_data_mapping.items(): + failed_qc = var > 0.75 # Consider data above 0.75 as bad. Negligible time here. 
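+        # add_test() accepts the boolean array directly as index; True entries
+        # are the samples to flag, so no np.where() conversion is needed here.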
+ ds.qcfilter.add_test(name, index=failed_qc, test_meaning='Value above threshold') + + time_diff = datetime.utcnow() - start + assert time_diff.seconds <= 4 + + +def test_datafilter(): + ds = read_arm_netcdf(EXAMPLE_MET1, drop_variables=['base_time', 'time_offset']) + ds.clean.cleanup() + + data_var_names = list(ds.data_vars) + qc_var_names = [var_name for var_name in ds.data_vars if var_name.startswith('qc_')] + data_var_names = list(set(data_var_names) - set(qc_var_names)) + data_var_names.sort() + qc_var_names.sort() + + var_name = 'atmos_pressure' + + ds_1 = ds.mean() + + ds.qcfilter.add_less_test(var_name, 99, test_assessment='Bad') + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments='Bad') + ds_2 = ds_filtered.mean() + assert np.isclose(ds_1[var_name].values, 98.86, atol=0.01) + assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01) + assert isinstance(ds_1[var_name].data, da.core.Array) + assert 'act.qc.datafilter' in ds_filtered[var_name].attrs['history'] + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments='Bad', variables=var_name, del_qc_var=True) + ds_2 = ds_filtered.mean() + assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01) + expected_var_names = sorted(list(set(data_var_names + qc_var_names) - set(['qc_' + var_name]))) + assert sorted(list(ds_filtered.data_vars)) == expected_var_names + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=True) + assert sorted(list(ds_filtered.data_vars)) == data_var_names + + ds.close() + del ds + + +def test_qc_data_type(): + drop_vars = [ + 'base_time', + 'time_offset', + 'inst_up_long_case_resist', + 'inst_up_long_hemisp_tp', + 'inst_up_short_hemisp_tp', + 'inst_sfc_ir_temp', + 'lat', + 'lon', + 'alt', + ] + ds = read_arm_netcdf(EXAMPLE_IRT25m20s, drop_variables=drop_vars) + var_name = 'inst_up_long_dome_resist' + expected_qc_var_name = 'qc_' + var_name + ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=True) + del ds[expected_qc_var_name].attrs['flag_meanings'] + del ds[expected_qc_var_name].attrs['flag_assessments'] + ds[expected_qc_var_name] = ds[expected_qc_var_name].astype(np.int8) + ds.qcfilter.add_test(var_name, index=[1], test_number=9, test_meaning='First test') + + assert ds[expected_qc_var_name].attrs['flag_masks'][0].dtype == np.uint32 + assert ds[expected_qc_var_name].dtype == np.int16 + ds.qcfilter.add_test(var_name, index=[1], test_number=17, test_meaning='Second test') + assert ds[expected_qc_var_name].dtype == np.int32 + ds.qcfilter.add_test(var_name, index=[1], test_number=33, test_meaning='Third test') + assert ds[expected_qc_var_name].dtype == np.int64 + assert ds[expected_qc_var_name].attrs['flag_masks'][0].dtype == np.uint64 + + ds.qcfilter.add_test(var_name, index=[1], test_meaning='Fourth test', recycle=True) diff --git a/act/tests/qc/test_qctests.py b/act/tests/qc/test_qctests.py new file mode 100644 index 0000000000..48fc2ba9d3 --- /dev/null +++ b/act/tests/qc/test_qctests.py @@ -0,0 +1,391 @@ +import dask.array as da +import numpy as np + +from act.io.arm import read_arm_netcdf +from act.tests import ( + EXAMPLE_MET1, + EXAMPLE_IRT25m20s, +) + + +def test_qctests(): + ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + var_name = 'inst_up_long_dome_resist' + + # Add in one missing value and test for that missing value + data = ds[var_name].values + data[0] = np.nan + ds[var_name].data = da.from_array(data) + result = ds.qcfilter.add_missing_value_test(var_name) + data = 
ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
+    assert data.mask[0]
+
+    result = ds.qcfilter.add_missing_value_test(var_name, use_dask=True)
+    data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
+    assert data == np.array([0])
+    ds.qcfilter.remove_test(var_name, test_number=result['test_number'])
+
+    # less than min test
+    limit_value = 6.8
+    result = ds.qcfilter.add_less_test(
+        var_name, limit_value, prepend_text='arm', limit_attr_name='fail_min'
+    )
+
+    data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
+    assert 'arm' in result['test_meaning']
+    assert np.ma.count_masked(data) == 54
+    assert 'fail_min' in ds[result['qc_variable_name']].attrs.keys()
+    assert (
+        ds[result['qc_variable_name']].attrs['fail_min'].dtype
+        == ds[result['variable_name']].values.dtype
+    )
+    assert np.isclose(ds[result['qc_variable_name']].attrs['fail_min'], limit_value)
+
+    result = ds.qcfilter.add_less_test(var_name, limit_value, test_assessment='Suspect')
+    assert 'warn_min' in ds[result['qc_variable_name']].attrs.keys()
+
+    limit_value = 8
+    result = ds.qcfilter.add_less_test(var_name, limit_value)
+    data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
+    assert np.sum(data) == 2911939
+    result = ds.qcfilter.add_less_test(var_name, limit_value, use_dask=True)
+    data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
+    assert np.sum(data) == 2911939
+
+    # greater than max test
+    limit_value = 12.7
+    result = ds.qcfilter.add_greater_test(
+        var_name, limit_value, prepend_text='arm', limit_attr_name='fail_max'
+    )
+    data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
+    assert 'arm' in result['test_meaning']
+    assert np.ma.count_masked(data) == 61
+    assert 'fail_max' in ds[result['qc_variable_name']].attrs.keys()
+    assert (
+        ds[result['qc_variable_name']].attrs['fail_max'].dtype
+        == ds[result['variable_name']].values.dtype
+    )
+    assert np.isclose(ds[result['qc_variable_name']].attrs['fail_max'], limit_value)
+
+    result = ds.qcfilter.add_greater_test(var_name, limit_value, test_assessment='Suspect')
+    assert 'warn_max' in ds[result['qc_variable_name']].attrs.keys()
+
+    result = ds.qcfilter.add_greater_test(var_name, limit_value, use_dask=True)
+    data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
+    assert np.sum(data) == 125458
+    result = ds.qcfilter.add_greater_test(var_name, limit_value)
+    data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
+    assert np.sum(data) == 125458
+
+    # less than or equal test
+    limit_value = 6.9
+    result = ds.qcfilter.add_less_equal_test(
+        var_name,
+        limit_value,
+        test_assessment='Suspect',
+        prepend_text='arm',
+        limit_attr_name='warn_min',
+    )
+    data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
+    assert 'arm' in result['test_meaning']
+    assert np.ma.count_masked(data) == 149
+    assert 'warn_min' in ds[result['qc_variable_name']].attrs.keys()
+    assert (
+        ds[result['qc_variable_name']].attrs['warn_min'].dtype
+        == ds[result['variable_name']].values.dtype
+    )
+    assert np.isclose(ds[result['qc_variable_name']].attrs['warn_min'], limit_value)
+
+    result = ds.qcfilter.add_less_equal_test(var_name, limit_value)
+    assert 'fail_min' in ds[result['qc_variable_name']].attrs.keys()
+
+    result = ds.qcfilter.add_less_equal_test(var_name, limit_value, use_dask=True)
+    data = ds.qcfilter.get_qc_test_mask(var_name, 
result['test_number'], return_index=True) + assert np.sum(data) == 601581 + result = ds.qcfilter.add_less_equal_test(var_name, limit_value) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 601581 + + # greater than or equal test + result = ds.qcfilter.add_greater_equal_test(var_name, None) + limit_value = 12 + result = ds.qcfilter.add_greater_equal_test( + var_name, + limit_value, + test_assessment='Suspect', + prepend_text='arm', + limit_attr_name='warn_max', + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert np.ma.count_masked(data) == 606 + assert 'warn_max' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['warn_max'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose(ds[result['qc_variable_name']].attrs['warn_max'], limit_value) + + result = ds.qcfilter.add_greater_equal_test(var_name, limit_value) + assert 'fail_max' in ds[result['qc_variable_name']].attrs.keys() + + result = ds.qcfilter.add_greater_equal_test(var_name, limit_value, use_dask=True) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 1189873 + result = ds.qcfilter.add_greater_equal_test(var_name, limit_value) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 1189873 + + # equal to test + limit_value = 7.6705 + result = ds.qcfilter.add_equal_to_test( + var_name, limit_value, prepend_text='arm', limit_attr_name='fail_equal_to' + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert np.ma.count_masked(data) == 2 + assert 'fail_equal_to' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['fail_equal_to'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose(ds[result['qc_variable_name']].attrs['fail_equal_to'], limit_value) + + result = ds.qcfilter.add_equal_to_test( + var_name, limit_value, test_assessment='Indeterminate' + ) + assert 'warn_equal_to' in ds[result['qc_variable_name']].attrs.keys() + + result = ds.qcfilter.add_equal_to_test(var_name, limit_value, use_dask=True) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 8631 + result = ds.qcfilter.add_equal_to_test(var_name, limit_value) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 8631 + + # not equal to test + limit_value = 7.6705 + result = ds.qcfilter.add_not_equal_to_test( + var_name, + limit_value, + test_assessment='Indeterminate', + prepend_text='arm', + limit_attr_name='warn_not_equal_to', + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert np.ma.count_masked(data) == 4318 + assert 'warn_not_equal_to' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['warn_not_equal_to'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose(ds[result['qc_variable_name']].attrs['warn_not_equal_to'], limit_value) + + result = ds.qcfilter.add_not_equal_to_test(var_name, limit_value) + assert 'fail_not_equal_to' in ds[result['qc_variable_name']].attrs.keys() + + result = ds.qcfilter.add_not_equal_to_test(var_name, 
limit_value, use_dask=True) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 9320409 + result = ds.qcfilter.add_not_equal_to_test(var_name, limit_value) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 9320409 + + # outside range test + limit_value1 = 6.8 + limit_value2 = 12.7 + result = ds.qcfilter.add_outside_test( + var_name, + limit_value1, + limit_value2, + prepend_text='arm', + limit_attr_names=['fail_lower_range', 'fail_upper_range'], + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert np.ma.count_masked(data) == 115 + assert 'fail_lower_range' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['fail_lower_range'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose(ds[result['qc_variable_name']].attrs['fail_lower_range'], limit_value1) + assert 'fail_upper_range' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['fail_upper_range'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose(ds[result['qc_variable_name']].attrs['fail_upper_range'], limit_value2) + + result = ds.qcfilter.add_outside_test( + var_name, limit_value1, limit_value2, test_assessment='Indeterminate' + ) + assert 'warn_lower_range' in ds[result['qc_variable_name']].attrs.keys() + assert 'warn_upper_range' in ds[result['qc_variable_name']].attrs.keys() + + result = ds.qcfilter.add_outside_test( + var_name, limit_value1, limit_value2, use_dask=True + ) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 342254 + result = ds.qcfilter.add_outside_test( + var_name, + limit_value1, + limit_value2, + ) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 342254 + + # Starting to run out of space for tests. Remove some tests. 
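+    # Each test occupies one bit of the packed QC integer, so capacity is
+    # bounded by the QC variable's integer width (e.g. 32 tests for a 32-bit
+    # integer); freeing bits here leaves room for the tests that follow.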
+ for ii in range(16, 30): + ds.qcfilter.remove_test(var_name, test_number=ii) + + # inside range test + limit_value1 = 7 + limit_value2 = 8 + result = ds.qcfilter.add_inside_test( + var_name, + limit_value1, + limit_value2, + prepend_text='arm', + limit_attr_names=['fail_lower_range_inner', 'fail_upper_range_inner'], + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert np.ma.count_masked(data) == 479 + assert 'fail_lower_range_inner' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['fail_lower_range_inner'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose( + ds[result['qc_variable_name']].attrs['fail_lower_range_inner'], + limit_value1, + ) + assert 'fail_upper_range_inner' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['fail_upper_range_inner'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose( + ds[result['qc_variable_name']].attrs['fail_upper_range_inner'], + limit_value2, + ) + + result = ds.qcfilter.add_inside_test( + var_name, limit_value1, limit_value2, test_assessment='Indeterminate' + ) + assert 'warn_lower_range_inner' in ds[result['qc_variable_name']].attrs.keys() + assert 'warn_upper_range_inner' in ds[result['qc_variable_name']].attrs.keys() + + result = ds.qcfilter.add_inside_test(var_name, limit_value1, limit_value2, use_dask=True) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 1820693 + result = ds.qcfilter.add_inside_test( + var_name, + limit_value1, + limit_value2, + ) + data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) + assert np.sum(data) == 1820693 + + # delta test + test_limit = 0.05 + result = ds.qcfilter.add_delta_test( + var_name, test_limit, prepend_text='arm', limit_attr_name='warn_delta' + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert np.ma.count_masked(data) == 175 + assert 'warn_delta' in ds[result['qc_variable_name']].attrs.keys() + assert ( + ds[result['qc_variable_name']].attrs['warn_delta'].dtype + == ds[result['variable_name']].values.dtype + ) + assert np.isclose(ds[result['qc_variable_name']].attrs['warn_delta'], test_limit) + + data = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Suspect', 'Bad']) + assert np.ma.count_masked(data) == 1355 + + result = ds.qcfilter.add_delta_test(var_name, test_limit, test_assessment='Bad') + assert 'fail_delta' in ds[result['qc_variable_name']].attrs.keys() + + comp_ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + with np.testing.assert_raises(ValueError): + result = ds.qcfilter.add_difference_test(var_name, 'test') + + with np.testing.assert_raises(ValueError): + result = ds.qcfilter.add_difference_test( + var_name, + {comp_ds.attrs['datastream']: comp_ds}, + var_name, + diff_limit=None, + ) + + assert ds.qcfilter.add_difference_test(var_name, set_test_regardless=False) is None + + result = ds.qcfilter.add_difference_test( + var_name, + {comp_ds.attrs['datastream']: comp_ds}, + var_name, + diff_limit=1, + prepend_text='arm', + ) + data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) + assert 'arm' in result['test_meaning'] + assert not (data.mask).all() + + comp_ds.close() + ds.close() + + +def test_qctests_dos(): + ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + var_name = 
'inst_up_long_dome_resist'
+
+    # persistence test
+    data = ds[var_name].values
+    data[1000:2400] = data[1000]
+    data = np.around(data, decimals=3)
+    ds[var_name].values = data
+    result = ds.qcfilter.add_persistence_test(var_name)
+    qc_var_name = result['qc_variable_name']
+    test_meaning = (
+        'Data failing persistence test. Standard Deviation over a '
+        'window of 10 values less than 0.0001.'
+    )
+    assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning
+    # There is a precision issue in GitHub CI testing that slightly changes the
+    # number of values tripped. The isclose() tolerance accounts for that.
+    assert np.isclose(np.sum(ds[qc_var_name].values), 1399, atol=2)
+
+    ds.qcfilter.add_persistence_test(var_name, window=10000, prepend_text='DQO')
+    test_meaning = (
+        'DQO: Data failing persistence test. Standard Deviation over a window of '
+        '4320 values less than 0.0001.'
+    )
+    assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning
+
+
+def test_add_atmospheric_pressure_test():
+    ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True)
+    ds.load()
+
+    variable = 'atmos_pressure'
+    qc_variable = 'qc_' + variable
+
+    data = ds[variable].values
+    data[200:250] = data[200:250] + 5
+    data[500:550] = data[500:550] - 4.6
+    ds[variable].values = data
+    result = ds.qcfilter.add_atmospheric_pressure_test(variable)
+    assert isinstance(result, dict)
+    assert np.sum(ds[qc_variable].values) == 1600
+
+    del ds[qc_variable]
+    ds.qcfilter.add_atmospheric_pressure_test(variable, use_dask=True)
+    assert np.sum(ds[qc_variable].values) == 100
+
+    ds.close()
+    del ds
diff --git a/act/tests/qc/test_radiometer_tests.py b/act/tests/qc/test_radiometer_tests.py
new file mode 100644
index 0000000000..d781758d93
--- /dev/null
+++ b/act/tests/qc/test_radiometer_tests.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+
+from act.io.arm import read_arm_netcdf
+from act.qc.radiometer_tests import fft_shading_test
+from act.tests import EXAMPLE_MFRSR
+
+
+def test_fft_shading_test():
+    ds = read_arm_netcdf(EXAMPLE_MFRSR)
+    ds.clean.cleanup()
+    ds = fft_shading_test(ds)
+    qc_data = ds['qc_diffuse_hemisp_narrowband_filter4']
+    assert np.nansum(qc_data.values) == 7164
diff --git a/act/tests/qc/test_sp2.py b/act/tests/qc/test_sp2.py
new file mode 100644
index 0000000000..536b61c4f5
--- /dev/null
+++ b/act/tests/qc/test_sp2.py
@@ -0,0 +1,46 @@
+import numpy as np
+import pytest
+
+from act.qc.sp2 import SP2ParticleCriteria, PYSP2_AVAILABLE
+
+
+@pytest.mark.skipif(not PYSP2_AVAILABLE, reason="PySP2 is not installed.")
+def test_sp2_particle_config():
+    particle_config_ds = SP2ParticleCriteria()
+    assert particle_config_ds.ScatMaxPeakHt1 == 60000
+    assert particle_config_ds.ScatMinPeakHt1 == 250
+    assert particle_config_ds.ScatMaxPeakHt2 == 60000
+    assert particle_config_ds.ScatMinPeakHt2 == 250
+    assert particle_config_ds.ScatMinWidth == 10
+    assert particle_config_ds.ScatMaxWidth == 90
+    assert particle_config_ds.ScatMinPeakPos == 20
+    assert particle_config_ds.ScatMaxPeakPos == 90
+    assert particle_config_ds.IncanMinPeakHt1 == 200
+    assert particle_config_ds.IncanMinPeakHt2 == 200
+    assert particle_config_ds.IncanMaxPeakHt1 == 60000
+    assert particle_config_ds.IncanMaxPeakHt2 == 60000
+    assert particle_config_ds.IncanMinWidth == 5
+    assert particle_config_ds.IncanMaxWidth == np.inf
+    assert particle_config_ds.IncanMinPeakPos == 20
+    assert particle_config_ds.IncanMaxPeakPos == 90
+    assert particle_config_ds.IncanMinPeakRatio == 0.1
+    assert particle_config_ds.IncanMaxPeakRatio == 25
+    assert 
particle_config_ds.IncanMaxPeakOffset == 11 + assert particle_config_ds.c0Mass1 == 0 + assert particle_config_ds.c1Mass1 == 0.0001896 + assert particle_config_ds.c2Mass1 == 0 + assert particle_config_ds.c3Mass1 == 0 + assert particle_config_ds.c0Mass2 == 0 + assert particle_config_ds.c1Mass2 == 0.0016815 + assert particle_config_ds.c2Mass2 == 0 + assert particle_config_ds.c3Mass2 == 0 + assert particle_config_ds.c0Scat1 == 0 + assert particle_config_ds.c1Scat1 == 78.141 + assert particle_config_ds.c2Scat1 == 0 + assert particle_config_ds.c0Scat2 == 0 + assert particle_config_ds.c1Scat2 == 752.53 + assert particle_config_ds.c2Scat2 == 0 + assert particle_config_ds.densitySO4 == 1.8 + assert particle_config_ds.densityBC == 1.8 + assert particle_config_ds.TempSTP == 273.15 + assert particle_config_ds.PressSTP == 1013.25 diff --git a/act/tests/retrievals/test_aeri.py b/act/tests/retrievals/test_aeri.py new file mode 100644 index 0000000000..8f940720b0 --- /dev/null +++ b/act/tests/retrievals/test_aeri.py @@ -0,0 +1,16 @@ +import numpy as np +import act + + +def test_aeri2irt(): + aeri_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AERI) + aeri_ds = act.retrievals.aeri.aeri2irt(aeri_ds) + assert np.round(np.nansum(aeri_ds['aeri_irt_equiv_temperature'].values)).astype(int) == 17372 + np.testing.assert_almost_equal( + aeri_ds['aeri_irt_equiv_temperature'].values[7], 286.081, decimal=3 + ) + np.testing.assert_almost_equal( + aeri_ds['aeri_irt_equiv_temperature'].values[-10], 285.366, decimal=3 + ) + aeri_ds.close() + del aeri_ds diff --git a/act/tests/retrievals/test_cbh.py b/act/tests/retrievals/test_cbh.py new file mode 100644 index 0000000000..33947a8c96 --- /dev/null +++ b/act/tests/retrievals/test_cbh.py @@ -0,0 +1,19 @@ +import act + + +def test_generic_sobel_cbh(): + ceil = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_CEIL1) + + ceil = ceil.resample(time='1min').nearest() + ceil = act.retrievals.cbh.generic_sobel_cbh( + ceil, + variable='backscatter', + height_dim='range', + var_thresh=1000.0, + fill_na=0.0, + edge_thresh=5, + ) + cbh = ceil['cbh_sobel_backscatter'].values + assert cbh[500] == 615.0 + assert cbh[1000] == 555.0 + ceil.close() diff --git a/act/tests/retrievals/test_doppler_lidar_retrievals.py b/act/tests/retrievals/test_doppler_lidar_retrievals.py new file mode 100644 index 0000000000..9e430b8a8c --- /dev/null +++ b/act/tests/retrievals/test_doppler_lidar_retrievals.py @@ -0,0 +1,23 @@ +import numpy as np + +import act + + +def test_doppler_lidar_winds(): + # Process a single file + dl_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_DLPPI) + result = act.retrievals.doppler_lidar.compute_winds_from_ppi(dl_ds, intensity_name='intensity') + assert np.round(np.nansum(result['wind_speed'].values)).astype(int) == 1570 + assert np.round(np.nansum(result['wind_direction'].values)).astype(int) == 32635 + assert result['wind_speed'].attrs['units'] == 'm/s' + assert result['wind_direction'].attrs['units'] == 'degree' + assert result['height'].attrs['units'] == 'm' + dl_ds.close() + + # Process multiple files + dl_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_DLPPI_MULTI) + del dl_ds['range'].attrs['units'] + result = act.retrievals.doppler_lidar.compute_winds_from_ppi(dl_ds) + assert np.round(np.nansum(result['wind_speed'].values)).astype(int) == 2854 + assert np.round(np.nansum(result['wind_direction'].values)).astype(int) == 64986 + dl_ds.close() diff --git a/act/tests/retrievals/test_irt.py 
b/act/tests/retrievals/test_irt.py new file mode 100644 index 0000000000..681ac63e0d --- /dev/null +++ b/act/tests/retrievals/test_irt.py @@ -0,0 +1,12 @@ +import numpy as np + +import act + + +def test_sst(): + ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_IRTSST) + ds = act.retrievals.irt.sst_from_irt(ds) + np.testing.assert_almost_equal(ds['sea_surface_temperature'].values[0], 278.901, decimal=3) + np.testing.assert_almost_equal(ds['sea_surface_temperature'].values[-1], 279.291, decimal=3) + assert np.round(np.nansum(ds['sea_surface_temperature'].values)).astype(int) == 6699 + ds.close() diff --git a/act/tests/retrievals/test_radiation.py b/act/tests/retrievals/test_radiation.py new file mode 100644 index 0000000000..fa8357efd2 --- /dev/null +++ b/act/tests/retrievals/test_radiation.py @@ -0,0 +1,47 @@ +import numpy as np +import xarray as xr + +import act + + +def test_calculate_sirs_variable(): + sirs_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_SIRS) + met_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1) + + ds = act.retrievals.radiation.calculate_dsh_from_dsdh_sdn(sirs_ds) + assert np.isclose(np.nansum(ds['derived_down_short_hemisp'].values), 61157.71, atol=0.1) + + ds = act.retrievals.radiation.calculate_irradiance_stats( + ds, + variable='derived_down_short_hemisp', + variable2='down_short_hemisp', + threshold=60, + ) + + assert np.isclose(np.nansum(ds['diff_derived_down_short_hemisp'].values), 1335.12, atol=0.1) + assert np.isclose(np.nansum(ds['ratio_derived_down_short_hemisp'].values), 400.31, atol=0.1) + + ds = act.retrievals.radiation.calculate_net_radiation(ds, smooth=30) + assert np.ceil(np.nansum(ds['net_radiation'].values)) == 21915 + assert np.ceil(np.nansum(ds['net_radiation_smoothed'].values)) == 22316 + + ds = act.retrievals.radiation.calculate_longwave_radiation( + ds, + temperature_var='temp_mean', + vapor_pressure_var='vapor_pressure_mean', + met_ds=met_ds, + ) + assert np.ceil(ds['monteith_clear'].values[25]) == 239 + assert np.ceil(ds['monteith_cloudy'].values[30]) == 318 + assert np.ceil(ds['prata_clear'].values[35]) == 234 + + new_ds = xr.merge([sirs_ds, met_ds], compat='override') + ds = act.retrievals.radiation.calculate_longwave_radiation( + new_ds, temperature_var='temp_mean', vapor_pressure_var='vapor_pressure_mean' + ) + assert np.ceil(ds['monteith_clear'].values[25]) == 239 + assert np.ceil(ds['monteith_cloudy'].values[30]) == 318 + assert np.ceil(ds['prata_clear'].values[35]) == 234 + + sirs_ds.close() + met_ds.close() diff --git a/act/tests/test_retrievals.py b/act/tests/retrievals/test_sonde.py similarity index 51% rename from act/tests/test_retrievals.py rename to act/tests/retrievals/test_sonde.py index 848d8b07b8..f31a2507de 100644 --- a/act/tests/test_retrievals.py +++ b/act/tests/retrievals/test_sonde.py @@ -1,17 +1,6 @@ -' Unit tests for the ACT retrievals module. 
' '' - -import glob import numpy as np -import pytest -import xarray as xr -import act - -try: - import pysp2 - PYSP2_AVAILABLE = True -except ImportError: - PYSP2_AVAILABLE = False +import act def test_get_stability_indices(): @@ -61,24 +50,6 @@ def test_get_stability_indices(): sonde_ds.close() -def test_generic_sobel_cbh(): - ceil = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_CEIL1) - - ceil = ceil.resample(time='1min').nearest() - ceil = act.retrievals.cbh.generic_sobel_cbh( - ceil, - variable='backscatter', - height_dim='range', - var_thresh=1000.0, - fill_na=0.0, - edge_thresh=5, - ) - cbh = ceil['cbh_sobel_backscatter'].values - assert cbh[500] == 615.0 - assert cbh[1000] == 555.0 - ceil.close() - - def test_calculate_precipitable_water(): sonde_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_SONDE1) assert sonde_ds['tdry'].units == 'C', 'Temperature must be in Celsius' @@ -91,92 +62,6 @@ def test_calculate_precipitable_water(): sonde_ds.close() -def test_doppler_lidar_winds(): - # Process a single file - dl_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_DLPPI) - result = act.retrievals.doppler_lidar.compute_winds_from_ppi(dl_ds, intensity_name='intensity') - assert np.round(np.nansum(result['wind_speed'].values)).astype(int) == 1570 - assert np.round(np.nansum(result['wind_direction'].values)).astype(int) == 32635 - assert result['wind_speed'].attrs['units'] == 'm/s' - assert result['wind_direction'].attrs['units'] == 'degree' - assert result['height'].attrs['units'] == 'm' - dl_ds.close() - - # Process multiple files - dl_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_DLPPI_MULTI) - del dl_ds['range'].attrs['units'] - result = act.retrievals.doppler_lidar.compute_winds_from_ppi(dl_ds) - assert np.round(np.nansum(result['wind_speed'].values)).astype(int) == 2854 - assert np.round(np.nansum(result['wind_direction'].values)).astype(int) == 64986 - dl_ds.close() - - -def test_aeri2irt(): - aeri_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AERI) - aeri_ds = act.retrievals.aeri.aeri2irt(aeri_ds) - assert np.round(np.nansum(aeri_ds['aeri_irt_equiv_temperature'].values)).astype(int) == 17372 - np.testing.assert_almost_equal( - aeri_ds['aeri_irt_equiv_temperature'].values[7], 286.081, decimal=3 - ) - np.testing.assert_almost_equal( - aeri_ds['aeri_irt_equiv_temperature'].values[-10], 285.366, decimal=3 - ) - aeri_ds.close() - del aeri_ds - - -def test_sst(): - ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_IRTSST) - ds = act.retrievals.irt.sst_from_irt(ds) - np.testing.assert_almost_equal(ds['sea_surface_temperature'].values[0], 278.901, decimal=3) - np.testing.assert_almost_equal(ds['sea_surface_temperature'].values[-1], 279.291, decimal=3) - assert np.round(np.nansum(ds['sea_surface_temperature'].values)).astype(int) == 6699 - ds.close() - - -def test_calculate_sirs_variable(): - sirs_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_SIRS) - met_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1) - - ds = act.retrievals.radiation.calculate_dsh_from_dsdh_sdn(sirs_ds) - assert np.isclose(np.nansum(ds['derived_down_short_hemisp'].values), 61157.71, atol=0.1) - - ds = act.retrievals.radiation.calculate_irradiance_stats( - ds, - variable='derived_down_short_hemisp', - variable2='down_short_hemisp', - threshold=60, - ) - - assert np.isclose(np.nansum(ds['diff_derived_down_short_hemisp'].values), 1335.12, atol=0.1) - assert 
np.isclose(np.nansum(ds['ratio_derived_down_short_hemisp'].values), 400.31, atol=0.1) - - ds = act.retrievals.radiation.calculate_net_radiation(ds, smooth=30) - assert np.ceil(np.nansum(ds['net_radiation'].values)) == 21915 - assert np.ceil(np.nansum(ds['net_radiation_smoothed'].values)) == 22316 - - ds = act.retrievals.radiation.calculate_longwave_radiation( - ds, - temperature_var='temp_mean', - vapor_pressure_var='vapor_pressure_mean', - met_ds=met_ds, - ) - assert np.ceil(ds['monteith_clear'].values[25]) == 239 - assert np.ceil(ds['monteith_cloudy'].values[30]) == 318 - assert np.ceil(ds['prata_clear'].values[35]) == 234 - - new_ds = xr.merge([sirs_ds, met_ds], compat='override') - ds = act.retrievals.radiation.calculate_longwave_radiation( - new_ds, temperature_var='temp_mean', vapor_pressure_var='vapor_pressure_mean' - ) - assert np.ceil(ds['monteith_clear'].values[25]) == 239 - assert np.ceil(ds['monteith_cloudy'].values[30]) == 318 - assert np.ceil(ds['prata_clear'].values[35]) == 234 - - sirs_ds.close() - met_ds.close() - - def test_calculate_pbl_liu_liang(): files = act.tests.sample_files.EXAMPLE_TWP_SONDE_20060121.copy() files2 = act.tests.sample_files.EXAMPLE_SONDE1 @@ -185,8 +70,8 @@ def test_calculate_pbl_liu_liang(): pblht = [] pbl_regime = [] - for i, r in enumerate(files): - ds = act.io.arm.read_arm_netcdf(r) + for file in files: + ds = act.io.arm.read_arm_netcdf(file) ds['tdry'].attrs['units'] = 'degree_Celsius' ds = act.retrievals.sonde.calculate_pbl_liu_liang(ds, smooth_height=10) pblht.append(float(ds['pblht_liu_liang'].values)) @@ -247,26 +132,3 @@ def test_calculate_heffter_pbl(): np.testing.assert_almost_equal(ds['potential_temperature_ss'].values[4], 298.4, 1) assert np.sum(ds['bottom_inversion'].values) == 7426 assert np.sum(ds['top_inversion'].values) == 7903 - - -@pytest.mark.skipif(not PYSP2_AVAILABLE, reason='PySP2 is not installed.') -def test_sp2_waveform_stats(): - my_sp2b = act.io.read_sp2(act.tests.EXAMPLE_SP2B) - my_ini = act.tests.EXAMPLE_INI - my_binary = act.qc.get_waveform_statistics(my_sp2b, my_ini, parallel=False) - assert my_binary.PkHt_ch1.max() == 62669.4 - np.testing.assert_almost_equal(np.nanmax(my_binary.PkHt_ch0.values), 98708.92915295, decimal=1) - np.testing.assert_almost_equal(np.nanmax(my_binary.PkHt_ch4.values), 65088.39598033, decimal=1) - - -@pytest.mark.skipif(not PYSP2_AVAILABLE, reason='PySP2 is not installed.') -def test_sp2_psds(): - my_sp2b = act.io.read_sp2(act.tests.EXAMPLE_SP2B) - my_ini = act.tests.EXAMPLE_INI - my_binary = act.qc.get_waveform_statistics(my_sp2b, my_ini, parallel=False) - my_hk = act.io.read_hk_file(act.tests.EXAMPLE_HK) - my_binary = act.retrievals.calc_sp2_diams_masses(my_binary) - ScatRejectKey = my_binary['ScatRejectKey'].values - assert np.nanmax(my_binary['ScatDiaBC50'].values[ScatRejectKey == 0]) < 1000.0 - my_psds = act.retrievals.process_sp2_psds(my_binary, my_hk, my_ini) - np.testing.assert_almost_equal(my_psds['NumConcIncan'].max(), 0.95805343) diff --git a/act/tests/retrievals/test_sp2_retrievals.py b/act/tests/retrievals/test_sp2_retrievals.py new file mode 100644 index 0000000000..6f2f33f9c4 --- /dev/null +++ b/act/tests/retrievals/test_sp2_retrievals.py @@ -0,0 +1,32 @@ +import numpy as np +import pytest +import act + +try: + import pysp2 + PYSP2_AVAILABLE = True +except ImportError: + PYSP2_AVAILABLE = False + + +@pytest.mark.skipif(not PYSP2_AVAILABLE, reason='PySP2 is not installed.') +def test_sp2_waveform_stats(): + my_sp2b = act.io.read_sp2(act.tests.EXAMPLE_SP2B) + my_ini = 
diff --git a/act/tests/retrievals/test_sp2_retrievals.py b/act/tests/retrievals/test_sp2_retrievals.py
new file mode 100644
index 0000000000..6f2f33f9c4
--- /dev/null
+++ b/act/tests/retrievals/test_sp2_retrievals.py
@@ -0,0 +1,32 @@
+import numpy as np
+import pytest
+import act
+
+try:
+    import pysp2
+    PYSP2_AVAILABLE = True
+except ImportError:
+    PYSP2_AVAILABLE = False
+
+
+@pytest.mark.skipif(not PYSP2_AVAILABLE, reason='PySP2 is not installed.')
+def test_sp2_waveform_stats():
+    my_sp2b = act.io.read_sp2(act.tests.EXAMPLE_SP2B)
+    my_ini = act.tests.EXAMPLE_INI
+    my_binary = act.qc.get_waveform_statistics(my_sp2b, my_ini, parallel=False)
+    assert my_binary.PkHt_ch1.max() == 62669.4
+    np.testing.assert_almost_equal(np.nanmax(my_binary.PkHt_ch0.values), 98708.92915295, decimal=1)
+    np.testing.assert_almost_equal(np.nanmax(my_binary.PkHt_ch4.values), 65088.39598033, decimal=1)
+
+
+@pytest.mark.skipif(not PYSP2_AVAILABLE, reason='PySP2 is not installed.')
+def test_sp2_psds():
+    my_sp2b = act.io.read_sp2(act.tests.EXAMPLE_SP2B)
+    my_ini = act.tests.EXAMPLE_INI
+    my_binary = act.qc.get_waveform_statistics(my_sp2b, my_ini, parallel=False)
+    my_hk = act.io.read_hk_file(act.tests.EXAMPLE_HK)
+    my_binary = act.retrievals.calc_sp2_diams_masses(my_binary)
+    scatrejectkey = my_binary['ScatRejectKey'].values
+    assert np.nanmax(my_binary['ScatDiaBC50'].values[scatrejectkey == 0]) < 1000.0
+    my_psds = act.retrievals.process_sp2_psds(my_binary, my_hk, my_ini)
+    np.testing.assert_almost_equal(my_psds['NumConcIncan'].max(), 0.95805343)
diff --git a/act/tests/test_correct.py b/act/tests/test_correct.py
deleted file mode 100644
index c7643cc736..0000000000
--- a/act/tests/test_correct.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import numpy as np
-import xarray as xr
-
-import act
-
-
-def test_correct_ceil():
-    # Make a fake ARM dataset to test with, just an array with 1e-7 for half
-    # of it
-    fake_data = 10 * np.ones((300, 20))
-    fake_data[:, 10:] = -1
-    arm_ds = {}
-    arm_ds['backscatter'] = xr.DataArray(fake_data)
-    arm_ds = act.corrections.ceil.correct_ceil(arm_ds)
-    assert np.all(arm_ds['backscatter'].data[:, 10:] == -7)
-    assert np.all(arm_ds['backscatter'].data[:, 1:10] == 1)
-
-    arm_ds['backscatter'].attrs['units'] = 'dummy'
-    arm_ds = act.corrections.ceil.correct_ceil(arm_ds)
-    assert arm_ds['backscatter'].units == 'log(dummy)'
-
-
-def test_correct_mpl():
-    # Make a fake ARM dataset to test with, just an array with 1e-7 for half
-    # of it
-    test_data = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MPL_1SAMPLE)
-    ds = act.corrections.mpl.correct_mpl(test_data)
-    sig_cross_pol = ds['signal_return_cross_pol'].values[1, 10:15]
-    sig_co_pol = ds['signal_return_co_pol'].values[1, 10:15]
-    height = ds['height'].values[0:10]
-    overlap0 = ds['overlap_correction'].values[1, 0, 0:5]
-    overlap1 = ds['overlap_correction'].values[1, 1, 0:5]
-    overlap2 = ds['overlap_correction'].values[1, 2, 0:5]
-    np.testing.assert_allclose(overlap0, [0.0, 0.0, 0.0, 0.0, 0.0])
-    np.testing.assert_allclose(overlap1, [754.338, 754.338, 754.338, 754.338, 754.338])
-    np.testing.assert_allclose(overlap2, [181.9355, 181.9355, 181.9355, 181.9355, 181.9355])
-    np.testing.assert_allclose(
-        sig_cross_pol,
-        [-0.5823283, -1.6066532, -1.7153032, -2.520143, -2.275405],
-        rtol=4e-06,
-    )
-    np.testing.assert_allclose(
-        sig_co_pol, [12.5631485, 11.035495, 11.999875, 11.09393, 11.388968], rtol=1e-6
-    )
-    np.testing.assert_allclose(
-        height,
-        [
-            0.00749012,
-            0.02247084,
-            0.03745109,
-            0.05243181,
-            0.06741206,
-            0.08239277,
-            0.09737302,
-            0.11235374,
-            0.12733398,
-            0.14231472,
-        ],
-        rtol=1e-6,
-    )
-    assert ds['signal_return_co_pol'].attrs['units'] == '10 * log10(count/us)'
-    assert ds['signal_return_cross_pol'].attrs['units'] == '10 * log10(count/us)'
-    assert ds['cross_co_ratio'].attrs['long_name'] == 'Cross-pol / Co-pol ratio * 100'
-    assert ds['cross_co_ratio'].attrs['units'] == '1'
-    assert 'description' not in ds['cross_co_ratio'].attrs.keys()
-    assert 'ancillary_variables' not in ds['cross_co_ratio'].attrs.keys()
-    assert np.all(np.round(ds['cross_co_ratio'].data[0, 500]) == 34.0)
-    assert np.all(np.round(ds['signal_return_co_pol'].data[0, 11]) == 11)
-    assert np.all(np.round(ds['signal_return_co_pol'].data[0, 500]) == -6)
-    test_data.close()
-    ds.close()
-
-
-def test_correct_wind():
-    nav = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_NAV)
-    nav = act.utils.ship_utils.calc_cog_sog(nav)
-
-    aosmet = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AOSMET)
-
-    ds = xr.merge([nav, aosmet], compat='override')
-    ds = act.corrections.ship.correct_wind(ds)
-
-    assert round(ds['wind_speed_corrected'].values[800]) == 5.0
-    assert round(ds['wind_direction_corrected'].values[800]) == 92.0
-
-
-def test_correct_dl():
-    # Test the DL correction script on a PPI dataset eventhough it will
-    # mostlikely be used on FPT scans. Doing this to save space with only
-    # one datafile in the repo.
-    files = act.tests.sample_files.EXAMPLE_DLPPI
-    ds = act.io.arm.read_arm_netcdf(files)
-
-    new_ds = act.corrections.doppler_lidar.correct_dl(ds, fill_value=np.nan)
-    data = new_ds['attenuated_backscatter'].values
-    np.testing.assert_almost_equal(np.nansum(data), -186479.83, decimal=0.1)
-
-    new_ds = act.corrections.doppler_lidar.correct_dl(ds, range_normalize=False)
-    data = new_ds['attenuated_backscatter'].values
-    np.testing.assert_almost_equal(np.nansum(data), -200886.0, decimal=0.1)
-
-
-def test_correct_rl():
-    # Using ceil data in RL place to save memory
-    files = act.tests.sample_files.EXAMPLE_RL1
-    ds = act.io.arm.read_arm_netcdf(files)
-
-    ds = act.corrections.raman_lidar.correct_rl(ds, range_normalize_log_values=True)
-    np.testing.assert_almost_equal(
-        np.max(ds['depolarization_counts_high'].values), 9.91, decimal=2
-    )
-    np.testing.assert_almost_equal(
-        np.min(ds['depolarization_counts_high'].values), -7.00, decimal=2
-    )
-    np.testing.assert_almost_equal(
-        np.mean(ds['depolarization_counts_high'].values), -1.45, decimal=2
-    )
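For reviewers comparing this deleted file against the new per-instrument modules under act/tests/corrections/: the ceilometer assertions above imply that correct_ceil() log10-scales the backscatter field (10 becomes 1), maps non-positive values to the 1e-7 fill (log10 gives -7), and wraps the units attribute in log(...). A minimal usage sketch under those inferred assumptions, with illustrative data and units:

import numpy as np
import xarray as xr
import act

# Toy dataset mirroring the deleted test: half valid values, half negative.
ds = xr.Dataset({'backscatter': xr.DataArray(10 * np.ones((300, 20)))})
ds['backscatter'].values[:, 10:] = -1
ds['backscatter'].attrs['units'] = 'counts'  # illustrative units
ds = act.corrections.ceil.correct_ceil(ds)
# backscatter is now log10-scaled and its units become 'log(counts)'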
diff --git a/act/tests/test_discovery.py b/act/tests/test_discovery.py
deleted file mode 100644
index 97fc028984..0000000000
--- a/act/tests/test_discovery.py
+++ /dev/null
@@ -1,291 +0,0 @@
-import glob
-import os
-from datetime import datetime
-import numpy as np
-import requests
-from requests.packages.urllib3.exceptions import InsecureRequestWarning
-import act
-
-
-def test_cropType():
-    year = 2018
-    lat = 37.15
-    lon = -98.362
-    # Try for when the cropscape API is not working
-    try:
-        crop = act.discovery.cropscape.get_crop_type(lat, lon, year)
-        crop2 = act.discovery.cropscape.get_crop_type(lat, lon)
-    except Exception:
-        return
-
-    # print(crop, crop2)
-    if crop is not None:
-        assert crop == 'Dbl Crop WinWht/Sorghum'
-    if crop2 is not None:
-        # assert crop2 == 'Sorghum'
-        assert crop2 == 'Soybeans'
-
-
-def test_get_ord():
-    time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)]
-    my_asoses = act.discovery.get_asos_data(time_window, station='ORD')
-    assert 'ORD' in my_asoses.keys()
-    assert np.all(
-        np.equal(
-            my_asoses['ORD']['sknt'].values[:10],
-            np.array([13.0, 11.0, 14.0, 14.0, 13.0, 11.0, 14.0, 13.0, 13.0, 13.0]),
-        )
-    )
-
-
-def test_get_region():
-    my_keys = ['MDW', 'IGQ', 'ORD', '06C', 'PWK', 'LOT', 'GYY']
-    time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)]
-    lat_window = (41.8781 - 0.5, 41.8781 + 0.5)
-    lon_window = (-87.6298 - 0.5, -87.6298 + 0.5)
-    my_asoses = act.discovery.get_asos_data(time_window, lat_range=lat_window, lon_range=lon_window)
-    asos_keys = [x for x in my_asoses.keys()]
-    assert asos_keys == my_keys
-
-
-def test_get_armfile():
-    if not os.path.isdir(os.getcwd() + '/data/'):
-        os.makedirs(os.getcwd() + '/data/')
-
-    # Place your username and token here
-    username = os.getenv('ARM_USERNAME')
-    token = os.getenv('ARM_PASSWORD')
-
-    if username is not None and token is not None:
-        if len(username) == 0 and len(token) == 0:
-            return
-        datastream = 'sgpmetE13.b1'
-        startdate = '2020-01-01'
-        enddate = startdate
-        outdir = os.getcwd() + '/data/'
-
-        results = act.discovery.arm.download_arm_data(
-            username, token, datastream, startdate, enddate, output=outdir
-        )
-        files = glob.glob(outdir + datastream + '*20200101*cdf')
-        if len(results) > 0:
-            assert files is not None
-            assert 'sgpmetE13' in files[0]
-
-        if files is not None:
-            if len(files) > 0:
-                os.remove(files[0])
-
-        datastream = 'sgpmeetE13.b1'
-        act.discovery.arm.download_arm_data(
-            username, token, datastream, startdate, enddate, output=outdir
-        )
-        files = glob.glob(outdir + datastream + '*20200101*cdf')
-        assert len(files) == 0
-
-        with np.testing.assert_raises(ConnectionRefusedError):
-            act.discovery.arm.download_arm_data(
-                username, token + '1234', datastream, startdate, enddate, output=outdir
-            )
-
-        datastream = 'sgpmetE13.b1'
-        results = act.discovery.arm.download_arm_data(
-            username, token, datastream, startdate, enddate
-        )
-        assert len(results) == 1
-
-
-def test_get_armfile_hourly():
-    if not os.path.isdir(os.getcwd() + '/data/'):
-        os.makedirs(os.getcwd() + '/data/')
-
-    # Place your username and token here
-    username = os.getenv('ARM_USERNAME')
-    token = os.getenv('ARM_PASSWORD')
-
-    if username is not None and token is not None:
-        if len(username) == 0 and len(token) == 0:
-            return
-        datastream = 'sgpmetE13.b1'
-        startdate = '2020-01-01T00:00:00'
-        enddate = '2020-01-01T12:00:00'
-        outdir = os.getcwd() + '/data/'
-
-        results = act.discovery.arm.download_arm_data(
-            username, token, datastream, startdate, enddate, output=outdir
-        )
-        files = glob.glob(outdir + datastream + '*20200101*cdf')
-        if len(results) > 0:
-            assert files is not None
-            assert 'sgpmetE13' in files[0]
-
-        if files is not None:
-            if len(files) > 0:
-                os.remove(files[0])
-
-        datastream = 'sgpmeetE13.b1'
-        act.discovery.arm.download_arm_data(
-            username, token, datastream, startdate, enddate, output=outdir
-        )
-        files = glob.glob(outdir + datastream + '*20200101*cdf')
-        assert len(files) == 0
-
-        with np.testing.assert_raises(ConnectionRefusedError):
-            act.discovery.arm.download_arm_data(
-                username, token + '1234', datastream, startdate, enddate, output=outdir
-            )
-
-        datastream = 'sgpmetE13.b1'
-        results = act.discovery.arm.download_arm_data(
-            username, token, datastream, startdate, enddate
-        )
-        assert len(results) == 1
-
-
-def test_airnow():
-    token = os.getenv('AIRNOW_API')
-    if token is not None:
-        if len(token) == 0:
-            return
-        results = act.discovery.get_airnow_forecast(token, '2022-05-01', zipcode=60108, distance=50)
-        assert results['CategoryName'].values[0] == 'Good'
-        assert results['AQI'].values[2] == -1
-        assert results['ReportingArea'].values[3] == 'Aurora and Elgin'
-
-        results = act.discovery.get_airnow_forecast(
-            token, '2022-05-01', distance=50, latlon=[41.958, -88.12]
-        )
-        assert results['CategoryName'].values[3] == 'Good'
-        assert results['AQI'].values[2] == -1
-        assert results['ReportingArea'][3] == 'Aurora and Elgin'
-
-        results = act.discovery.get_airnow_obs(token, date='2022-05-01', zipcode=60108, distance=50)
-        assert results['AQI'].values[0] == 26
-        assert results['ParameterName'].values[1] == 'PM2.5'
-        assert results['CategoryName'].values[0] == 'Good'
-
-        results = act.discovery.get_airnow_obs(token, zipcode=60108, distance=50)
-        assert results['ReportingArea'].values[0] == 'Aurora and Elgin'
-        results = act.discovery.get_airnow_obs(token, latlon=[41.958, -88.12], distance=50)
-        assert results['StateCode'].values[0] == 'IL'
-
-        with np.testing.assert_raises(NameError):
-            results = act.discovery.get_airnow_obs(token)
-        with np.testing.assert_raises(NameError):
-            results = act.discovery.get_airnow_forecast(token, '2022-05-01')
-
-        results = act.discovery.get_airnow_obs(
-            token, date='2022-05-01', distance=50, latlon=[41.958, -88.12]
-        )
-        assert results['AQI'].values[0] == 26
-        assert results['ParameterName'].values[1] == 'PM2.5'
-        assert results['CategoryName'].values[0] == 'Good'
-
-        lat_lon = '-88.245401,41.871346,-87.685099,42.234359'
-        results = act.discovery.get_airnow_bounded_obs(
-            token, '2022-05-01T00', '2022-05-01T12', lat_lon, 'OZONE,PM25', data_type='B'
-        )
-        assert results['PM2.5'].values[-1, 0] == 1.8
-        assert results['OZONE'].values[0, 0] == 37.0
-        assert len(results['time'].values) == 13
-
-
-def test_noaa_psl():
-    result = act.discovery.download_noaa_psl_data(
-        site='ctd',
-        instrument='Parsivel',
-        startdate='20211231',
-        enddate='20220101',
-        output='./data/',
-    )
-    assert len(result) == 48
-
-    result = act.discovery.download_noaa_psl_data(
-        site='ctd', instrument='Pressure', startdate='20220101', hour='00'
-    )
-    assert len(result) == 1
-
-    result = act.discovery.download_noaa_psl_data(
-        site='ctd', instrument='GpsTrimble', startdate='20220104', hour='00'
-    )
-    assert len(result) == 6
-
-    types = [
-        'Radar S-band Moment',
-        'Radar S-band Bright Band',
-        '449RWP Bright Band',
-        '449RWP Wind',
-        '449RWP Sub-Hour Wind',
-        '449RWP Sub-Hour Temp',
-        '915RWP Wind',
-        '915RWP Temp',
-        '915RWP Sub-Hour Wind',
-        '915RWP Sub-Hour Temp',
-    ]
-    for t in types:
-        result = act.discovery.download_noaa_psl_data(
-            site='ctd', instrument=t, startdate='20220601', hour='01'
-        )
-        assert len(result) == 1
-
-    types = ['Radar FMCW Moment', 'Radar FMCW Bright Band']
-    files = [3, 1]
-    for i, t in enumerate(types):
-        result = act.discovery.download_noaa_psl_data(
-            site='bck', instrument=t, startdate='20220101', hour='01'
-        )
-        assert len(result) == files[i]
-
-    with np.testing.assert_raises(ValueError):
-        result = act.discovery.download_noaa_psl_data(
-            instrument='Parsivel', startdate='20220601', hour='01'
-        )
-    with np.testing.assert_raises(ValueError):
-        result = act.discovery.download_noaa_psl_data(
-            site='ctd', instrument='dongle', startdate='20220601', hour='01'
-        )
-
-
-def test_neon():
-    site_code = 'BARR'
-    result = act.discovery.get_neon_site_products(site_code, print_to_screen=True)
-    assert 'DP1.00002.001' in result
-    assert result['DP1.00003.001'] == 'Triple aspirated air temperature'
-
-    product_code = 'DP1.00002.001'
-    result = act.discovery.get_neon_product_avail(site_code, product_code, print_to_screen=True)
-    assert '2017-09' in result
-    assert '2022-11' in result
-
-    output_dir = os.path.join(os.getcwd(), site_code + '_' + product_code)
-    result = act.discovery.download_neon_data(site_code, product_code, '2022-10', output_dir=output_dir)
-    assert len(result) == 20
-    assert any('readme' in r for r in result)
-    assert any('sensor_position' in r for r in result)
-
-    result = act.discovery.download_neon_data(site_code, product_code, '2022-09',
-                                              end_date='2022-10', output_dir=output_dir)
-    assert len(result) == 40
-    assert any('readme' in r for r in result)
-    assert any('sensor_position' in r for r in result)
-
-
-def test_arm_doi():
-    datastream = 'sgpmetE13.b1'
-    startdate = '2022-01-01'
-    enddate = '2022-12-31'
-    doi = act.discovery.get_arm_doi(datastream, startdate, enddate)
-
-    assert len(doi) > 10
-    assert isinstance(doi, str)
-    assert 'doi' in doi
-    assert 'Kyrouac' in doi
-
-    doi = act.discovery.get_arm_doi('test', startdate, enddate)
-    assert "No DOI Found" in doi
-
-
-def test_download_surfrad():
-    results = act.discovery.download_surfrad_data(site='tbl', startdate='20230601', enddate='20230602')
-    assert len(results) == 2
-    assert 'tbl23152.dat' in results[0]
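The deleted discovery tests all share one guard-then-download shape: credentials and API keys come from environment variables, and the test returns early when they are absent so CI without secrets still passes. A condensed sketch of that pattern using the same download_arm_data() call exercised above (datastream and dates taken from the deleted test):

import os
import act

# Credentials come from the environment, as in the deleted tests.
username = os.getenv('ARM_USERNAME')
token = os.getenv('ARM_PASSWORD')
if username and token:
    results = act.discovery.arm.download_arm_data(
        username, token, 'sgpmetE13.b1', '2020-01-01', '2020-01-01'
    )
    # results lists the retrieved files; omit output= to use the default location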
diff --git a/act/tests/test_io.py b/act/tests/test_io.py
deleted file mode 100644
index bfbd555fcd..0000000000
--- a/act/tests/test_io.py
+++ /dev/null
@@ -1,924 +0,0 @@
-import glob
-from os import PathLike
-from pathlib import Path
-import random
-from string import ascii_letters
-import tempfile
-
-import fsspec
-import numpy as np
-import pytest
-
-import act
-import act.tests.sample_files as sample_files
-from act.io import read_gml, read_psl_wind_profiler_temperature, icartt
-from act.io.noaapsl import read_psl_surface_met
-
-
-def test_io():
-    ds = act.io.arm.read_arm_netcdf([act.tests.EXAMPLE_MET1])
-    assert 'temp_mean' in ds.variables.keys()
-    assert 'rh_mean' in ds.variables.keys()
-    assert ds.attrs['_arm_standards_flag'] == (1 << 0)
-
-    with np.testing.assert_raises(OSError):
-        ds = act.io.arm.read_arm_netcdf([])
-
-    ds = act.io.arm.read_arm_netcdf([], return_None=True)
-    assert ds is None
-    ds = act.io.arm.read_arm_netcdf(['./randomfile.nc'], return_None=True)
-    assert ds is None
-
-    ds = act.io.arm.read_arm_netcdf([act.tests.EXAMPLE_MET_TEST1])
-    assert 'time' in ds
-
-    ds = act.io.arm.read_arm_netcdf([act.tests.EXAMPLE_MET_TEST2])
-    assert ds['time'].values[10].astype('datetime64[ms]') == np.datetime64('2019-01-01T00:10:00', 'ms')
-
-    ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET1, use_base_time=True, drop_variables='time')
-    assert 'time' in ds
-    assert np.issubdtype(ds['time'].dtype, np.datetime64)
-    assert ds['time'].values[10].astype('datetime64[ms]') == np.datetime64('2019-01-01T00:10:00', 'ms')
-
-    del ds
-
-
-def test_keep_variables():
-
-    var_names = [
-        'temp_mean',
-        'rh_mean',
-        'wdir_vec_mean',
-        'tbrg_precip_total_corr',
-        'atmos_pressure',
-        'wspd_vec_mean',
-        'pwd_pw_code_inst',
-        'pwd_pw_code_15min',
-        'pwd_mean_vis_10min',
-        'logger_temp',
-        'pwd_precip_rate_mean_1min',
-        'pwd_cumul_snow',
-        'pwd_mean_vis_1min',
-        'pwd_pw_code_1hr',
-        'org_precip_rate_mean',
-        'tbrg_precip_total',
-        'pwd_cumul_rain',
-    ]
-    var_names = var_names + ['qc_' + ii for ii in var_names]
-    drop_variables = act.io.arm.keep_variables_to_drop_variables(
-        act.tests.EXAMPLE_MET1, var_names
-    )
-
-    expected_drop_variables = [
-        'wdir_vec_std',
-        'base_time',
-        'alt',
-        'qc_wspd_arith_mean',
-        'pwd_err_code',
-        'logger_volt',
-        'temp_std',
-        'lon',
-        'qc_logger_volt',
-        'time_offset',
-        'wspd_arith_mean',
-        'lat',
-        'vapor_pressure_std',
-        'vapor_pressure_mean',
-        'rh_std',
-        'qc_vapor_pressure_mean',
-    ]
-    assert drop_variables.sort() == expected_drop_variables.sort()
-
-    ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET1, keep_variables='temp_mean')
-    assert list(ds.data_vars) == ['temp_mean']
-    del ds
-
-    var_names = ['temp_mean', 'qc_temp_mean']
-    ds = act.io.arm.read_arm_netcdf(
-        act.tests.EXAMPLE_MET1, keep_variables=var_names, drop_variables='nonsense'
-    )
-    assert list(ds.data_vars).sort() == var_names.sort()
-    del ds
-
-    var_names = ['temp_mean', 'qc_temp_mean', 'alt', 'lat', 'lon']
-    ds = act.io.arm.read_arm_netcdf(
-        act.tests.EXAMPLE_MET_WILDCARD, keep_variables=var_names, drop_variables=['lon']
-    )
-    var_names = list(set(var_names) - {'lon'})
-    assert list(ds.data_vars).sort() == var_names.sort()
-    del ds
-
-    filenames = list(Path(file) for file in act.tests.EXAMPLE_MET_WILDCARD)
-    var_names = ['temp_mean', 'qc_temp_mean', 'alt', 'lat', 'lon']
-    ds = act.io.arm.read_arm_netcdf(filenames, keep_variables=var_names)
-    assert list(ds.data_vars).sort() == var_names.sort()
-    del ds
-
-
-def test_io_mfdataset():
-    met_ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD)
-    met_ds.load()
-    assert 'temp_mean' in met_ds.variables.keys()
-    assert 'rh_mean' in met_ds.variables.keys()
-    assert len(met_ds.attrs['_file_times']) == 7
-    assert met_ds.attrs['_arm_standards_flag'] == (1 << 0)
-    met_ds.close()
-    del met_ds
-
-    met_ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD, cleanup_qc=True)
-    met_ds.load()
-    var_name = 'temp_mean'
-    qc_var_name = 'qc_' + var_name
-    attr_names = [
-        'long_name',
-        'units',
-        'flag_masks',
-        'flag_meanings',
-        'flag_assessments',
-        'fail_min',
-        'fail_max',
-        'fail_delta',
-        'standard_name',
-    ]
-    assert var_name in met_ds.variables.keys()
-    assert qc_var_name in met_ds.variables.keys()
-    assert sorted(attr_names) == sorted(list(met_ds[qc_var_name].attrs.keys()))
-    assert met_ds[qc_var_name].attrs['flag_masks'] == [1, 2, 4, 8]
-    assert met_ds[qc_var_name].attrs['flag_assessments'] == ['Bad', 'Bad', 'Bad', 'Indeterminate']
-    met_ds.close()
-    del met_ds
-
-
-def test_io_csv():
-    headers = [
-        'day',
-        'month',
-        'year',
-        'time',
-        'pasquill',
-        'wdir_60m',
-        'wspd_60m',
-        'wdir_60m_std',
-        'temp_60m',
-        'wdir_10m',
-        'wspd_10m',
-        'wdir_10m_std',
-        'temp_10m',
-        'temp_dp',
-        'rh',
-        'avg_temp_diff',
-        'total_precip',
-        'solar_rad',
-        'net_rad',
-        'atmos_press',
-        'wv_pressure',
-        'temp_soil_10cm',
-        'temp_soil_100cm',
-        'temp_soil_10ft',
-    ]
-    anl_ds = act.io.csv.read_csv(act.tests.EXAMPLE_ANL_CSV, sep=r'\s+', column_names=headers)
-    assert 'temp_60m' in anl_ds.variables.keys()
-    assert 'rh' in anl_ds.variables.keys()
-    assert anl_ds['temp_60m'].values[10] == -1.7
-    anl_ds.close()
-
-    files = glob.glob(act.tests.EXAMPLE_MET_CSV)
-    ds = act.io.csv.read_csv(files[0])
-    assert 'date_time' in ds
-    assert '_datastream' in ds.attrs
-
-
-def test_io_dod():
-    dims = {'time': 1440, 'drop_diameter': 50}
-
-    try:
-        ds = act.io.arm.create_ds_from_arm_dod(
-            'vdis.b1', dims, version='1.2', scalar_fill_dim='time'
-        )
-        assert 'moment1' in ds
-        assert len(ds['base_time'].values) == 1440
-        assert len(ds['drop_diameter'].values) == 50
-        with np.testing.assert_warns(UserWarning):
-            ds2 = act.io.arm.create_ds_from_arm_dod('vdis.b1', dims, scalar_fill_dim='time')
-        assert 'moment1' in ds2
-        assert len(ds2['base_time'].values) == 1440
-        assert len(ds2['drop_diameter'].values) == 50
-        with np.testing.assert_raises(ValueError):
-            ds = act.io.arm.create_ds_from_arm_dod('vdis.b1', {}, version='1.2')
-        ds = act.io.arm.create_ds_from_arm_dod(
-            sample_files.EXAMPLE_DOD, dims, version=1.2, scalar_fill_dim='time',
-            local_file=True)
-        assert 'moment1' in ds
-        assert len(ds['base_time'].values) == 1440
-        assert len(ds['drop_diameter'].values) == 50
-    except Exception:
-        return
-    ds.close()
-    ds2.close()
-
-
-def test_io_write():
-    sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1)
-    sonde_ds.clean.cleanup()
-
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        write_file = Path(tmpdirname, Path(sample_files.EXAMPLE_SONDE1).name)
-        keep_vars = ['tdry', 'qc_tdry', 'dp', 'qc_dp']
-        for var_name in list(sonde_ds.data_vars):
-            if var_name not in keep_vars:
-                del sonde_ds[var_name]
-        sonde_ds.write.write_netcdf(path=write_file, FillValue=-9999)
-
-        sonde_ds_read = act.io.arm.read_arm_netcdf(str(write_file))
-        assert list(sonde_ds_read.data_vars) == keep_vars
-        assert isinstance(sonde_ds_read['qc_tdry'].attrs['flag_meanings'], str)
-        assert sonde_ds_read['qc_tdry'].attrs['flag_meanings'].count('__') == 21
-        for attr in ['qc_standards_version', 'qc_method', 'qc_comment']:
-            assert attr not in list(sonde_ds_read.attrs)
-        sonde_ds_read.close()
-        del sonde_ds_read
-
-    sonde_ds.close()
-
-    sonde_ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_EBBR1)
-    sonde_ds.clean.cleanup()
-    assert 'fail_min' in sonde_ds['qc_home_signal_15'].attrs
-    assert 'standard_name' in sonde_ds['qc_home_signal_15'].attrs
-    assert 'flag_masks' in sonde_ds['qc_home_signal_15'].attrs
-
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        cf_convention = 'CF-1.8'
-        write_file = Path(tmpdirname, Path(sample_files.EXAMPLE_EBBR1).name)
-        sonde_ds.write.write_netcdf(
-            path=write_file,
-            make_copy=False,
-            join_char='_',
-            cf_compliant=True,
-            cf_convention=cf_convention,
-        )
-
-        sonde_ds_read = act.io.arm.read_arm_netcdf(str(write_file))
-
-        assert cf_convention in sonde_ds_read.attrs['Conventions'].split()
-        assert sonde_ds_read.attrs['FeatureType'] == 'timeSeries'
-        global_att_keys = [ii for ii in sonde_ds_read.attrs.keys() if not ii.startswith('_')]
-        assert global_att_keys[-1] == 'history'
-        assert sonde_ds_read['alt'].attrs['axis'] == 'Z'
-        assert sonde_ds_read['alt'].attrs['positive'] == 'up'
-
-        sonde_ds_read.close()
-        del sonde_ds_read
-
-    sonde_ds.close()
-
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_CEIL1)
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        cf_convention = 'CF-1.8'
-        write_file = Path(tmpdirname, Path(sample_files.EXAMPLE_CEIL1).name)
-        ds.write.write_netcdf(
-            path=write_file,
-            make_copy=False,
-            join_char='_',
-            cf_compliant=True,
-            cf_convention=cf_convention,
-        )
-
-        ds_read = act.io.arm.read_arm_netcdf(str(write_file))
-
-        assert cf_convention in ds_read.attrs['Conventions'].split()
-        assert ds_read.attrs['FeatureType'] == 'timeSeriesProfile'
-        assert len(ds_read.dims) > 1
-
-        ds_read.close()
-        del ds_read
-
-
-def test_clean_cf_qc():
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1, cleanup_qc=True)
-        ds.load()
-        var_name = 'temp_mean'
-        qc_var_name = 'qc_' + var_name
-        ds.qcfilter.remove_test(var_name, test_number=4)
-        ds.qcfilter.remove_test(var_name, test_number=3)
-        ds.qcfilter.remove_test(var_name, test_number=2)
-        ds[qc_var_name].attrs['flag_masks'] = ds[qc_var_name].attrs['flag_masks'][0]
-        flag_meanings = ds[qc_var_name].attrs['flag_meanings'][0]
-        ds[qc_var_name].attrs['flag_meanings'] = flag_meanings.replace(' ', '__')
-        flag_meanings = ds[qc_var_name].attrs['flag_assessments'][0]
-        ds[qc_var_name].attrs['flag_assessments'] = flag_meanings.replace(' ', '__')
-
-        write_file = str(Path(tmpdirname, Path(sample_files.EXAMPLE_MET1).name))
-        ds.write.write_netcdf(path=write_file, cf_compliant=True)
-        ds.close()
-        del ds
-
-        read_ds = act.io.arm.read_arm_netcdf(write_file, cleanup_qc=True)
-        read_ds.load()
-
-        assert type(read_ds[qc_var_name].attrs['flag_masks']).__module__ == 'numpy'
-        assert read_ds[qc_var_name].attrs['flag_masks'].size == 1
-        assert read_ds[qc_var_name].attrs['flag_masks'][0] == 1
-        assert isinstance(read_ds[qc_var_name].attrs['flag_meanings'], list)
-        assert len(read_ds[qc_var_name].attrs['flag_meanings']) == 1
-        assert isinstance(read_ds[qc_var_name].attrs['flag_assessments'], list)
-        assert len(read_ds[qc_var_name].attrs['flag_assessments']) == 1
-        assert read_ds[qc_var_name].attrs['flag_assessments'] == ['Bad']
-        assert read_ds[qc_var_name].attrs['flag_meanings'] == ['Value is equal to missing_value.']
-
-        read_ds.close()
-        del read_ds
-
-
-def test_io_mpldataset():
-    try:
-        mpl_ds = act.io.mpl.read_sigma_mplv5(act.tests.EXAMPLE_SIGMA_MPLV5)
-    except Exception:
-        return
-
-    # Tests fields
-    assert 'channel_1' in mpl_ds.variables.keys()
-    assert 'temp_0' in mpl_ds.variables.keys()
-    assert mpl_ds.channel_1.values.shape == (102, 1000)
-
-    # Tests coordinates
-    assert 'time' in mpl_ds.coords.keys()
-    assert 'range' in mpl_ds.coords.keys()
-    assert mpl_ds.coords['time'].values.shape == (102,)
-    assert mpl_ds.coords['range'].values.shape == (1000,)
-    assert '_arm_standards_flag' in mpl_ds.attrs.keys()
-
-    # Tests attributes
-    assert '_datastream' in mpl_ds.attrs.keys()
-    mpl_ds.close()
-
-
-def test_read_gml():
-    # Test Radiation
-    ds = read_gml(sample_files.EXAMPLE_GML_RADIATION, datatype='RADIATION')
-    assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1725.28)
-    assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4431.88)
-    assert (
-        ds['upwelling_infrared_case_temp'].attrs['ancillary_variables']
-        == 'qc_upwelling_infrared_case_temp'
-    )
-    assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_values'] == [0, 1, 2]
-    assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_meanings'] == [
-        'Not failing any tests',
-        'Knowingly bad value',
-        'Should be used with scrutiny',
-    ]
-    assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_assessments'] == [
-        'Good',
-        'Bad',
-        'Indeterminate',
-    ]
-    assert ds['time'].values[-1] == np.datetime64('2021-01-01T00:17:00')
-
-    ds = read_gml(sample_files.EXAMPLE_GML_RADIATION, convert_missing=False)
-    assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1725.28)
-    assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4431.88)
-    assert (
-        ds['upwelling_infrared_case_temp'].attrs['ancillary_variables']
-        == 'qc_upwelling_infrared_case_temp'
-    )
-    assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_values'] == [0, 1, 2]
-    assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_meanings'] == [
-        'Not failing any tests',
-        'Knowingly bad value',
-        'Should be used with scrutiny',
-    ]
-    assert ds['qc_upwelling_infrared_case_temp'].attrs['flag_assessments'] == [
-        'Good',
-        'Bad',
-        'Indeterminate',
-    ]
-    assert ds['time'].values[-1] == np.datetime64('2021-01-01T00:17:00')
-
-    # Test MET
-    ds = read_gml(sample_files.EXAMPLE_GML_MET, datatype='MET')
-    assert np.isclose(np.nansum(ds['wind_speed'].values), 148.1)
-    assert ds['wind_speed'].attrs['units'] == 'm/s'
-    assert np.isnan(ds['wind_speed'].attrs['_FillValue'])
-    assert np.sum(np.isnan(ds['preciptation_intensity'].values)) == 20
-    assert ds['preciptation_intensity'].attrs['units'] == 'mm/hour'
-    assert ds['time'].values[0] == np.datetime64('2020-01-01T00:00:00')
-
-    ds = read_gml(sample_files.EXAMPLE_GML_MET, convert_missing=False)
-    assert np.isclose(np.nansum(ds['wind_speed'].values), 148.1)
-    assert ds['wind_speed'].attrs['units'] == 'm/s'
-    assert np.isclose(ds['wind_speed'].attrs['_FillValue'], -999.9)
-    assert np.sum(ds['preciptation_intensity'].values) == -1980
-    assert ds['preciptation_intensity'].attrs['units'] == 'mm/hour'
-    assert ds['time'].values[0] == np.datetime64('2020-01-01T00:00:00')
-
-    # Test Ozone
-    ds = read_gml(sample_files.EXAMPLE_GML_OZONE, datatype='OZONE')
-    assert np.isclose(np.nansum(ds['ozone'].values), 582.76)
-    assert ds['ozone'].attrs['long_name'] == 'Ozone'
-    assert ds['ozone'].attrs['units'] == 'ppb'
-    assert np.isnan(ds['ozone'].attrs['_FillValue'])
-    assert ds['time'].values[0] == np.datetime64('2020-12-01T00:00:00')
-
-    ds = read_gml(sample_files.EXAMPLE_GML_OZONE)
-    assert np.isclose(np.nansum(ds['ozone'].values), 582.76)
-    assert ds['ozone'].attrs['long_name'] == 'Ozone'
-    assert ds['ozone'].attrs['units'] == 'ppb'
-    assert np.isnan(ds['ozone'].attrs['_FillValue'])
-    assert ds['time'].values[0] == np.datetime64('2020-12-01T00:00:00')
-
-    # Test Carbon Dioxide
-    ds = read_gml(sample_files.EXAMPLE_GML_CO2, datatype='co2')
-    assert np.isclose(np.nansum(ds['co2'].values), 2307.630)
-    assert (
-        ds['qc_co2'].values == np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int)
-    ).all()
-    assert ds['co2'].attrs['units'] == 'ppm'
-    assert np.isnan(ds['co2'].attrs['_FillValue'])
-    assert ds['qc_co2'].attrs['flag_assessments'] == ['Bad', 'Indeterminate']
-    assert ds['latitude'].attrs['standard_name'] == 'latitude'
-
-    ds = read_gml(sample_files.EXAMPLE_GML_CO2, convert_missing=False)
-    assert np.isclose(np.nansum(ds['co2'].values), -3692.3098)
-    assert ds['co2'].attrs['_FillValue'] == -999.99
-    assert (
-        ds['qc_co2'].values == np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int)
-    ).all()
-    assert ds['co2'].attrs['units'] == 'ppm'
-    assert np.isclose(ds['co2'].attrs['_FillValue'], -999.99)
-    assert ds['qc_co2'].attrs['flag_assessments'] == ['Bad', 'Indeterminate']
-    assert ds['latitude'].attrs['standard_name'] == 'latitude'
-
-    # Test Halocarbon
-    ds = read_gml(sample_files.EXAMPLE_GML_HALO, datatype='HALO')
-    assert np.isclose(np.nansum(ds['CCl4'].values), 1342.65)
-    assert ds['CCl4'].attrs['units'] == 'ppt'
-    assert ds['CCl4'].attrs['long_name'] == 'Carbon Tetrachloride (CCl4) daily median'
-    assert np.isnan(ds['CCl4'].attrs['_FillValue'])
-    assert ds['time'].values[0] == np.datetime64('1998-06-16T00:00:00')
-
-    ds = read_gml(sample_files.EXAMPLE_GML_HALO)
-    assert np.isclose(np.nansum(ds['CCl4'].values), 1342.65)
-    assert ds['CCl4'].attrs['units'] == 'ppt'
-    assert ds['CCl4'].attrs['long_name'] == 'Carbon Tetrachloride (CCl4) daily median'
-    assert np.isnan(ds['CCl4'].attrs['_FillValue'])
-    assert ds['time'].values[0] == np.datetime64('1998-06-16T00:00:00')
-
-
-def test_read_psl_wind_profiler():
-    test_ds_low, test_ds_hi = act.io.noaapsl.read_psl_wind_profiler(
-        act.tests.EXAMPLE_NOAA_PSL, transpose=False
-    )
-    # test dimensions
-    assert 'time' and 'HT' in test_ds_low.dims.keys()
-    assert 'time' and 'HT' in test_ds_hi.dims.keys()
-    assert test_ds_low.dims['time'] == 4
-    assert test_ds_hi.dims['time'] == 4
-    assert test_ds_low.dims['HT'] == 49
-    assert test_ds_hi.dims['HT'] == 50
-
-    # test coordinates
-    assert (
-        test_ds_low.coords['HT'][0:5] == np.array([0.151, 0.254, 0.356, 0.458, 0.561])
-    ).all()
-    assert (
-        test_ds_low.coords['time'][0:2]
-        == np.array(
-            ['2021-05-05T15:00:01.000000000', '2021-05-05T15:15:49.000000000'],
-            dtype='datetime64[ns]',
-        )
-    ).all()
-
-    # test attributes
-    assert test_ds_low.attrs['site_identifier'] == 'CTD'
-    assert test_ds_low.attrs['data_type'] == 'WINDS'
-    assert test_ds_low.attrs['revision_number'] == '5.1'
-    assert test_ds_low.attrs['latitude'] == 34.66
-    assert test_ds_low.attrs['longitude'] == -87.35
-    assert test_ds_low.attrs['elevation'] == 187.0
-    assert (test_ds_low.attrs['beam_azimuth'] == np.array(
-        [38.0, 38.0, 308.0], dtype='float32')).all()
-    assert (test_ds_low.attrs['beam_elevation'] == np.array(
-        [90.0, 74.7, 74.7], dtype='float32')).all()
-    assert test_ds_low.attrs['consensus_average_time'] == 24
-    assert test_ds_low.attrs['oblique-beam_vertical_correction'] == 0
-    assert test_ds_low.attrs['number_of_beams'] == 3
-    assert test_ds_low.attrs['number_of_range_gates'] == 49
-    assert test_ds_low.attrs['number_of_gates_oblique'] == 49
-    assert test_ds_low.attrs['number_of_gates_vertical'] == 49
-    assert test_ds_low.attrs['number_spectral_averages_oblique'] == 50
-    assert test_ds_low.attrs['number_spectral_averages_vertical'] == 50
-    assert test_ds_low.attrs['pulse_width_oblique'] == 708
-    assert test_ds_low.attrs['pulse_width_vertical'] == 708
-    assert test_ds_low.attrs['inner_pulse_period_oblique'] == 50
-    assert test_ds_low.attrs['inner_pulse_period_vertical'] == 50
-    assert test_ds_low.attrs['full_scale_doppler_value_oblique'] == 20.9
-    assert test_ds_low.attrs['full_scale_doppler_value_vertical'] == 20.9
-    assert test_ds_low.attrs['delay_to_first_gate_oblique'] == 4000
-    assert test_ds_low.attrs['delay_to_first_gate_vertical'] == 4000
-    assert test_ds_low.attrs['spacing_of_gates_oblique'] == 708
-    assert test_ds_low.attrs['spacing_of_gates_vertical'] == 708
-
-    # test fields
-    assert test_ds_low['RAD1'].shape == (4, 49)
-    assert test_ds_hi['RAD1'].shape == (4, 50)
-    assert (test_ds_low['RAD1'][0, 0:5] == np.array(
-        [0.2, 0.1, 0.1, 0.0, -0.1])).all()
-    assert (test_ds_hi['RAD1'][0, 0:5] == np.array(
-        [0.1, 0.1, -0.1, 0.0, -0.2])).all()
-
-    assert test_ds_low['SPD'].shape == (4, 49)
-    assert test_ds_hi['SPD'].shape == (4, 50)
-    assert (test_ds_low['SPD'][0, 0:5] == np.array(
-        [2.5, 3.3, 4.3, 4.3, 4.8])).all()
-    assert (test_ds_hi['SPD'][0, 0:5] == np.array(
-        [3.7, 4.6, 6.3, 5.2, 6.8])).all()
-
-    # test transpose
-    test_ds_low, test_ds_hi = act.io.noaapsl.read_psl_wind_profiler(
-        act.tests.EXAMPLE_NOAA_PSL, transpose=True
-    )
-    assert test_ds_low['RAD1'].shape == (49, 4)
-    assert test_ds_hi['RAD1'].shape == (50, 4)
-    assert test_ds_low['SPD'].shape == (49, 4)
-    assert test_ds_hi['SPD'].shape == (50, 4)
-    test_ds_low.close()
-
-
-def test_read_psl_wind_profiler_temperature():
-    ds = read_psl_wind_profiler_temperature(
-        act.tests.EXAMPLE_NOAA_PSL_TEMPERATURE)
-
-    ds.attrs['site_identifier'] == 'CTD'
-    ds.attrs['elevation'] = 600.0
-    ds.T.values[0] == 33.2
-
-
-def test_read_psl_surface_met():
-    ds = read_psl_surface_met(sample_files.EXAMPLE_NOAA_PSL_SURFACEMET)
-    assert ds.time.size == 2
-    assert np.isclose(np.sum(ds['Pressure'].values), 1446.9)
-    assert np.isclose(ds['lat'].values, 38.972425)
-    assert ds['lat'].attrs['units'] == 'degree_N'
-    assert ds['Upward_Longwave_Irradiance'].attrs['long_name'] == 'Upward Longwave Irradiance'
-    assert ds['Upward_Longwave_Irradiance'].dtype.str == '
[...]
-        ds.time.values >= np.datetime64('2019-01-01 06:00:00')
-    )
-    ds = ds.sel({'time': index})
-
-    index = (ds.time.values <= np.datetime64('2019-01-01 18:34:00')) | (
-        ds.time.values >= np.datetime64('2019-01-01 19:06:00')
-    )
-    ds = ds.sel({'time': index})
-
-    index = (ds.time.values <= np.datetime64('2019-01-01 12:30:00')) | (
-        ds.time.values >= np.datetime64('2019-01-01 12:40:00')
-    )
-    ds = ds.sel({'time': index})
-
-    display = TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(1,))
-    display.plot('temp_mean', subplot_index=(0,), add_nan=True, day_night_background=True)
-    ds.close()
-
-    try:
-        return display.fig
-    finally:
-        matplotlib.pyplot.close(display.fig)
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_timeseries_invert():
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_IRT25m20s)
-    display = TimeSeriesDisplay(ds, figsize=(10, 8))
-    display.plot('inst_sfc_ir_temp', invert_y_axis=True)
-    ds.close()
-    return display.fig
-
-
-def test_plot_time_rng():
-    # Test if setting the xrange can be done with pandas or datetime datatype
-    # eventhough the data is numpy. Check for correctly converting xrange values
-    # before setting and not causing an exception.
-    met = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
-
-    # Plot data
-    xrng = [datetime(2019, 1, 1, 0, 0), datetime(2019, 1, 2, 0, 0)]
-    display = TimeSeriesDisplay(met)
-    display.plot('temp_mean', time_rng=xrng)
-
-    xrng = [pd.to_datetime('2019-01-01'), pd.to_datetime('2019-01-02')]
-    display = TimeSeriesDisplay(met)
-    display.plot('temp_mean', time_rng=xrng)
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_groupby_plot():
-    ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET_WILDCARD)
-
-    # Create Plot Display
-    display = WindRoseDisplay(ds, figsize=(15, 15), subplot_shape=(3, 3))
-    groupby = display.group_by('day')
-    groupby.plot_group(
-        'plot_data',
-        None,
-        dir_field='wdir_vec_mean',
-        spd_field='wspd_vec_mean',
-        data_field='temp_mean',
-        num_dirs=12,
-        plot_type='line',
-    )
-
-    # Set theta tick markers for each axis inside display to be inside the polar axes
-    for i in range(3):
-        for j in range(3):
-            display.axes[i, j].tick_params(pad=-20)
-    ds.close()
-    return display.fig
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_match_ylimits_plot():
-    files = sample_files.EXAMPLE_MET_WILDCARD
-    ds = act.io.arm.read_arm_netcdf(files)
-    display = act.plotting.TimeSeriesDisplay(ds, figsize=(10, 8), subplot_shape=(2, 2))
-    groupby = display.group_by('day')
-    groupby.plot_group('plot', None, field='temp_mean', marker=' ')
-    groupby.display.set_yrng([-20, 20], match_axes_ylimits=True)
-    ds.close()
-    return display.fig
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_enhanced_skewt_plot():
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1)
-    display = act.plotting.SkewTDisplay(ds)
-    display.plot_enhanced_skewt(color_field='alt', component_range=85)
-    ds.close()
-    return display.fig
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_enhanced_skewt_plot_2():
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1)
-    display = act.plotting.SkewTDisplay(ds)
-    overwrite_data = {'Test': 1234.0}
-    display.plot_enhanced_skewt(
-        spd_name='u_wind',
-        dir_name='v_wind',
-        color_field='alt',
-        component_range=85,
-        uv_flag=True,
-        overwrite_data=overwrite_data,
-        add_data=overwrite_data,
-    )
-    ds.close()
-    return display.fig
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_xlim_correction_plot():
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
-
-    # Plot data
-    xrng = [datetime(2019, 1, 1, 0, 0, 0), datetime(2019, 1, 1, 0, 0, 0)]
-    display = TimeSeriesDisplay(ds)
-    display.plot('temp_mean', time_rng=xrng)
-
-    ds.close()
-
-    return display.fig
-
-
-def test_histogram_kwargs():
-    files = sample_files.EXAMPLE_MET1
-    ds = act.io.arm.read_arm_netcdf(files)
-    hist_kwargs = {'range': (-10, 10)}
-    histdisplay = DistributionDisplay(ds)
-    hist_dict = histdisplay.plot_stacked_bar(
-        'temp_mean',
-        bins=np.arange(-40, 40, 5),
-        sortby_bins=np.arange(-40, 40, 5),
-        hist_kwargs=hist_kwargs,
-    )
-    hist_array = np.array([0, 0, 0, 0, 0, 0, 493, 883, 64, 0, 0, 0, 0, 0, 0])
-    assert_allclose(hist_dict['histogram'], hist_array)
-    hist_dict = histdisplay.plot_stacked_bar('temp_mean', hist_kwargs=hist_kwargs)
-    hist_array = np.array([0, 0, 950, 177, 249, 64, 0, 0, 0, 0])
-    assert_allclose(hist_dict['histogram'], hist_array)
-
-    hist_dict_stair = histdisplay.plot_stairstep(
-        'temp_mean',
-        bins=np.arange(-40, 40, 5),
-        sortby_bins=np.arange(-40, 40, 5),
-        hist_kwargs=hist_kwargs,
-    )
-    hist_array = np.array([0, 0, 0, 0, 0, 0, 493, 883, 64, 0, 0, 0, 0, 0, 0])
-    assert_allclose(hist_dict_stair['histogram'], hist_array)
-    hist_dict_stair = histdisplay.plot_stairstep('temp_mean', hist_kwargs=hist_kwargs)
-    hist_array = np.array([0, 0, 950, 177, 249, 64, 0, 0, 0, 0])
-    assert_allclose(hist_dict_stair['histogram'], hist_array)
-
-    hist_dict_heat = histdisplay.plot_heatmap(
-        'temp_mean',
-        'rh_mean',
-        x_bins=np.arange(-60, 10, 1),
-        y_bins=np.linspace(0, 10000.0, 50),
-        hist_kwargs=hist_kwargs,
-    )
-    hist_array = [0.0, 0.0, 0.0, 0.0]
-    assert_allclose(hist_dict_heat['histogram'][0, 0:4], hist_array)
-    ds.close()
-    matplotlib.pyplot.close(fig=histdisplay.fig)
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_violin():
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
-
-    # Create a DistributionDisplay object to compare fields
-    display = DistributionDisplay(ds)
-
-    # Create violin display of mean temperature
-    display.plot_violin('temp_mean', positions=[5.0], set_title='SGP MET E13 2019-01-01')
-
-    ds.close()
-
-    return display.fig
-
-
-@pytest.mark.mpl_image_compare(tolerance=30)
-def test_scatter():
-    ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_MET1)
-    # Create a DistributionDisplay object to compare fields
-    display = DistributionDisplay(ds)
-
-    display.plot_scatter(
-        'wspd_arith_mean', 'wspd_vec_mean', m_field='wdir_vec_mean', marker='d', cmap='bwr'
-    )
-    # Set the range of the field on the x-axis
-    display.set_xrng((0, 14))
-    display.set_yrng((0, 14))
-    # Display the 1:1 ratio line
-    display.set_ratio_line()
-
-    ds.close()
-
-    return display.fig
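The deleted plotting tests above all follow one round trip: read a sample file, build a display class, plot a field, close the dataset, and return display.fig so pytest-mpl (the @pytest.mark.mpl_image_compare decorator) can compare images. A minimal sketch of that round trip, using a field and sample file taken from the deleted tests:

import act
from act.plotting import TimeSeriesDisplay

ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1)
display = TimeSeriesDisplay(ds, figsize=(10, 6))
display.plot('temp_mean', day_night_background=True)
ds.close()
# Under pytest-mpl, returning display.fig from a test triggers the comparison.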
diff --git a/act/tests/test_qc.py b/act/tests/test_qc.py
deleted file mode 100644
index 18fea4cba7..0000000000
--- a/act/tests/test_qc.py
+++ /dev/null
@@ -1,1485 +0,0 @@
-import copy
-from datetime import datetime
-import dask.array as da
-import numpy as np
-import pandas as pd
-import pytest
-import xarray as xr
-from pathlib import Path
-
-from act.io.arm import read_arm_netcdf
-from act.qc.arm import add_dqr_to_qc
-from act.qc.qcfilter import parse_bit, set_bit, unset_bit
-from act.qc.radiometer_tests import fft_shading_test
-from act.qc.sp2 import SP2ParticleCriteria, PYSP2_AVAILABLE
-from act.tests import (
-    EXAMPLE_CEIL1,
-    EXAMPLE_CO2FLX4M,
-    EXAMPLE_MET1,
-    EXAMPLE_METE40,
-    EXAMPLE_MFRSR,
-    EXAMPLE_IRT25m20s,
-    EXAMPLE_BRS,
-    EXAMPLE_MET_YAML,
-    EXAMPLE_ENA_MET,
-    EXAMPLE_OLD_QC
-)
-from act.qc.bsrn_tests import _calculate_solar_parameters
-from act.qc.add_supplemental_qc import read_yaml_supplemental_qc, apply_supplemental_qc
-
-try:
-    import scikit_posthocs
-    SCIKIT_POSTHOCS_AVAILABLE = True
-except ImportError:
-    SCIKIT_POSTHOCS_AVAILABLE = False
-
-
-def test_fft_shading_test():
-    ds = read_arm_netcdf(EXAMPLE_MFRSR)
-    ds.clean.cleanup()
-    ds = fft_shading_test(ds)
-    qc_data = ds['qc_diffuse_hemisp_narrowband_filter4']
-    assert np.nansum(qc_data.values) == 7164
-
-
-def test_global_qc_cleanup():
-    ds = read_arm_netcdf(EXAMPLE_MET1)
-    ds.load()
-    ds.clean.cleanup()
-
-    assert ds['qc_wdir_vec_mean'].attrs['flag_meanings'] == [
-        'Value is equal to missing_value.',
-        'Value is less than the fail_min.',
-        'Value is greater than the fail_max.',
-    ]
-    assert ds['qc_wdir_vec_mean'].attrs['flag_masks'] == [1, 2, 4]
-    assert ds['qc_wdir_vec_mean'].attrs['flag_assessments'] == [
-        'Bad',
-        'Bad',
-        'Bad',
-    ]
-
-    assert ds['qc_temp_mean'].attrs['flag_meanings'] == [
-        'Value is equal to missing_value.',
-        'Value is less than the fail_min.',
-        'Value is greater than the fail_max.',
-        'Difference between current and previous values exceeds fail_delta.',
-    ]
-    assert ds['qc_temp_mean'].attrs['flag_masks'] == [1, 2, 4, 8]
-    assert ds['qc_temp_mean'].attrs['flag_assessments'] == [
-        'Bad',
-        'Bad',
-        'Bad',
-        'Indeterminate',
-    ]
-
-    ds.close()
-    del ds
-
-
-def test_qc_test_errors():
-    ds = read_arm_netcdf(EXAMPLE_MET1)
-    var_name = 'temp_mean'
-
-    assert ds.qcfilter.add_less_test(var_name, None) is None
-    assert ds.qcfilter.add_greater_test(var_name, None) is None
-    assert ds.qcfilter.add_less_equal_test(var_name, None) is None
-    assert ds.qcfilter.add_equal_to_test(var_name, None) is None
-    assert ds.qcfilter.add_not_equal_to_test(var_name, None) is None
-
-
-def test_arm_qc():
-    # Test DQR Webservice using known DQR
-    variable = 'wspd_vec_mean'
-    ds = read_arm_netcdf(EXAMPLE_METE40)
-    ds_org = copy.deepcopy(ds)
-    qc_variable = ds.qcfilter.check_for_ancillary_qc(variable)
-
-    # DQR webservice does go down, so ensure it properly runs first before testing
-    try:
-        ds = add_dqr_to_qc(ds)
-
-    except ValueError:
-        return
-
-    assert 'Suspect' not in ds[qc_variable].attrs['flag_assessments']
-    assert 'Incorrect' not in ds[qc_variable].attrs['flag_assessments']
-    assert 'Bad' in ds[qc_variable].attrs['flag_assessments']
-    assert 'Indeterminate' in ds[qc_variable].attrs['flag_assessments']
-
-    # Check that defualt will update all variables in DQR
-    for var_name in ['wdir_vec_mean', 'wdir_vec_std', 'wspd_arith_mean', 'wspd_vec_mean']:
-        qc_var = ds.qcfilter.check_for_ancillary_qc(var_name)
-        assert ds[qc_var].attrs['flag_meanings'][-1].startswith('D190529.4')
-
-    # Check that variable keyword works as expected.
-    ds = copy.deepcopy(ds_org)
-    add_dqr_to_qc(ds, variable=variable)
-    qc_var = ds.qcfilter.check_for_ancillary_qc(variable)
-    assert ds[qc_var].attrs['flag_meanings'][-1].startswith('D190529.4')
-    qc_var = ds.qcfilter.check_for_ancillary_qc('wdir_vec_std')
-    assert len(ds[qc_var].attrs['flag_masks']) == 0
-
-    # Check that include and exclude keywords work as expected
-    ds = copy.deepcopy(ds_org)
-    add_dqr_to_qc(ds, variable=variable, exclude=['D190529.4'])
-    assert len(ds[qc_variable].attrs['flag_meanings']) == 4
-    add_dqr_to_qc(ds, variable=variable, include=['D400101.1'])
-    assert len(ds[qc_variable].attrs['flag_meanings']) == 4
-    add_dqr_to_qc(ds, variable=variable, include=['D190529.4'])
-    assert len(ds[qc_variable].attrs['flag_meanings']) == 5
-    add_dqr_to_qc(ds, variable=variable, assessment='Incorrect')
-    assert len(ds[qc_variable].attrs['flag_meanings']) == 5
-
-    # Test additional keywords
-    add_dqr_to_qc(ds, variable=variable, assessment='Suspect', cleanup_qc=False,
-                  dqr_link=True, skip_location_vars=True)
-    assert len(ds[qc_variable].attrs['flag_meanings']) == 6
-
-    # Default is to normalize assessment terms. Check that we can turn off.
-    add_dqr_to_qc(ds, variable=variable, normalize_assessment=False)
-    assert 'Suspect' in ds[qc_variable].attrs['flag_assessments']
-
-    # Test that an error is raised when no datastream global attributes
-    with np.testing.assert_raises(ValueError):
-        ds4 = copy.deepcopy(ds)
-        del ds4.attrs['datastream']
-        del ds4.attrs['_datastream']
-        add_dqr_to_qc(ds4, variable=variable)
-
-
-def test_qcfilter():
-    ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
-    var_name = 'inst_up_long_dome_resist'
-    expected_qc_var_name = 'qc_' + var_name
-
-    ds.qcfilter.check_for_ancillary_qc(
-        var_name, add_if_missing=True, cleanup=False, flag_type=False
-    )
-    assert expected_qc_var_name in list(ds.keys())
-    del ds[expected_qc_var_name]
-
-    # Perform adding of quality control variables to Xarray dataset
-    result = ds.qcfilter.add_test(var_name, test_meaning='Birds!')
-    assert isinstance(result, dict)
-    qc_var_name = result['qc_variable_name']
-    assert qc_var_name == expected_qc_var_name
-
-    # Check that new linking and describing attributes are set
-    assert ds[qc_var_name].attrs['standard_name'] == 'quality_flag'
-    assert ds[var_name].attrs['ancillary_variables'] == qc_var_name
-
-    # Check that CF attributes are set including new flag_assessments
-    assert 'flag_masks' in ds[qc_var_name].attrs.keys()
-    assert 'flag_meanings' in ds[qc_var_name].attrs.keys()
-    assert 'flag_assessments' in ds[qc_var_name].attrs.keys()
-
-    # Check that the values of the attributes are set correctly
-    assert ds[qc_var_name].attrs['flag_assessments'][0] == 'Bad'
-    assert ds[qc_var_name].attrs['flag_meanings'][0] == 'Birds!'
-    assert ds[qc_var_name].attrs['flag_masks'][0] == 1
-
-    # Set some test values
-    index = [0, 1, 2, 30]
-    ds.qcfilter.set_test(var_name, index=index, test_number=result['test_number'])
-
-    # Add a new test and set values
-    index2 = [6, 7, 8, 50]
-    ds.qcfilter.add_test(
-        var_name,
-        index=index2,
-        test_number=9,
-        test_meaning='testing high number',
-        test_assessment='Suspect',
-    )
-
-    # Retrieve data from Xarray dataset as numpy masked array. Count number of masked
-    # elements and ensure equal to size of index array.
-    data = ds.qcfilter.get_masked_data(var_name, rm_assessments='Bad')
-    assert np.ma.count_masked(data) == len(index)
-
-    data = ds.qcfilter.get_masked_data(
-        var_name, rm_assessments='Suspect', return_nan_array=True
-    )
-    assert np.sum(np.isnan(data)) == len(index2)
-
-    data = ds.qcfilter.get_masked_data(
-        var_name, rm_assessments=['Bad', 'Suspect'], ma_fill_value=np.nan
-    )
-    assert np.ma.count_masked(data) == len(index + index2)
-
-    # Test internal function for returning the index array of where the
-    # tests are set.
-    assert (
-        np.sum(
-            ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
-            - np.array(index, dtype=int)
-        )
-        == 0
-    )
-
-    # Test adding QC for length-1 variables
-    ds['west'] = ('west', ['W'])
-    ds['avg_wind_speed'] = ('west', [20])
-
-    # Should not fail the test
-    ds.qcfilter.add_test(
-        'avg_wind_speed',
-        index=ds.avg_wind_speed.data > 100,
-        test_meaning='testing bool flag: false',
-        test_assessment='Suspect',
-    )
-    assert ds.qc_avg_wind_speed.data == 0
-
-    # Should fail the test
-    ds.qcfilter.add_test(
-        'avg_wind_speed',
-        index=ds.avg_wind_speed.data < 100,
-        test_meaning='testing bool flag: true',
-        test_assessment='Suspect',
-    )
-    assert ds.qc_avg_wind_speed.data == 2
-
-    # Should fail the test
-    ds.qcfilter.add_test(
-        'avg_wind_speed',
-        index=[0],
-        test_meaning='testing idx flag: true',
-        test_assessment='Suspect',
-    )
-    assert ds.qc_avg_wind_speed.data == 6
-
-    # Should not fail the test
-    ds.qcfilter.add_test(
-        'avg_wind_speed',
-        test_meaning='testing idx flag: false',
-        test_assessment='Suspect',
-    )
-    assert ds.qc_avg_wind_speed.data == 6
-
-    # Unset a test
-    ds.qcfilter.unset_test(var_name, index=0, test_number=result['test_number'])
-    # Remove the test
-    ds.qcfilter.remove_test(var_name, test_number=33)
-
-    # Ensure removal works when flag_masks is a numpy array
-    ds['qc_' + var_name].attrs['flag_masks'] = np.array(ds['qc_' + var_name].attrs['flag_masks'])
-    ds.qcfilter.remove_test(var_name, test_number=result['test_number'])
-    pytest.raises(ValueError, ds.qcfilter.add_test, var_name)
-    pytest.raises(ValueError, ds.qcfilter.remove_test, var_name)
-
-    ds.close()
-
-    assert np.all(parse_bit([257]) == np.array([1, 9], dtype=np.int32))
-    pytest.raises(ValueError, parse_bit, [1, 2])
-    pytest.raises(ValueError, parse_bit, -1)
-
-    assert set_bit(0, 16) == 32768
-    data = range(0, 4)
-    assert isinstance(set_bit(list(data), 2), list)
-    assert isinstance(set_bit(tuple(data), 2), tuple)
-    assert isinstance(unset_bit(list(data), 2), list)
-    assert isinstance(unset_bit(tuple(data), 2), tuple)
-
-    # Fill in missing tests
-    ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
-    del ds[var_name].attrs['long_name']
-    # Test creating a qc variable
-    ds.qcfilter.create_qc_variable(var_name)
-    # Test creating a second qc variable and of flag type
-    ds.qcfilter.create_qc_variable(var_name, flag_type=True)
-    result = ds.qcfilter.add_test(
-        var_name,
-        index=[1, 2, 3],
-        test_number=9,
-        test_meaning='testing high number',
-        flag_value=True,
-    )
-    ds.qcfilter.set_test(var_name, index=5, test_number=9, flag_value=True)
-    data = ds.qcfilter.get_masked_data(var_name)
-    assert np.isclose(np.sum(data), 42674.766, 0.01)
-    data = ds.qcfilter.get_masked_data(var_name, rm_assessments='Bad')
-    assert np.isclose(np.sum(data), 42643.195, 0.01)
-
-    ds.qcfilter.unset_test(var_name, test_number=9, flag_value=True)
-    ds.qcfilter.unset_test(var_name, index=1, test_number=9, flag_value=True)
-    assert ds.qcfilter.available_bit(result['qc_variable_name']) == 10
-    assert ds.qcfilter.available_bit(result['qc_variable_name'], recycle=True) == 1
-    ds.qcfilter.remove_test(var_name, test_number=9, flag_value=True)
-
-    ds.qcfilter.update_ancillary_variable(var_name)
-    # Test updating ancillary variable if does not exist
-    ds.qcfilter.update_ancillary_variable('not_a_variable_name')
-    # Change ancillary_variables attribute to test if add correct qc variable correctly
-    ds[var_name].attrs['ancillary_variables'] = 'a_different_name'
-    ds.qcfilter.update_ancillary_variable(var_name, qc_var_name=expected_qc_var_name)
-    assert expected_qc_var_name in ds[var_name].attrs['ancillary_variables']
-
-    # Test flag QC
-    var_name = 'inst_sfc_ir_temp'
-    qc_var_name = 'qc_' + var_name
-    ds.qcfilter.create_qc_variable(var_name, flag_type=True)
-    assert qc_var_name in list(ds.data_vars)
-    assert 'flag_values' in ds[qc_var_name].attrs.keys()
-    assert 'flag_masks' not in ds[qc_var_name].attrs.keys()
-    del ds[qc_var_name]
-
-    qc_var_name = ds.qcfilter.check_for_ancillary_qc(
-        var_name, add_if_missing=True, cleanup=False, flag_type=True
-    )
-    assert qc_var_name in list(ds.data_vars)
-    assert 'flag_values' in ds[qc_var_name].attrs.keys()
-    assert 'flag_masks' not in ds[qc_var_name].attrs.keys()
-    del ds[qc_var_name]
-
-    ds.qcfilter.add_missing_value_test(var_name, flag_value=True, prepend_text='arm')
-    ds.qcfilter.add_test(
-        var_name,
-        index=list(range(0, 20)),
-        test_number=2,
-        test_meaning='Testing flag',
-        flag_value=True,
-        test_assessment='Suspect',
-    )
-    assert qc_var_name in list(ds.data_vars)
-    assert 'flag_values' in ds[qc_var_name].attrs.keys()
-    assert 'flag_masks' not in ds[qc_var_name].attrs.keys()
-    assert 'standard_name' in ds[qc_var_name].attrs.keys()
-    assert ds[qc_var_name].attrs['flag_values'] == [1, 2]
-    assert ds[qc_var_name].attrs['flag_assessments'] == ['Bad', 'Suspect']
-
-    ds.close()
-
-
-@pytest.mark.skipif(not SCIKIT_POSTHOCS_AVAILABLE,
-                    reason="scikit_posthocs is not installed.")
-def test_qcfilter2():
-    ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
-    var_name = 'inst_up_long_dome_resist'
-    expected_qc_var_name = 'qc_' + var_name
-
-    data = ds[var_name].values
-    data[0:4] = data[0:4] + 30.0
-    data[1000:1024] = data[1000:1024] + 30.0
-    ds[var_name].values = data
-
-    coef = 1.4
-    ds.qcfilter.add_iqr_test(var_name, coef=1.4, test_assessment='Bad', prepend_text='arm')
-    assert np.sum(ds[expected_qc_var_name].values) == 28
-    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1]
-    assert ds[expected_qc_var_name].attrs['flag_meanings'] == [
-        f'arm: Value outside of interquartile range test range with a coefficient of {coef}'
-    ]
-
-    ds.qcfilter.add_iqr_test(var_name, test_number=3, prepend_text='ACT')
-    assert np.sum(ds[expected_qc_var_name].values) == 140
-    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1, 4]
-    assert ds[expected_qc_var_name].attrs['flag_meanings'][-1] == (
-        'ACT: Value outside of interquartile range test range with a coefficient of 1.5'
-    )
-
-    ds.qcfilter.add_gesd_test(var_name, test_assessment='Bad')
-    assert np.sum(ds[expected_qc_var_name].values) == 204
-    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1, 4, 8]
-    assert ds[expected_qc_var_name].attrs['flag_meanings'][-1] == (
-        'Value failed generalized Extreme Studentized Deviate test with an alpha of 0.05'
-    )
-
-    ds.qcfilter.add_gesd_test(var_name, alpha=0.1)
-    assert np.sum(ds[expected_qc_var_name].values) == 332
-    assert ds[expected_qc_var_name].attrs['flag_masks'] == [1, 4, 8, 16]
-    assert ds[expected_qc_var_name].attrs['flag_meanings'][-1] == (
-        'Value failed generalized Extreme Studentized Deviate test with an alpha of 0.1'
-    )
-    assert ds[expected_qc_var_name].attrs['flag_assessments'] == [
-        'Bad',
-        'Indeterminate',
-        'Bad',
-        'Indeterminate',
-    ]
-
-
-def test_qcfilter3():
-    ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
-    var_name = 'inst_up_long_dome_resist'
-    result = ds.qcfilter.add_test(var_name, index=range(0, 100), test_meaning='testing')
-    qc_var_name = result['qc_variable_name']
-    assert ds[qc_var_name].values.dtype.kind in np.typecodes['AllInteger']
-
-    ds[qc_var_name].values = ds[qc_var_name].values.astype(np.float32)
-    assert ds[qc_var_name].values.dtype.kind not in np.typecodes['AllInteger']
-
-    result = ds.qcfilter.get_qc_test_mask(
-        var_name=var_name, test_number=1, return_index=False
-    )
-    assert np.sum(result) == 100
-    result = ds.qcfilter.get_qc_test_mask(
-        var_name=var_name, test_number=1, return_index=True
-    )
-    assert np.sum(result) == 4950
-
-    # Test where QC variables are not integer type
-    ds = ds.resample(time='5min').mean(keep_attrs=True)
-    ds.qcfilter.add_test(
-        var_name, index=range(0, ds.time.size), test_meaning='Testing float'
-    )
-    assert np.sum(ds[qc_var_name].values) == 582
-
-    ds[qc_var_name].values = ds[qc_var_name].values.astype(np.float32)
-    ds.qcfilter.remove_test(var_name, test_number=2)
-    assert np.sum(ds[qc_var_name].values) == 6
result['test_number'], return_index=True) - assert np.sum(data) == 125458 - result = ds.qcfilter.add_greater_test(var_name, limit_value) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 125458 - - # less than or equal test - limit_value = 6.9 - result = ds.qcfilter.add_less_equal_test( - var_name, - limit_value, - test_assessment='Suspect', - prepend_text='arm', - limit_attr_name='warn_min', - ) - data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) - assert 'arm' in result['test_meaning'] - assert np.ma.count_masked(data) == 149 - assert 'warn_min' in ds[result['qc_variable_name']].attrs.keys() - assert ( - ds[result['qc_variable_name']].attrs['warn_min'].dtype - == ds[result['variable_name']].values.dtype - ) - assert np.isclose(ds[result['qc_variable_name']].attrs['warn_min'], limit_value) - - result = ds.qcfilter.add_less_equal_test(var_name, limit_value) - assert 'fail_min' in ds[result['qc_variable_name']].attrs.keys() - - result = ds.qcfilter.add_less_equal_test(var_name, limit_value, use_dask=True) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 601581 - result = ds.qcfilter.add_less_equal_test(var_name, limit_value) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 601581 - - # greater than or equal test - result = ds.qcfilter.add_greater_equal_test(var_name, None) - limit_value = 12 - result = ds.qcfilter.add_greater_equal_test( - var_name, - limit_value, - test_assessment='Suspect', - prepend_text='arm', - limit_attr_name='warn_max', - ) - data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) - assert 'arm' in result['test_meaning'] - assert np.ma.count_masked(data) == 606 - assert 'warn_max' in ds[result['qc_variable_name']].attrs.keys() - assert ( - ds[result['qc_variable_name']].attrs['warn_max'].dtype - == ds[result['variable_name']].values.dtype - ) - assert np.isclose(ds[result['qc_variable_name']].attrs['warn_max'], limit_value) - - result = ds.qcfilter.add_greater_equal_test(var_name, limit_value) - assert 'fail_max' in ds[result['qc_variable_name']].attrs.keys() - - result = ds.qcfilter.add_greater_equal_test(var_name, limit_value, use_dask=True) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 1189873 - result = ds.qcfilter.add_greater_equal_test(var_name, limit_value) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 1189873 - - # equal to test - limit_value = 7.6705 - result = ds.qcfilter.add_equal_to_test( - var_name, limit_value, prepend_text='arm', limit_attr_name='fail_equal_to' - ) - data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) - assert 'arm' in result['test_meaning'] - assert np.ma.count_masked(data) == 2 - assert 'fail_equal_to' in ds[result['qc_variable_name']].attrs.keys() - assert ( - ds[result['qc_variable_name']].attrs['fail_equal_to'].dtype - == ds[result['variable_name']].values.dtype - ) - assert np.isclose(ds[result['qc_variable_name']].attrs['fail_equal_to'], limit_value) - - result = ds.qcfilter.add_equal_to_test( - var_name, limit_value, test_assessment='Indeterminate' - ) - assert 'warn_equal_to' in ds[result['qc_variable_name']].attrs.keys() - - result = ds.qcfilter.add_equal_to_test(var_name, limit_value, use_dask=True) - data = 
ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 8631 - result = ds.qcfilter.add_equal_to_test(var_name, limit_value) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 8631 - - # not equal to test - limit_value = 7.6705 - result = ds.qcfilter.add_not_equal_to_test( - var_name, - limit_value, - test_assessment='Indeterminate', - prepend_text='arm', - limit_attr_name='warn_not_equal_to', - ) - data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) - assert 'arm' in result['test_meaning'] - assert np.ma.count_masked(data) == 4318 - assert 'warn_not_equal_to' in ds[result['qc_variable_name']].attrs.keys() - assert ( - ds[result['qc_variable_name']].attrs['warn_not_equal_to'].dtype - == ds[result['variable_name']].values.dtype - ) - assert np.isclose(ds[result['qc_variable_name']].attrs['warn_not_equal_to'], limit_value) - - result = ds.qcfilter.add_not_equal_to_test(var_name, limit_value) - assert 'fail_not_equal_to' in ds[result['qc_variable_name']].attrs.keys() - - result = ds.qcfilter.add_not_equal_to_test(var_name, limit_value, use_dask=True) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 9320409 - result = ds.qcfilter.add_not_equal_to_test(var_name, limit_value) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 9320409 - - # outside range test - limit_value1 = 6.8 - limit_value2 = 12.7 - result = ds.qcfilter.add_outside_test( - var_name, - limit_value1, - limit_value2, - prepend_text='arm', - limit_attr_names=['fail_lower_range', 'fail_upper_range'], - ) - data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number']) - assert 'arm' in result['test_meaning'] - assert np.ma.count_masked(data) == 115 - assert 'fail_lower_range' in ds[result['qc_variable_name']].attrs.keys() - assert ( - ds[result['qc_variable_name']].attrs['fail_lower_range'].dtype - == ds[result['variable_name']].values.dtype - ) - assert np.isclose(ds[result['qc_variable_name']].attrs['fail_lower_range'], limit_value1) - assert 'fail_upper_range' in ds[result['qc_variable_name']].attrs.keys() - assert ( - ds[result['qc_variable_name']].attrs['fail_upper_range'].dtype - == ds[result['variable_name']].values.dtype - ) - assert np.isclose(ds[result['qc_variable_name']].attrs['fail_upper_range'], limit_value2) - - result = ds.qcfilter.add_outside_test( - var_name, limit_value1, limit_value2, test_assessment='Indeterminate' - ) - assert 'warn_lower_range' in ds[result['qc_variable_name']].attrs.keys() - assert 'warn_upper_range' in ds[result['qc_variable_name']].attrs.keys() - - result = ds.qcfilter.add_outside_test( - var_name, limit_value1, limit_value2, use_dask=True - ) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 342254 - result = ds.qcfilter.add_outside_test( - var_name, - limit_value1, - limit_value2, - ) - data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True) - assert np.sum(data) == 342254 - - # Starting to run out of space for tests. Remove some tests. 
- for ii in range(16, 30):
- ds.qcfilter.remove_test(var_name, test_number=ii)
-
- # inside range test
- limit_value1 = 7
- limit_value2 = 8
- result = ds.qcfilter.add_inside_test(
- var_name,
- limit_value1,
- limit_value2,
- prepend_text='arm',
- limit_attr_names=['fail_lower_range_inner', 'fail_upper_range_inner'],
- )
- data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
- assert 'arm' in result['test_meaning']
- assert np.ma.count_masked(data) == 479
- assert 'fail_lower_range_inner' in ds[result['qc_variable_name']].attrs.keys()
- assert (
- ds[result['qc_variable_name']].attrs['fail_lower_range_inner'].dtype
- == ds[result['variable_name']].values.dtype
- )
- assert np.isclose(
- ds[result['qc_variable_name']].attrs['fail_lower_range_inner'],
- limit_value1,
- )
- assert 'fail_upper_range_inner' in ds[result['qc_variable_name']].attrs.keys()
- assert (
- ds[result['qc_variable_name']].attrs['fail_upper_range_inner'].dtype
- == ds[result['variable_name']].values.dtype
- )
- assert np.isclose(
- ds[result['qc_variable_name']].attrs['fail_upper_range_inner'],
- limit_value2,
- )
-
- result = ds.qcfilter.add_inside_test(
- var_name, limit_value1, limit_value2, test_assessment='Indeterminate'
- )
- assert 'warn_lower_range_inner' in ds[result['qc_variable_name']].attrs.keys()
- assert 'warn_upper_range_inner' in ds[result['qc_variable_name']].attrs.keys()
-
- result = ds.qcfilter.add_inside_test(var_name, limit_value1, limit_value2, use_dask=True)
- data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
- assert np.sum(data) == 1820693
- result = ds.qcfilter.add_inside_test(
- var_name,
- limit_value1,
- limit_value2,
- )
- data = ds.qcfilter.get_qc_test_mask(var_name, result['test_number'], return_index=True)
- assert np.sum(data) == 1820693
-
- # delta test
- test_limit = 0.05
- result = ds.qcfilter.add_delta_test(
- var_name, test_limit, prepend_text='arm', limit_attr_name='warn_delta'
- )
- data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
- assert 'arm' in result['test_meaning']
- assert np.ma.count_masked(data) == 175
- assert 'warn_delta' in ds[result['qc_variable_name']].attrs.keys()
- assert (
- ds[result['qc_variable_name']].attrs['warn_delta'].dtype
- == ds[result['variable_name']].values.dtype
- )
- assert np.isclose(ds[result['qc_variable_name']].attrs['warn_delta'], test_limit)
-
- data = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Suspect', 'Bad'])
- assert np.ma.count_masked(data) == 1355
-
- result = ds.qcfilter.add_delta_test(var_name, test_limit, test_assessment='Bad')
- assert 'fail_delta' in ds[result['qc_variable_name']].attrs.keys()
-
- comp_ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
- with np.testing.assert_raises(ValueError):
- result = ds.qcfilter.add_difference_test(var_name, 'test')
-
- with np.testing.assert_raises(ValueError):
- result = ds.qcfilter.add_difference_test(
- var_name,
- {comp_ds.attrs['datastream']: comp_ds},
- var_name,
- diff_limit=None,
- )
-
- assert ds.qcfilter.add_difference_test(var_name, set_test_regardless=False) is None
-
- result = ds.qcfilter.add_difference_test(
- var_name,
- {comp_ds.attrs['datastream']: comp_ds},
- var_name,
- diff_limit=1,
- prepend_text='arm',
- )
- data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])
- assert 'arm' in result['test_meaning']
- assert not (data.mask).all()
-
- comp_ds.close()
- ds.close()
-
-
-def test_qctests_dos():
- ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
- var_name = 'inst_up_long_dome_resist'
-
- # persistence test
- data = ds[var_name].values
- data[1000: 2400] = data[1000]
- data = np.around(data, decimals=3)
- ds[var_name].values = data
- result = ds.qcfilter.add_persistence_test(var_name)
- qc_var_name = result['qc_variable_name']
- test_meaning = (
- 'Data failing persistence test. Standard Deviation over a '
- 'window of 10 values less than 0.0001.'
- )
- assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning
- # There is a precision issue with GitHub testing that changes the number of values
- # tripped by this test. The isclose() comparison accounts for that.
- assert np.isclose(np.sum(ds[qc_var_name].values), 1399, atol=2)
-
- ds.qcfilter.add_persistence_test(var_name, window=10000, prepend_text='DQO')
- test_meaning = (
- 'DQO: Data failing persistence test. Standard Deviation over a window of '
- '4320 values less than 0.0001.'
- )
- assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning
-
-
-def test_datafilter():
- ds = read_arm_netcdf(EXAMPLE_MET1, drop_variables=['base_time', 'time_offset'])
- ds.clean.cleanup()
-
- data_var_names = list(ds.data_vars)
- qc_var_names = [var_name for var_name in ds.data_vars if var_name.startswith('qc_')]
- data_var_names = list(set(data_var_names) - set(qc_var_names))
- data_var_names.sort()
- qc_var_names.sort()
-
- var_name = 'atmos_pressure'
-
- ds_1 = ds.mean()
-
- ds.qcfilter.add_less_test(var_name, 99, test_assessment='Bad')
- ds_filtered = copy.deepcopy(ds)
- ds_filtered.qcfilter.datafilter(rm_assessments='Bad')
- ds_2 = ds_filtered.mean()
- assert np.isclose(ds_1[var_name].values, 98.86, atol=0.01)
- assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01)
- assert isinstance(ds_1[var_name].data, da.core.Array)
- assert 'act.qc.datafilter' in ds_filtered[var_name].attrs['history']
-
- ds_filtered = copy.deepcopy(ds)
- ds_filtered.qcfilter.datafilter(rm_assessments='Bad', variables=var_name, del_qc_var=True)
- ds_2 = ds_filtered.mean()
- assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01)
- expected_var_names = sorted(list(set(data_var_names + qc_var_names) - set(['qc_' + var_name])))
- assert sorted(list(ds_filtered.data_vars)) == expected_var_names
-
- ds_filtered = copy.deepcopy(ds)
- ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=True)
- assert sorted(list(ds_filtered.data_vars)) == data_var_names
-
- ds.close()
- del ds
-
-
-def test_qc_remainder():
- ds = read_arm_netcdf(EXAMPLE_MET1)
- assert ds.clean.get_attr_info(variable='bad_name') is None
- del ds.attrs['qc_bit_comment']
- assert isinstance(ds.clean.get_attr_info(), dict)
- ds.attrs['qc_flag_comment'] = 'testing'
- ds.close()
-
- ds = read_arm_netcdf(EXAMPLE_MET1)
- ds.clean.cleanup(normalize_assessment=True)
- ds['qc_atmos_pressure'].attrs['units'] = 'testing'
- del ds['qc_temp_mean'].attrs['units']
- del ds['qc_temp_mean'].attrs['flag_masks']
- ds.clean.handle_missing_values()
- ds.close()
-
- ds = read_arm_netcdf(EXAMPLE_MET1)
- ds.attrs['qc_bit_1_comment'] = 'testing'
- data = ds['qc_atmos_pressure'].values.astype(np.int64)
- data[0] = 2**32
- ds['qc_atmos_pressure'].values = data
- ds.clean.get_attr_info(variable='qc_atmos_pressure')
- ds.clean.clean_arm_state_variables('testname')
- ds.clean.cleanup()
- ds['qc_atmos_pressure'].attrs['standard_name'] = 'wrong_name'
- ds.clean.link_variables()
- assert ds['qc_atmos_pressure'].attrs['standard_name'] == 'quality_flag'
- ds.close()
-
-
-def test_qc_flag_description():
- """
- This will check if the cleanup() method will correctly convert
- flag_#_description to CF flag_masks and flag_meanings.
-
- """
-
- ds = read_arm_netcdf(EXAMPLE_CO2FLX4M)
- ds.clean.cleanup()
- qc_var_name = ds.qcfilter.check_for_ancillary_qc(
- 'momentum_flux', add_if_missing=False, cleanup=False
- )
-
- assert isinstance(ds[qc_var_name].attrs['flag_masks'], list)
- assert isinstance(ds[qc_var_name].attrs['flag_meanings'], list)
- assert isinstance(ds[qc_var_name].attrs['flag_assessments'], list)
- assert ds[qc_var_name].attrs['standard_name'] == 'quality_flag'
-
- assert len(ds[qc_var_name].attrs['flag_masks']) == 9
- unique_flag_assessments = list({'Acceptable', 'Indeterminate', 'Bad'})
- for f in list(set(ds[qc_var_name].attrs['flag_assessments'])):
- assert f in unique_flag_assessments
-
-
-def test_clean():
- # Read test data
- ceil_ds = read_arm_netcdf([EXAMPLE_CEIL1])
- # Cleanup QC data
- ceil_ds.clean.cleanup(clean_arm_state_vars=['detection_status'])
-
- # Check that global attributes are removed
- global_attributes = [
- 'qc_bit_comment',
- 'qc_bit_1_description',
- 'qc_bit_1_assessment',
- 'qc_bit_2_description',
- 'qc_bit_2_assessment',
- 'qc_bit_3_description',
- 'qc_bit_3_assessment',
- ]
-
- for glb_att in global_attributes:
- assert glb_att not in ceil_ds.attrs.keys()
-
- # Check that CF attributes are set including new flag_assessments
- var_name = 'qc_first_cbh'
- for attr_name in ['flag_masks', 'flag_meanings', 'flag_assessments']:
- assert attr_name in ceil_ds[var_name].attrs.keys()
- assert isinstance(ceil_ds[var_name].attrs[attr_name], list)
-
- # Check that the flag_mask values are set correctly
- assert ceil_ds['qc_first_cbh'].attrs['flag_masks'] == [1, 2, 4]
-
- # Check that the flag_meanings values are set correctly
- assert ceil_ds['qc_first_cbh'].attrs['flag_meanings'] == [
- 'Value is equal to missing_value.',
- 'Value is less than the fail_min.',
- 'Value is greater than the fail_max.',
- ]
-
- # Check the value of flag_assessments is as expected
- assert ceil_ds['qc_first_cbh'].attrs['flag_assessments'] == ['Bad', 'Bad', 'Bad']
-
- # Check that ancillary_variables attribute is being added
- assert 'qc_first_cbh' in ceil_ds['first_cbh'].attrs['ancillary_variables'].split()
-
- # Check that state field is updated to CF
- assert 'flag_values' in ceil_ds['detection_status'].attrs.keys()
- assert isinstance(ceil_ds['detection_status'].attrs['flag_values'], list)
- assert ceil_ds['detection_status'].attrs['flag_values'] == [0, 1, 2, 3, 4, 5]
-
- assert 'flag_meanings' in ceil_ds['detection_status'].attrs.keys()
- assert isinstance(ceil_ds['detection_status'].attrs['flag_meanings'], list)
- assert ceil_ds['detection_status'].attrs['flag_meanings'] == [
- 'No significant backscatter',
- 'One cloud base detected',
- 'Two cloud bases detected',
- 'Three cloud bases detected',
- 'Full obscuration determined but no cloud base detected',
- 'Some obscuration detected but determined to be transparent',
- ]
-
- assert 'flag_0_description' not in ceil_ds['detection_status'].attrs.keys()
- assert 'detection_status' in ceil_ds['first_cbh'].attrs['ancillary_variables'].split()
-
- ceil_ds.close()
-
-
-def test_compare_time_series_trends():
-
- drop_vars = [
- 'base_time',
- 'time_offset',
- 'atmos_pressure',
- 'qc_atmos_pressure',
- 'temp_std',
- 'rh_mean',
- 'qc_rh_mean',
- 'rh_std',
- 'vapor_pressure_mean',
- 'qc_vapor_pressure_mean',
- 'vapor_pressure_std',
- 'wspd_arith_mean',
- 'qc_wspd_arith_mean',
- 'wspd_vec_mean',
- 'qc_wspd_vec_mean',
- 'wdir_vec_mean',
- 'qc_wdir_vec_mean',
- 'wdir_vec_std',
- 'tbrg_precip_total',
- 'qc_tbrg_precip_total',
- 'tbrg_precip_total_corr',
- 'qc_tbrg_precip_total_corr',
- 'org_precip_rate_mean',
- 'qc_org_precip_rate_mean',
- 'pwd_err_code',
- 'pwd_mean_vis_1min',
- 'qc_pwd_mean_vis_1min',
- 'pwd_mean_vis_10min',
- 'qc_pwd_mean_vis_10min',
- 'pwd_pw_code_inst',
- 'qc_pwd_pw_code_inst',
- 'pwd_pw_code_15min',
- 'qc_pwd_pw_code_15min',
- 'pwd_pw_code_1hr',
- 'qc_pwd_pw_code_1hr',
- 'pwd_precip_rate_mean_1min',
- 'qc_pwd_precip_rate_mean_1min',
- 'pwd_cumul_rain',
- 'qc_pwd_cumul_rain',
- 'pwd_cumul_snow',
- 'qc_pwd_cumul_snow',
- 'logger_volt',
- 'qc_logger_volt',
- 'logger_temp',
- 'qc_logger_temp',
- 'lat',
- 'lon',
- 'alt',
- ]
- ds = read_arm_netcdf(EXAMPLE_MET1, drop_variables=drop_vars)
- ds.clean.cleanup()
- ds2 = copy.deepcopy(ds)
-
- var_name = 'temp_mean'
- qc_var_name = ds.qcfilter.check_for_ancillary_qc(
- var_name, add_if_missing=False, cleanup=False, flag_type=False
- )
- ds.qcfilter.compare_time_series_trends(
- var_name=var_name,
- time_shift=60,
- comp_var_name=var_name,
- comp_dataset=ds2,
- time_qc_threshold=60 * 10,
- )
-
- test_description = (
- 'Time shift detected with Minimum Difference test. Comparison of '
- 'temp_mean with temp_mean off by 0 seconds exceeding absolute '
- 'threshold of 600 seconds.'
- )
- assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_description
-
- time = ds2['time'].values + np.timedelta64(1, 'h')
- time_attrs = ds2['time'].attrs
- ds2 = ds2.assign_coords({'time': time})
- ds2['time'].attrs = time_attrs
-
- ds.qcfilter.compare_time_series_trends(
- var_name=var_name, comp_dataset=ds2, time_step=60, time_match_threshhold=50
- )
-
- test_description = (
- 'Time shift detected with Minimum Difference test. Comparison of '
- 'temp_mean with temp_mean off by 3600 seconds exceeding absolute '
- 'threshold of 900 seconds.'
- )
- assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_description
-
-
-def test_qc_data_type():
- drop_vars = [
- 'base_time',
- 'time_offset',
- 'inst_up_long_case_resist',
- 'inst_up_long_hemisp_tp',
- 'inst_up_short_hemisp_tp',
- 'inst_sfc_ir_temp',
- 'lat',
- 'lon',
- 'alt',
- ]
- ds = read_arm_netcdf(EXAMPLE_IRT25m20s, drop_variables=drop_vars)
- var_name = 'inst_up_long_dome_resist'
- expected_qc_var_name = 'qc_' + var_name
- ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=True)
- del ds[expected_qc_var_name].attrs['flag_meanings']
- del ds[expected_qc_var_name].attrs['flag_assessments']
- ds[expected_qc_var_name] = ds[expected_qc_var_name].astype(np.int8)
- ds.qcfilter.add_test(var_name, index=[1], test_number=9, test_meaning='First test')
-
- assert ds[expected_qc_var_name].attrs['flag_masks'][0].dtype == np.uint32
- assert ds[expected_qc_var_name].dtype == np.int16
- ds.qcfilter.add_test(var_name, index=[1], test_number=17, test_meaning='Second test')
- assert ds[expected_qc_var_name].dtype == np.int32
- ds.qcfilter.add_test(var_name, index=[1], test_number=33, test_meaning='Third test')
- assert ds[expected_qc_var_name].dtype == np.int64
- assert ds[expected_qc_var_name].attrs['flag_masks'][0].dtype == np.uint64
-
- ds.qcfilter.add_test(var_name, index=[1], test_meaning='Fourth test', recycle=True)
-
-
-def test_qc_speed():
- """
- This tests the speed of the QC module to ensure changes do not significantly
- slow down the module's processing.
- """ - - n_variables = 100 - n_samples = 100 - - time = pd.date_range(start='2022-02-17 00:00:00', end='2022-02-18 00:00:00', periods=n_samples) - - # Create data variables with random noise - np.random.seed(42) - noisy_data_mapping = {f'data_var_{i}': np.random.random(time.shape) for i in range(n_variables)} - - ds = xr.Dataset( - data_vars={name: ('time', data) for name, data in noisy_data_mapping.items()}, - coords={'time': time}, - ) - - start = datetime.utcnow() - for name, var in noisy_data_mapping.items(): - failed_qc = var > 0.75 # Consider data above 0.75 as bad. Negligible time here. - ds.qcfilter.add_test(name, index=failed_qc, test_meaning='Value above threshold') - - time_diff = datetime.utcnow() - start - assert time_diff.seconds <= 4 - - -@pytest.mark.skipif(not PYSP2_AVAILABLE, reason="PySP2 is not installed.") -def test_sp2_particle_config(): - particle_config_ds = SP2ParticleCriteria() - assert particle_config_ds.ScatMaxPeakHt1 == 60000 - assert particle_config_ds.ScatMinPeakHt1 == 250 - assert particle_config_ds.ScatMaxPeakHt2 == 60000 - assert particle_config_ds.ScatMinPeakHt2 == 250 - assert particle_config_ds.ScatMinWidth == 10 - assert particle_config_ds.ScatMaxWidth == 90 - assert particle_config_ds.ScatMinPeakPos == 20 - assert particle_config_ds.ScatMaxPeakPos == 90 - assert particle_config_ds.IncanMinPeakHt1 == 200 - assert particle_config_ds.IncanMinPeakHt2 == 200 - assert particle_config_ds.IncanMaxPeakHt1 == 60000 - assert particle_config_ds.IncanMaxPeakHt2 == 60000 - assert particle_config_ds.IncanMinWidth == 5 - assert particle_config_ds.IncanMaxWidth == np.inf - assert particle_config_ds.IncanMinPeakPos == 20 - assert particle_config_ds.IncanMaxPeakPos == 90 - assert particle_config_ds.IncanMinPeakRatio == 0.1 - assert particle_config_ds.IncanMaxPeakRatio == 25 - assert particle_config_ds.IncanMaxPeakOffset == 11 - assert particle_config_ds.c0Mass1 == 0 - assert particle_config_ds.c1Mass1 == 0.0001896 - assert particle_config_ds.c2Mass1 == 0 - assert particle_config_ds.c3Mass1 == 0 - assert particle_config_ds.c0Mass2 == 0 - assert particle_config_ds.c1Mass2 == 0.0016815 - assert particle_config_ds.c2Mass2 == 0 - assert particle_config_ds.c3Mass2 == 0 - assert particle_config_ds.c0Scat1 == 0 - assert particle_config_ds.c1Scat1 == 78.141 - assert particle_config_ds.c2Scat1 == 0 - assert particle_config_ds.c0Scat2 == 0 - assert particle_config_ds.c1Scat2 == 752.53 - assert particle_config_ds.c2Scat2 == 0 - assert particle_config_ds.densitySO4 == 1.8 - assert particle_config_ds.densityBC == 1.8 - assert particle_config_ds.TempSTP == 273.15 - assert particle_config_ds.PressSTP == 1013.25 - - -def test_bsrn_limits_test(): - - for use_dask in [False, True]: - ds = read_arm_netcdf(EXAMPLE_BRS) - var_names = list(ds.data_vars) - # Remove QC variables to make testing easier - for var_name in var_names: - if var_name.startswith('qc_'): - del ds[var_name] - - # Add atmospheric temperature fake data - ds['temp_mean'] = xr.DataArray( - data=np.full(ds.time.size, 13.5), dims=['time'], - attrs={'long_name': 'Atmospheric air temperature', 'units': 'degC'}) - - # Make a short direct variable since BRS does not have one - ds['short_direct'] = copy.deepcopy(ds['short_direct_normal']) - ds['short_direct'].attrs['ancillary_variables'] = 'qc_short_direct' - ds['short_direct'].attrs['long_name'] = 'Shortwave direct irradiance, pyrheliometer' - sza, Sa = _calculate_solar_parameters(ds, 'lat', 'lon', 1360.8) - ds['short_direct'].data = ds['short_direct'].data * .5 - - # Make up 
long variable since BRS does not have values - ds['up_long_hemisp'].data = copy.deepcopy(ds['down_long_hemisp_shaded'].data) - data = copy.deepcopy(ds['down_short_hemisp'].data) - ds['up_short_hemisp'].data = data - - # Test that nothing happens when no variable names are provided - ds.qcfilter.bsrn_limits_test() - - # Mess with data to get tests to trip - data = ds['down_short_hemisp'].values - data[200:300] -= 10 - data[800:850] += 330 - data[1340:1380] += 600 - ds['down_short_hemisp'].data = da.from_array(data) - - data = ds['down_short_diffuse_hemisp'].values - data[200:250] = data[200:250] - 1.9 - data[250:300] = data[250:300] - 3.9 - data[800:850] += 330 - data[1340:1380] += 600 - ds['down_short_diffuse_hemisp'].data = da.from_array(data) - - data = ds['short_direct_normal'].values - data[200:250] = data[200:250] - 1.9 - data[250:300] = data[250:300] - 3.9 - data[800:850] += 600 - data[1340:1380] += 800 - ds['short_direct_normal'].data = da.from_array(data) - - data = ds['short_direct'].values - data[200:250] = data[200:250] - 1.9 - data[250:300] = data[250:300] - 3.9 - data[800:850] += 300 - data[1340:1380] += 800 - ds['short_direct'].data = da.from_array(data) - - data = ds['down_long_hemisp_shaded'].values - data[200:250] = data[200:250] - 355 - data[250:300] = data[250:300] - 400 - data[800:850] += 200 - data[1340:1380] += 400 - ds['down_long_hemisp_shaded'].data = da.from_array(data) - - data = ds['up_long_hemisp'].values - data[200:250] = data[200:250] - 355 - data[250:300] = data[250:300] - 400 - data[800:850] += 300 - data[1340:1380] += 500 - ds['up_long_hemisp'].data = da.from_array(data) - - ds.qcfilter.bsrn_limits_test( - gbl_SW_dn_name='down_short_hemisp', - glb_diffuse_SW_dn_name='down_short_diffuse_hemisp', - direct_normal_SW_dn_name='short_direct_normal', - glb_SW_up_name='up_short_hemisp', - glb_LW_dn_name='down_long_hemisp_shaded', - glb_LW_up_name='up_long_hemisp', - direct_SW_dn_name='short_direct', - use_dask=use_dask) - - assert ds['qc_down_short_hemisp'].attrs['flag_masks'] == [1, 2] - assert ds['qc_down_short_hemisp'].attrs['flag_meanings'][-2] == \ - 'Value less than BSRN physically possible limit of -4.0 W/m^2' - assert ds['qc_down_short_hemisp'].attrs['flag_meanings'][-1] == \ - 'Value greater than BSRN physically possible limit' - - assert ds['qc_down_short_diffuse_hemisp'].attrs['flag_masks'] == [1, 2] - assert ds['qc_down_short_diffuse_hemisp'].attrs['flag_assessments'] == ['Bad', 'Bad'] - - assert ds['qc_short_direct'].attrs['flag_masks'] == [1, 2] - assert ds['qc_short_direct'].attrs['flag_assessments'] == ['Bad', 'Bad'] - assert ds['qc_short_direct'].attrs['flag_meanings'] == \ - ['Value less than BSRN physically possible limit of -4.0 W/m^2', - 'Value greater than BSRN physically possible limit'] - - assert ds['qc_short_direct_normal'].attrs['flag_masks'] == [1, 2] - assert ds['qc_short_direct_normal'].attrs['flag_meanings'][-1] == \ - 'Value greater than BSRN physically possible limit' - - assert ds['qc_down_short_hemisp'].attrs['flag_masks'] == [1, 2] - assert ds['qc_down_short_hemisp'].attrs['flag_meanings'][-1] == \ - 'Value greater than BSRN physically possible limit' - - assert ds['qc_up_short_hemisp'].attrs['flag_masks'] == [1, 2] - assert ds['qc_up_short_hemisp'].attrs['flag_meanings'][-1] == \ - 'Value greater than BSRN physically possible limit' - - assert ds['qc_up_long_hemisp'].attrs['flag_masks'] == [1, 2] - assert ds['qc_up_long_hemisp'].attrs['flag_meanings'][-1] == \ - 'Value greater than BSRN physically possible limit of 900.0 W/m^2' 
-
- ds.qcfilter.bsrn_limits_test(
- test="Extremely Rare",
- gbl_SW_dn_name='down_short_hemisp',
- glb_diffuse_SW_dn_name='down_short_diffuse_hemisp',
- direct_normal_SW_dn_name='short_direct_normal',
- glb_SW_up_name='up_short_hemisp',
- glb_LW_dn_name='down_long_hemisp_shaded',
- glb_LW_up_name='up_long_hemisp',
- direct_SW_dn_name='short_direct',
- use_dask=use_dask)
-
- assert ds['qc_down_short_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8]
- assert ds['qc_down_short_diffuse_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8]
- assert ds['qc_short_direct'].attrs['flag_masks'] == [1, 2, 4, 8]
- assert ds['qc_short_direct_normal'].attrs['flag_masks'] == [1, 2, 4, 8]
- assert ds['qc_up_short_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8]
- assert ds['qc_up_long_hemisp'].attrs['flag_masks'] == [1, 2, 4, 8]
-
- assert ds['qc_up_long_hemisp'].attrs['flag_meanings'][-1] == \
- 'Value greater than BSRN extremely rare limit of 700.0 W/m^2'
-
- assert ds['qc_down_long_hemisp_shaded'].attrs['flag_meanings'][-1] == \
- 'Value greater than BSRN extremely rare limit of 500.0 W/m^2'
-
- # down_short_hemisp
- result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=1)
- assert np.sum(result) == 100
- result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=2)
- assert np.sum(result) == 26
- result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=3)
- assert np.sum(result) == 337
- result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=4)
- assert np.sum(result) == 66
-
- # down_short_diffuse_hemisp
- result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=1)
- assert np.sum(result) == 50
- result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=2)
- assert np.sum(result) == 56
- result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=3)
- assert np.sum(result) == 100
- result = ds.qcfilter.get_qc_test_mask('down_short_diffuse_hemisp', test_number=4)
- assert np.sum(result) == 90
-
- # short_direct_normal
- result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=1)
- assert np.sum(result) == 46
- result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=2)
- assert np.sum(result) == 26
- result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=3)
- assert np.sum(result) == 94
- result = ds.qcfilter.get_qc_test_mask('short_direct_normal', test_number=4)
- assert np.sum(result) == 38
-
- # short_direct
- result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=1)
- assert np.sum(result) == 41
- result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=2)
- assert np.sum(result) == 607
- result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=3)
- assert np.sum(result) == 89
- result = ds.qcfilter.get_qc_test_mask('short_direct', test_number=4)
- assert np.sum(result) == 79
-
- # down_long_hemisp_shaded
- result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=1)
- assert np.sum(result) == 50
- result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=2)
- assert np.sum(result) == 40
- result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=3)
- assert np.sum(result) == 89
- result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=4)
- assert np.sum(result) == 90
-
- # up_long_hemisp
- result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=1)
- assert np.sum(result) == 50
- result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=2)
- assert np.sum(result) == 40
- result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=3)
- assert np.sum(result) == 89
- result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=4)
- assert np.sum(result) == 90
-
- # Change data values to trip tests
- ds['down_short_diffuse_hemisp'].values[0:100] = \
- ds['down_short_diffuse_hemisp'].values[0:100] + 100
- ds['up_long_hemisp'].values[0:100] = \
- ds['up_long_hemisp'].values[0:100] - 200
-
- ds.qcfilter.bsrn_comparison_tests(
- ['Global over Sum SW Ratio', 'Diffuse Ratio', 'SW up', 'LW down to air temp',
- 'LW up to air temp', 'LW down to LW up'],
- gbl_SW_dn_name='down_short_hemisp',
- glb_diffuse_SW_dn_name='down_short_diffuse_hemisp',
- direct_normal_SW_dn_name='short_direct_normal',
- glb_SW_up_name='up_short_hemisp',
- glb_LW_dn_name='down_long_hemisp_shaded',
- glb_LW_up_name='up_long_hemisp',
- air_temp_name='temp_mean',
- test_assessment='Indeterminate',
- lat_name='lat',
- lon_name='lon',
- use_dask=use_dask
- )
-
- # Ratio of Global over Sum SW
- result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=5)
- assert np.sum(result) == 190
-
- # Diffuse Ratio
- result = ds.qcfilter.get_qc_test_mask('down_short_hemisp', test_number=6)
- assert np.sum(result) == 47
-
- # Shortwave up comparison
- result = ds.qcfilter.get_qc_test_mask('up_short_hemisp', test_number=5)
- assert np.sum(result) == 226
-
- # Longwave up to air temperature comparison
- result = ds.qcfilter.get_qc_test_mask('up_long_hemisp', test_number=5)
- assert np.sum(result) == 290
-
- # Longwave down to air temperature comparison
- result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=5)
- assert np.sum(result) == 976
-
- # Longwave down to longwave up comparison
- result = ds.qcfilter.get_qc_test_mask('down_long_hemisp_shaded', test_number=6)
- assert np.sum(result) == 100
-
-
-def test_add_atmospheric_pressure_test():
- ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True)
- ds.load()
-
- variable = 'atmos_pressure'
- qc_variable = 'qc_' + variable
-
- data = ds[variable].values
- data[200:250] = data[200:250] + 5
- data[500:550] = data[500:550] - 4.6
- ds[variable].values = data
- result = ds.qcfilter.add_atmospheric_pressure_test(variable)
- assert isinstance(result, dict)
- assert np.sum(ds[qc_variable].values) == 1600
-
- del ds[qc_variable]
- ds.qcfilter.add_atmospheric_pressure_test(variable, use_dask=True)
- assert np.sum(ds[qc_variable].values) == 100
-
- ds.close()
- del ds
-
-
-def test_read_yaml_supplemental_qc():
- ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=['temp_mean', 'qc_temp_mean'], cleanup_qc=True)
-
- result = read_yaml_supplemental_qc(ds, EXAMPLE_MET_YAML)
- assert isinstance(result, dict)
- assert len(result.keys()) == 3
-
- result = read_yaml_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, variables='temp_mean',
- assessments=['Bad', 'Incorrect', 'Suspect'])
- assert len(result.keys()) == 2
- assert sorted(result['temp_mean'].keys()) == ['Bad', 'Suspect']
-
- result = read_yaml_supplemental_qc(ds, 'sgpmetE13.b1.yaml', quiet=True)
- assert result is None
-
- apply_supplemental_qc(ds, EXAMPLE_MET_YAML)
- assert ds['qc_temp_mean'].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32, 64, 128, 256]
- assert ds['qc_temp_mean'].attrs['flag_assessments'] == [
- 'Bad', 'Bad', 'Bad', 'Indeterminate', 'Bad', 'Bad', 'Suspect', 'Good', 'Bad']
- assert ds['qc_temp_mean'].attrs['flag_meanings'][0] == 'Value is equal to missing_value.'
- assert ds['qc_temp_mean'].attrs['flag_meanings'][-1] == 'Values are bad for all' - assert ds['qc_temp_mean'].attrs['flag_meanings'][-2] == 'Values are good' - assert np.sum(ds['qc_temp_mean'].values) == 81344 - assert np.count_nonzero(ds['qc_temp_mean'].values) == 1423 - - del ds - - ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=['temp_mean', 'qc_temp_mean'], cleanup_qc=True) - apply_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, apply_all=False) - assert ds['qc_temp_mean'].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32, 64, 128] - - ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True) - apply_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, exclude_all_variables='temp_mean') - assert ds['qc_rh_mean'].attrs['flag_masks'] == [1, 2, 4, 8, 16, 32, 64, 128] - assert 'Values are bad for all' in ds['qc_rh_mean'].attrs['flag_meanings'] - assert 'Values are bad for all' not in ds['qc_temp_mean'].attrs['flag_meanings'] - - del ds - - ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=['temp_mean', 'rh_mean']) - apply_supplemental_qc(ds, Path(EXAMPLE_MET_YAML).parent, exclude_all_variables='temp_mean', - assessments='Bad', quiet=True) - assert ds['qc_rh_mean'].attrs['flag_assessments'] == ['Bad'] - assert ds['qc_temp_mean'].attrs['flag_assessments'] == ['Bad', 'Bad'] - assert np.sum(ds['qc_rh_mean'].values) == 124 - assert np.sum(ds['qc_temp_mean'].values) == 2840 - - del ds - - -def test_scalar_dqr(): - # Test DQR Webservice using known DQR - ds = read_arm_netcdf(EXAMPLE_ENA_MET) - - # DQR webservice does go down, so ensure it - # properly runs first before testing - try: - ds = add_dqr_to_qc(ds) - ran = True - except ValueError: - ran = False - - if ran: - assert 'qc_lat' in ds - assert np.size(ds['qc_lon'].values) == 1 - assert np.size(ds['qc_lat'].values) == 1 - assert np.size(ds['qc_alt'].values) == 1 - assert np.size(ds['base_time'].values) == 1 - - -def test_get_attr_info(): - ds = read_arm_netcdf(EXAMPLE_OLD_QC, cleanup_qc=True) - assert 'flag_assessments' in ds['qc_lv'].attrs - assert 'fail_min' in ds['qc_lv'].attrs - assert ds['qc_lv'].attrs['flag_assessments'][0] == 'Bad' - assert ds['qc_lv'].attrs['flag_masks'][-1] == 4 diff --git a/act/tests/test_utils.py b/act/tests/utils/test_data_utils.py similarity index 55% rename from act/tests/test_utils.py rename to act/tests/utils/test_data_utils.py index f62a6a9360..03a4bf512d 100644 --- a/act/tests/test_utils.py +++ b/act/tests/utils/test_data_utils.py @@ -1,21 +1,11 @@ -""" Unit tests for ACT utils module. 
""" - import importlib -import tempfile -from datetime import datetime -from pathlib import Path -import tarfile -from os import chdir, PathLike -import string -import random import numpy as np from numpy.testing import assert_almost_equal -import pandas as pd import pytest -import pytz import xarray as xr import act +from act.utils.data_utils import DatastreamParserARM as DatastreamParser spec = importlib.util.find_spec('pyart') if spec is not None: @@ -24,20 +14,6 @@ PYART_AVAILABLE = False -def test_dates_between(): - start_date = '20191201' - end_date = '20201201' - date_list = act.utils.dates_between(start_date, end_date) - start_string = datetime.strptime(start_date, '%Y%m%d').strftime('%Y-%m-%d') - end_string = datetime.strptime(end_date, '%Y%m%d').strftime('%Y-%m-%d') - answer = np.arange(start_string, end_string, dtype='datetime64[D]') - answer = np.append(answer, answer[-1] + 1) - answer = answer.astype('datetime64[s]').astype(int) - answer = [datetime.utcfromtimestamp(ii) for ii in answer] - - assert date_list == answer - - def test_add_in_nan(): # Make a 1D array of 10 minute data time = np.arange('2019-01-01T01:00', '2019-01-01T01:10', dtype='datetime64[m]') @@ -197,114 +173,6 @@ def test_accum_precip(): assert np.isclose(dmax, 0.22, atol=0.01) -def test_calc_cog_sog(): - ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_NAV) - - ds = act.utils.calc_cog_sog(ds) - - cog = ds['course_over_ground'].values - sog = ds['speed_over_ground'].values - - np.testing.assert_almost_equal(cog[10], 170.987, decimal=3) - np.testing.assert_almost_equal(sog[15], 0.448, decimal=3) - - ds = ds.rename({'lat': 'latitude', 'lon': 'longitude'}) - ds = act.utils.calc_cog_sog(ds) - np.testing.assert_almost_equal(cog[10], 170.987, decimal=3) - np.testing.assert_almost_equal(sog[15], 0.448, decimal=3) - - -def test_destination_azimuth_distance(): - lat = 37.1509 - lon = -98.362 - lat2, lon2 = act.utils.destination_azimuth_distance(lat, lon, 180.0, 100) - - np.testing.assert_almost_equal(lat2, 37.150, decimal=3) - np.testing.assert_almost_equal(lon2, -98.361, decimal=3) - - -def test_calculate_dqr_times(): - ebbr1_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR1) - ebbr2_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR2) - brs_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_BRS) - ebbr1_result = act.utils.calculate_dqr_times(ebbr1_ds, variable=['soil_temp_1'], threshold=2) - ebbr2_result = act.utils.calculate_dqr_times( - ebbr2_ds, variable=['rh_bottom_fraction'], qc_bit=3, threshold=2 - ) - ebbr3_result = act.utils.calculate_dqr_times( - ebbr2_ds, variable=['rh_bottom_fraction'], qc_bit=3 - ) - brs_result = act.utils.calculate_dqr_times( - brs_ds, variable='down_short_hemisp_min', qc_bit=2, threshold=30 - ) - assert ebbr1_result == [('2019-11-25 02:00:00', '2019-11-25 04:30:00')] - assert ebbr2_result == [('2019-11-30 00:00:00', '2019-11-30 11:00:00')] - assert brs_result == [('2019-07-05 01:57:00', '2019-07-05 11:07:00')] - assert ebbr3_result is None - with tempfile.TemporaryDirectory() as tmpdirname: - write_file = Path(tmpdirname) - brs_result = act.utils.calculate_dqr_times( - brs_ds, - variable='down_short_hemisp_min', - qc_bit=2, - threshold=30, - txt_path=str(write_file), - ) - - brs_result = act.utils.calculate_dqr_times( - brs_ds, - variable='down_short_hemisp_min', - qc_bit=2, - threshold=30, - return_missing=False, - ) - assert len(brs_result[0]) == 2 - - ebbr1_ds.close() - ebbr2_ds.close() - brs_ds.close() - - -def 
test_decode_present_weather(): - ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1) - ds = act.utils.decode_present_weather(ds, variable='pwd_pw_code_inst') - - data = ds['pwd_pw_code_inst_decoded'].values - result = 'No significant weather observed' - assert data[0] == result - assert data[100] == result - assert data[600] == result - - np.testing.assert_raises(ValueError, act.utils.inst_utils.decode_present_weather, ds) - np.testing.assert_raises( - ValueError, - act.utils.inst_utils.decode_present_weather, - ds, - variable='temp_temp', - ) - - -def test_datetime64_to_datetime(): - time_datetime = [ - datetime(2019, 1, 1, 1, 0), - datetime(2019, 1, 1, 1, 1), - datetime(2019, 1, 1, 1, 2), - datetime(2019, 1, 1, 1, 3), - datetime(2019, 1, 1, 1, 4), - ] - - time_datetime64 = [ - np.datetime64(datetime(2019, 1, 1, 1, 0)), - np.datetime64(datetime(2019, 1, 1, 1, 1)), - np.datetime64(datetime(2019, 1, 1, 1, 2)), - np.datetime64(datetime(2019, 1, 1, 1, 3)), - np.datetime64(datetime(2019, 1, 1, 1, 4)), - ] - - time_datetime64_to_datetime = act.utils.datetime_utils.datetime64_to_datetime(time_datetime64) - assert time_datetime == time_datetime64_to_datetime - - @pytest.mark.skipif(not PYART_AVAILABLE, reason="Py-ART is not installed.") def test_create_pyart_obj(): try: @@ -347,194 +215,6 @@ def test_create_pyart_obj(): del radar -def test_add_solar_variable(): - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_NAV) - new_ds = act.utils.geo_utils.add_solar_variable(ds) - - assert 'sun_variable' in list(new_ds.keys()) - assert new_ds['sun_variable'].values[10] == 1 - assert np.sum(new_ds['sun_variable'].values) >= 598 - - new_ds = act.utils.geo_utils.add_solar_variable(ds, dawn_dusk=True) - assert 'sun_variable' in list(new_ds.keys()) - assert new_ds['sun_variable'].values[10] == 1 - assert np.sum(new_ds['sun_variable'].values) >= 1234 - - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET1) - new_ds = act.utils.geo_utils.add_solar_variable(ds, dawn_dusk=True) - assert np.sum(new_ds['sun_variable'].values) >= 1046 - - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_IRTSST) - ds = ds.fillna(0) - new_ds = act.utils.geo_utils.add_solar_variable(ds) - assert np.sum(new_ds['sun_variable'].values) >= 12 - - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_IRTSST) - ds.drop_vars('lat') - pytest.raises(ValueError, act.utils.geo_utils.add_solar_variable, ds) - - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_IRTSST) - ds.drop_vars('lon') - pytest.raises(ValueError, act.utils.geo_utils.add_solar_variable, ds) - ds.close() - new_ds.close() - - -def test_reduce_time_ranges(): - time = pd.date_range(start='2020-01-01T00:00:00', freq='1min', periods=100) - time = time.to_list() - time = time[0:50] + time[60:] - result = act.utils.datetime_utils.reduce_time_ranges(time) - assert len(result) == 2 - assert result[1][1].minute == 39 - - result = act.utils.datetime_utils.reduce_time_ranges(time, broken_barh=True) - assert len(result) == 2 - - -def test_planck_converter(): - wnum = 1100 - temp = 300 - radiance = 81.5 - result = act.utils.radiance_utils.planck_converter(wnum=wnum, temperature=temp) - np.testing.assert_almost_equal(result, radiance, decimal=1) - result = act.utils.radiance_utils.planck_converter(wnum=wnum, radiance=radiance) - assert np.ceil(result) == temp - np.testing.assert_raises(ValueError, act.utils.radiance_utils.planck_converter) - - -def test_solar_azimuth_elevation(): - - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_NAV) - - elevation, azimuth, distance = 
act.utils.geo_utils.get_solar_azimuth_elevation( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - time=ds['time'].values, - library='skyfield', - temperature_C='standard', - pressure_mbar='standard', - ) - assert np.isclose(np.nanmean(elevation), 10.5648, atol=0.001) - assert np.isclose(np.nanmean(azimuth), 232.0655, atol=0.001) - assert np.isclose(np.nanmean(distance), 0.985, atol=0.001) - - -def test_get_sunrise_sunset_noon(): - - ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_NAV) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date=ds['time'].values[0], - library='skyfield', - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date=ds['time'].values[0], - library='skyfield', - timezone=True, - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32, tzinfo=pytz.UTC) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4, tzinfo=pytz.UTC) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10, tzinfo=pytz.UTC) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date='20180201', - library='skyfield', - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date=['20180201'], - library='skyfield', - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date=datetime(2018, 2, 1), - library='skyfield', - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date=datetime(2018, 2, 1, tzinfo=pytz.UTC), - library='skyfield', - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) - - sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=ds['lat'].values[0], - longitude=ds['lon'].values[0], - date=[datetime(2018, 2, 1)], - library='skyfield', - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) - assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) - assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) - - 
sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( - latitude=85.0, longitude=-140.0, date=[datetime(2018, 6, 1)], library='skyfield' - ) - assert sunrise[0].replace(microsecond=0) == datetime(2018, 3, 30, 10, 48, 48) - assert sunset[0].replace(microsecond=0) == datetime(2018, 9, 12, 8, 50, 14) - assert noon[0].replace(microsecond=0) == datetime(2018, 6, 1, 21, 17, 52) - - -def test_is_sun_visible(): - ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR1) - result = act.utils.geo_utils.is_sun_visible( - latitude=ds['lat'].values, - longitude=ds['lon'].values, - date_time=ds['time'].values, - ) - assert len(result) == 48 - assert sum(result) == 20 - - result = act.utils.geo_utils.is_sun_visible( - latitude=ds['lat'].values, - longitude=ds['lon'].values, - date_time=ds['time'].values[0], - ) - assert result == [False] - - result = act.utils.geo_utils.is_sun_visible( - latitude=ds['lat'].values, - longitude=ds['lon'].values, - date_time=[datetime(2019, 11, 25, 13, 30, 00)], - ) - assert result == [True] - - result = act.utils.geo_utils.is_sun_visible( - latitude=ds['lat'].values, - longitude=ds['lon'].values, - date_time=datetime(2019, 11, 25, 13, 30, 00), - ) - assert result == [True] - - def test_convert_to_potential_temp(): ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1) @@ -644,39 +324,7 @@ def test_height_adjusted_pressure(): ) -def test_date_parser(): - datestring = '20111001' - output_format = '%Y/%m/%d' - - test_string = act.utils.date_parser(datestring, output_format, return_datetime=False) - assert test_string == '2011/10/01' - - test_datetime = act.utils.date_parser(datestring, output_format, return_datetime=True) - assert test_datetime == datetime(2011, 10, 1) - - -def test_date_parser_minute_second(): - date_string = '2020-01-01T12:00:00' - parsed_date = act.utils.date_parser(date_string, return_datetime=True) - assert parsed_date == datetime(2020, 1, 1, 12, 0, 0) - - output_format = parsed_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' - assert output_format == '2020-01-01T12:00:00.000Z' - - -def test_adjust_timestamp(): - file = act.tests.sample_files.EXAMPLE_EBBR1 - ds = act.io.arm.read_arm_netcdf(file) - ds = act.utils.datetime_utils.adjust_timestamp(ds) - assert ds['time'].values[0] == np.datetime64('2019-11-24T23:30:00.000000000') - - ds = act.utils.datetime_utils.adjust_timestamp(ds, offset=-60 * 60) - assert ds['time'].values[0] == np.datetime64('2019-11-24T22:30:00.000000000') - - -def test_DatastreamParser(): - from act.utils.data_utils import DatastreamParserARM as DatastreamParser - +def test_datastreamparser(): pytest.raises(ValueError, DatastreamParser, 123) fn_obj = DatastreamParser() diff --git a/act/tests/utils/test_datetime_utils.py b/act/tests/utils/test_datetime_utils.py new file mode 100644 index 0000000000..14af804124 --- /dev/null +++ b/act/tests/utils/test_datetime_utils.py @@ -0,0 +1,82 @@ +from datetime import datetime +import numpy as np +import pandas as pd + +import act + + +def test_dates_between(): + start_date = '20191201' + end_date = '20201201' + date_list = act.utils.dates_between(start_date, end_date) + start_string = datetime.strptime(start_date, '%Y%m%d').strftime('%Y-%m-%d') + end_string = datetime.strptime(end_date, '%Y%m%d').strftime('%Y-%m-%d') + answer = np.arange(start_string, end_string, dtype='datetime64[D]') + answer = np.append(answer, answer[-1] + 1) + answer = answer.astype('datetime64[s]').astype(int) + answer = [datetime.utcfromtimestamp(ii) for ii in answer] + + assert 
date_list == answer + + +def test_datetime64_to_datetime(): + time_datetime = [ + datetime(2019, 1, 1, 1, 0), + datetime(2019, 1, 1, 1, 1), + datetime(2019, 1, 1, 1, 2), + datetime(2019, 1, 1, 1, 3), + datetime(2019, 1, 1, 1, 4), + ] + + time_datetime64 = [ + np.datetime64(datetime(2019, 1, 1, 1, 0)), + np.datetime64(datetime(2019, 1, 1, 1, 1)), + np.datetime64(datetime(2019, 1, 1, 1, 2)), + np.datetime64(datetime(2019, 1, 1, 1, 3)), + np.datetime64(datetime(2019, 1, 1, 1, 4)), + ] + + time_datetime64_to_datetime = act.utils.datetime_utils.datetime64_to_datetime(time_datetime64) + assert time_datetime == time_datetime64_to_datetime + + +def test_reduce_time_ranges(): + time = pd.date_range(start='2020-01-01T00:00:00', freq='1min', periods=100) + time = time.to_list() + time = time[0:50] + time[60:] + result = act.utils.datetime_utils.reduce_time_ranges(time) + assert len(result) == 2 + assert result[1][1].minute == 39 + + result = act.utils.datetime_utils.reduce_time_ranges(time, broken_barh=True) + assert len(result) == 2 + + +def test_date_parser(): + datestring = '20111001' + output_format = '%Y/%m/%d' + + test_string = act.utils.date_parser(datestring, output_format, return_datetime=False) + assert test_string == '2011/10/01' + + test_datetime = act.utils.date_parser(datestring, output_format, return_datetime=True) + assert test_datetime == datetime(2011, 10, 1) + + +def test_date_parser_minute_second(): + date_string = '2020-01-01T12:00:00' + parsed_date = act.utils.date_parser(date_string, return_datetime=True) + assert parsed_date == datetime(2020, 1, 1, 12, 0, 0) + + output_format = parsed_date.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' + assert output_format == '2020-01-01T12:00:00.000Z' + + +def test_adjust_timestamp(): + file = act.tests.sample_files.EXAMPLE_EBBR1 + ds = act.io.arm.read_arm_netcdf(file) + ds = act.utils.datetime_utils.adjust_timestamp(ds) + assert ds['time'].values[0] == np.datetime64('2019-11-24T23:30:00.000000000') + + ds = act.utils.datetime_utils.adjust_timestamp(ds, offset=-60 * 60) + assert ds['time'].values[0] == np.datetime64('2019-11-24T22:30:00.000000000') diff --git a/act/tests/utils/test_geo_utils.py b/act/tests/utils/test_geo_utils.py new file mode 100644 index 0000000000..29360d10d2 --- /dev/null +++ b/act/tests/utils/test_geo_utils.py @@ -0,0 +1,179 @@ +from datetime import datetime +import numpy as np +import pytest +import pytz + + +import act + + +def test_destination_azimuth_distance(): + lat = 37.1509 + lon = -98.362 + lat2, lon2 = act.utils.destination_azimuth_distance(lat, lon, 180.0, 100) + + np.testing.assert_almost_equal(lat2, 37.150, decimal=3) + np.testing.assert_almost_equal(lon2, -98.361, decimal=3) + + +def test_add_solar_variable(): + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_NAV) + new_ds = act.utils.geo_utils.add_solar_variable(ds) + + assert 'sun_variable' in list(new_ds.keys()) + assert new_ds['sun_variable'].values[10] == 1 + assert np.sum(new_ds['sun_variable'].values) >= 598 + + new_ds = act.utils.geo_utils.add_solar_variable(ds, dawn_dusk=True) + assert 'sun_variable' in list(new_ds.keys()) + assert new_ds['sun_variable'].values[10] == 1 + assert np.sum(new_ds['sun_variable'].values) >= 1234 + + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_MET1) + new_ds = act.utils.geo_utils.add_solar_variable(ds, dawn_dusk=True) + assert np.sum(new_ds['sun_variable'].values) >= 1046 + + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_IRTSST) + ds = ds.fillna(0) + new_ds = act.utils.geo_utils.add_solar_variable(ds) + assert 
np.sum(new_ds['sun_variable'].values) >= 12 + + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_IRTSST) + ds.drop_vars('lat') + pytest.raises(ValueError, act.utils.geo_utils.add_solar_variable, ds) + + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_IRTSST) + ds.drop_vars('lon') + pytest.raises(ValueError, act.utils.geo_utils.add_solar_variable, ds) + ds.close() + new_ds.close() + + +def test_solar_azimuth_elevation(): + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_NAV) + + elevation, azimuth, distance = act.utils.geo_utils.get_solar_azimuth_elevation( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + time=ds['time'].values, + library='skyfield', + temperature_C='standard', + pressure_mbar='standard', + ) + assert np.isclose(np.nanmean(elevation), 10.5648, atol=0.001) + assert np.isclose(np.nanmean(azimuth), 232.0655, atol=0.001) + assert np.isclose(np.nanmean(distance), 0.985, atol=0.001) + + +def test_get_sunrise_sunset_noon(): + ds = act.io.arm.read_arm_netcdf(act.tests.EXAMPLE_NAV) + + sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + date=ds['time'].values[0], + library='skyfield', + ) + assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) + assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) + assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) + + sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + date=ds['time'].values[0], + library='skyfield', + timezone=True, + ) + assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32, tzinfo=pytz.UTC) + assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4, tzinfo=pytz.UTC) + assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10, tzinfo=pytz.UTC) + + sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + date='20180201', + library='skyfield', + ) + assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) + assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) + assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) + + sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + date=['20180201'], + library='skyfield', + ) + assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) + assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) + assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) + + sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + date=datetime(2018, 2, 1), + library='skyfield', + ) + assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) + assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 17, 24, 4) + assert noon[0].replace(microsecond=0) == datetime(2018, 2, 1, 8, 2, 10) + + sunrise, sunset, noon = act.utils.geo_utils.get_sunrise_sunset_noon( + latitude=ds['lat'].values[0], + longitude=ds['lon'].values[0], + date=datetime(2018, 2, 1, tzinfo=pytz.UTC), + library='skyfield', + ) + assert sunrise[0].replace(microsecond=0) == datetime(2018, 1, 31, 22, 36, 32) + assert sunset[0].replace(microsecond=0) == datetime(2018, 2, 1, 
diff --git a/act/tests/utils/test_inst_utils.py b/act/tests/utils/test_inst_utils.py
new file mode 100644
index 0000000000..465e41f834
--- /dev/null
+++ b/act/tests/utils/test_inst_utils.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+import act
+
+
+def test_decode_present_weather():
+    ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1)
+    ds = act.utils.decode_present_weather(ds, variable='pwd_pw_code_inst')
+
+    data = ds['pwd_pw_code_inst_decoded'].values
+    result = 'No significant weather observed'
+    assert data[0] == result
+    assert data[100] == result
+    assert data[600] == result
+
+    np.testing.assert_raises(ValueError, act.utils.inst_utils.decode_present_weather, ds)
+    np.testing.assert_raises(
+        ValueError,
+        act.utils.inst_utils.decode_present_weather,
+        ds,
+        variable='temp_temp',
+    )
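A usage sketch of the decoder tested above, assuming only what the assertions imply: the decoded strings land in a new '<variable>_decoded' field, and a missing or unknown variable raises ValueError:

import act

ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_MET1)
ds = act.utils.decode_present_weather(ds, variable='pwd_pw_code_inst')
print(ds['pwd_pw_code_inst_decoded'].values[0])  # 'No significant weather observed'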
diff --git a/act/tests/utils/test_io_utils.py b/act/tests/utils/test_io_utils.py
new file mode 100644
index 0000000000..ffe259974e
--- /dev/null
+++ b/act/tests/utils/test_io_utils.py
@@ -0,0 +1,205 @@
+import tempfile
+from pathlib import Path
+from os import PathLike
+from string import ascii_letters
+import random
+
+import act
+from act.tests import sample_files
+
+
+def test_read_netcdf_gztarfiles():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        met_files = list(Path(file) for file in act.tests.EXAMPLE_MET_WILDCARD)
+        filename = act.utils.io_utils.pack_tar(met_files, write_directory=tmpdirname)
+        filename = act.utils.io_utils.pack_gzip(filename, write_directory=tmpdirname, remove=True)
+        ds = act.io.arm.read_arm_netcdf(filename)
+        ds.clean.cleanup()
+
+        assert 'temp_mean' in ds.data_vars
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        met_files = sample_files.EXAMPLE_MET1
+        filename = act.utils.io_utils.pack_gzip(met_files, write_directory=tmpdirname, remove=False)
+        ds = act.io.arm.read_arm_netcdf(filename)
+        ds.clean.cleanup()
+
+        assert 'temp_mean' in ds.data_vars
+
+
+def test_read_netcdf_tarfiles():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        met_files = list(Path(file) for file in act.tests.EXAMPLE_MET_WILDCARD)
+        filename = act.utils.io_utils.pack_tar(met_files, write_directory=tmpdirname)
+        ds = act.io.arm.read_arm_netcdf(filename)
+        ds.clean.cleanup()
+
+        assert 'temp_mean' in ds.data_vars
+
+
+def test_unpack_tar():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        tar_file = Path(tmpdirname, 'tar_file_dir')
+        output_dir = Path(tmpdirname, 'output_dir')
+        tar_file.mkdir(parents=True, exist_ok=True)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        for tar_file_name in ['test_file1.tar', 'test_file2.tar']:
+            filenames = []
+            for value in range(0, 10):
+                filename = "".join(random.choices(list(ascii_letters), k=15))
+                filename = Path(tar_file, f"{filename}.nc")
+                filename.touch()
+                filenames.append(filename)
+            act.utils.io_utils.pack_tar(filenames, write_filename=Path(tar_file, tar_file_name),
+                                        remove=True)
+
+        tar_files = list(tar_file.glob('*.tar'))
+        result = act.utils.io_utils.unpack_tar(tar_files[0], write_directory=output_dir)
+        assert isinstance(result, list)
+        assert len(result) == 10
+        for file in result:
+            assert isinstance(file, (str, PathLike))
+
+        files = list(output_dir.glob('*'))
+        assert len(files) == 1
+        assert files[0].is_dir()
+        act.utils.io_utils.cleanup_files(dirname=output_dir)
+        files = list(output_dir.glob('*'))
+        assert len(files) == 0
+
+        # Check returning a directory instead of a list of files
+        result = act.utils.io_utils.unpack_tar(tar_files[0], write_directory=output_dir, return_files=False)
+        assert isinstance(result, str)
+        files = list(Path(result).glob('*'))
+        assert len(files) == 10
+        act.utils.io_utils.cleanup_files(result)
+        files = list(Path(output_dir).glob('*'))
+        assert len(files) == 0
+
+        # Test temporary directory
+        result = act.utils.io_utils.unpack_tar(tar_files[0], temp_dir=True)
+        assert isinstance(result, list)
+        assert len(result) == 10
+        for file in result:
+            assert isinstance(file, (str, PathLike))
+
+        act.utils.io_utils.cleanup_files(files=result)
+
+        # Test removing TAR file
+        result = act.utils.io_utils.unpack_tar(tar_files, write_directory=output_dir, remove=True)
+        assert isinstance(result, list)
+        assert len(result) == 20
+        for file in result:
+            assert isinstance(file, (str, PathLike))
+
+        tar_files = list(tar_file.glob('*.tar'))
+        assert len(tar_files) == 0
+
+        act.utils.io_utils.cleanup_files(files=result)
+        files = list(Path(output_dir).glob('*'))
+        assert len(files) == 0
+
+        not_a_tar_file = Path(tar_file, 'not_a_tar_file.tar')
+        not_a_tar_file.touch()
+        result = act.utils.io_utils.unpack_tar(not_a_tar_file, Path(output_dir, 'another_dir'))
+        assert result == []
+
+        act.utils.io_utils.cleanup_files()
+
+        not_a_directory = '/asasfdlkjsdfjioasdflasdfhasd/not/a/directory'
+        act.utils.io_utils.cleanup_files(dirname=not_a_directory)
+
+        not_a_file = Path(not_a_directory, 'not_a_file.nc')
+        act.utils.io_utils.cleanup_files(files=not_a_file)
+
+        act.utils.io_utils.cleanup_files(files=output_dir)
+
+        dir_names = list(Path(tmpdirname).glob('*'))
+        for dir_name in [tar_file, output_dir]:
+            assert dir_name in dir_names
+
+        filename = "".join(random.choices(list(ascii_letters), k=15))
+        filename = Path(tar_file, f"{filename}.nc")
+        filename.touch()
+        result = act.utils.io_utils.pack_tar(
+            filename, write_filename=Path(tar_file, 'test_file_single'), remove=True)
+        assert Path(filename).is_file() is False
+        assert Path(result).is_file()
+        assert result.endswith('.tar')
+
+
+def test_gunzip():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        filenames = []
+        for value in range(0, 10):
+            filename = "".join(random.choices(list(ascii_letters), k=15))
+            filename = Path(tmpdirname, f"{filename}.nc")
+            filename.touch()
+            filenames.append(filename)
+
+        filename = act.utils.io_utils.pack_tar(filenames, write_directory=tmpdirname, remove=True)
+        files = list(Path(tmpdirname).glob('*'))
+        assert len(files) == 1
+        assert files[0].name == 'created_tarfile.tar'
+        assert Path(filename).name == 'created_tarfile.tar'
+
+        gzip_file = act.utils.io_utils.pack_gzip(filename=filename)
+        files = list(Path(tmpdirname).glob('*'))
+        assert len(files) == 2
+        files = list(Path(tmpdirname).glob('*.gz'))
+        assert files[0].name == 'created_tarfile.tar.gz'
+        assert Path(gzip_file).name == 'created_tarfile.tar.gz'
+
+        unpack_filename = act.utils.io_utils.unpack_gzip(filename=gzip_file)
+        files = list(Path(tmpdirname).glob('*'))
+        assert len(files) == 2
+        assert Path(unpack_filename).name == 'created_tarfile.tar'
+
+        result = act.utils.io_utils.unpack_tar(unpack_filename, return_files=True, randomize=True)
+        files = list(Path(Path(result[0]).parent).glob('*'))
+        assert len(result) == 10
+        assert len(files) == 10
+        for file in result:
+            assert file.endswith('.nc')
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        filenames = []
+        for value in range(0, 10):
+            filename = "".join(random.choices(list(ascii_letters), k=15))
+            filename = Path(tmpdirname, f"{filename}.nc")
+            filename.touch()
+            filenames.append(filename)
+
+        filename = act.utils.io_utils.pack_tar(filenames, write_directory=tmpdirname, remove=True)
+        files = list(Path(tmpdirname).glob('*'))
+        assert len(files) == 1
+        files = list(Path(tmpdirname).glob('*.tar'))
+        assert files[0].name == 'created_tarfile.tar'
+        assert Path(filename).name == 'created_tarfile.tar'
+
+        gzip_file = act.utils.io_utils.pack_gzip(
+            filename=filename, write_directory=Path(filename).parent, remove=False)
+        files = list(Path(tmpdirname).glob('*'))
+        assert len(files) == 2
+        files = list(Path(tmpdirname).glob('*gz'))
+        assert files[0].name == 'created_tarfile.tar.gz'
+        assert Path(gzip_file).name == 'created_tarfile.tar.gz'
+
+        unpack_filename = act.utils.io_utils.unpack_gzip(
+            filename=gzip_file, write_directory=Path(filename).parent, remove=False)
+        files = list(Path(tmpdirname).glob('*'))
+        assert len(files) == 2
+        assert Path(unpack_filename).name == 'created_tarfile.tar'
+
+        result = act.utils.io_utils.unpack_tar(unpack_filename, return_files=True, randomize=False, remove=True)
+        files = list(Path(Path(result[0]).parent).glob('*.nc'))
+        assert len(result) == 10
+        assert len(files) == 10
+        for file in result:
+            assert file.endswith('.nc')
+
+        assert Path(unpack_filename).is_file() is False
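Taken together, the io_utils helpers above form a pack/unpack round trip. A minimal sketch under the signatures the tests use; the /tmp paths are hypothetical stand-ins:

from pathlib import Path
import act

nc_files = list(Path('/tmp/data').glob('*.nc'))  # hypothetical input files
tar_path = act.utils.io_utils.pack_tar(nc_files, write_directory='/tmp/out')
gz_path = act.utils.io_utils.pack_gzip(filename=tar_path)    # appends '.gz'
tar_back = act.utils.io_utils.unpack_gzip(filename=gz_path)  # strips '.gz'
members = act.utils.io_utils.unpack_tar(tar_back, write_directory='/tmp/unpacked')
act.utils.io_utils.cleanup_files(files=members)  # remove the unpacked copies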
diff --git a/act/tests/utils/test_qc_utils.py b/act/tests/utils/test_qc_utils.py
new file mode 100644
index 0000000000..9c9183202d
--- /dev/null
+++ b/act/tests/utils/test_qc_utils.py
@@ -0,0 +1,46 @@
+import tempfile
+from pathlib import Path
+
+import act
+
+
+def test_calculate_dqr_times():
+    ebbr1_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR1)
+    ebbr2_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR2)
+    brs_ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_BRS)
+    ebbr1_result = act.utils.calculate_dqr_times(ebbr1_ds, variable=['soil_temp_1'], threshold=2)
+    ebbr2_result = act.utils.calculate_dqr_times(
+        ebbr2_ds, variable=['rh_bottom_fraction'], qc_bit=3, threshold=2
+    )
+    ebbr3_result = act.utils.calculate_dqr_times(
+        ebbr2_ds, variable=['rh_bottom_fraction'], qc_bit=3
+    )
+    brs_result = act.utils.calculate_dqr_times(
+        brs_ds, variable='down_short_hemisp_min', qc_bit=2, threshold=30
+    )
+    assert ebbr1_result == [('2019-11-25 02:00:00', '2019-11-25 04:30:00')]
+    assert ebbr2_result == [('2019-11-30 00:00:00', '2019-11-30 11:00:00')]
+    assert brs_result == [('2019-07-05 01:57:00', '2019-07-05 11:07:00')]
+    assert ebbr3_result is None
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        write_file = Path(tmpdirname)
+        brs_result = act.utils.calculate_dqr_times(
+            brs_ds,
+            variable='down_short_hemisp_min',
+            qc_bit=2,
+            threshold=30,
+            txt_path=str(write_file),
+        )
+
+    brs_result = act.utils.calculate_dqr_times(
+        brs_ds,
+        variable='down_short_hemisp_min',
+        qc_bit=2,
+        threshold=30,
+        return_missing=False,
+    )
+    assert len(brs_result[0]) == 2
+
+    ebbr1_ds.close()
+    ebbr2_ds.close()
+    brs_ds.close()
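The call pattern for calculate_dqr_times, using only arguments the test above passes; per its assertions the return value is a list of (start, end) timestamp tuples, or None when nothing qualifies:

import act

ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_EBBR1)
times = act.utils.calculate_dqr_times(ds, variable=['soil_temp_1'], threshold=2)
# e.g. [('2019-11-25 02:00:00', '2019-11-25 04:30:00')]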
diff --git a/act/tests/utils/test_radiance_utils.py b/act/tests/utils/test_radiance_utils.py
new file mode 100644
index 0000000000..e7d0b5c006
--- /dev/null
+++ b/act/tests/utils/test_radiance_utils.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+import act
+
+
+def test_planck_converter():
+    wnum = 1100
+    temp = 300
+    radiance = 81.5
+    result = act.utils.radiance_utils.planck_converter(wnum=wnum, temperature=temp)
+    np.testing.assert_almost_equal(result, radiance, decimal=1)
+    result = act.utils.radiance_utils.planck_converter(wnum=wnum, radiance=radiance)
+    assert np.ceil(result) == temp
+    np.testing.assert_raises(ValueError, act.utils.radiance_utils.planck_converter)
diff --git a/act/tests/utils/test_ship_utils.py b/act/tests/utils/test_ship_utils.py
new file mode 100644
index 0000000000..1d1acb388b
--- /dev/null
+++ b/act/tests/utils/test_ship_utils.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+import act
+
+
+def test_calc_cog_sog():
+    ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_NAV)
+
+    ds = act.utils.calc_cog_sog(ds)
+
+    cog = ds['course_over_ground'].values
+    sog = ds['speed_over_ground'].values
+
+    np.testing.assert_almost_equal(cog[10], 170.987, decimal=3)
+    np.testing.assert_almost_equal(sog[15], 0.448, decimal=3)
+
+    ds = ds.rename({'lat': 'latitude', 'lon': 'longitude'})
+    ds = act.utils.calc_cog_sog(ds)
+    cog = ds['course_over_ground'].values
+    sog = ds['speed_over_ground'].values
+    np.testing.assert_almost_equal(cog[10], 170.987, decimal=3)
+    np.testing.assert_almost_equal(sog[15], 0.448, decimal=3)
diff --git a/act/utils/__init__.py b/act/utils/__init__.py
index cd885b0cb1..867c0d8690 100644
--- a/act/utils/__init__.py
+++ b/act/utils/__init__.py
@@ -22,6 +22,7 @@
         'height_adjusted_temperature',
         'convert_to_potential_temp',
         'arm_site_location_search',
+        'DatastreamParserARM',
     ],
     'datetime_utils': [
        'dates_between',