New columns added to the MET-11.0.0-beta1 output. Issue #92 (PR #100)

Merged: 4 commits, Aug 3, 2022
.github/workflows/compare_db.yml: 2 changes (1 addition, 1 deletion)
@@ -74,7 +74,7 @@ jobs:
run: |
mkdir metdata
cd metdata
- curl 'https://dtcenter.ucar.edu/dfiles/code/METplus/METdatadb/sample_data-met_out_v9.1.tgz' -o m91.tgz
+ curl 'https://dtcenter.ucar.edu/dfiles/code/METplus/METdataio/sample_data-met_out_v9.1.tgz' -o m91.tgz
tar -xvf m91.tgz
# Note that both XML files are taken from the new branch, so they can match if changed
- name: run METdbload prod
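The workflow change above simply points the sample-data download at the renamed METdataio area (formerly METdatadb). As a quick local sanity check, a small script along these lines can confirm the relocated tarball URL still resolves before CI runs; it is purely illustrative and not part of the PR, and the helper name is made up.

import urllib.error
import urllib.request

SAMPLE_URL = ("https://dtcenter.ucar.edu/dfiles/code/METplus/"
              "METdataio/sample_data-met_out_v9.1.tgz")


def url_is_reachable(url, timeout=10):
    """Return True if a HEAD request to the URL gets a 2xx response."""
    request = urllib.request.Request(url, method="HEAD")
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return 200 <= response.status < 300
    except OSError:
        # URLError, HTTPError (404, etc.) and socket timeouts are all OSError subclasses
        return False


if __name__ == "__main__":
    print(f"{SAMPLE_URL} reachable: {url_is_reachable(SAMPLE_URL)}")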
METdbLoad/sql/mv_mysql.sql: 20 changes (18 additions, 2 deletions)
@@ -134,6 +134,7 @@ CREATE TABLE line_data_ctc
fy_on INT UNSIGNED,
fn_oy INT UNSIGNED,
fn_on INT UNSIGNED,
+ ec_value DOUBLE DEFAULT -9999,

CONSTRAINT line_data_ctc_data_file_id_pk
FOREIGN KEY (data_file_id)
@@ -263,6 +264,11 @@ CREATE TABLE line_data_cts
bagss DOUBLE DEFAULT -9999,
bagss_bcl DOUBLE DEFAULT -9999,
bagss_bcu DOUBLE DEFAULT -9999,

+ hss_ec DOUBLE DEFAULT -9999,
+ hss_ec_bcl DOUBLE DEFAULT -9999,
+ hss_ec_bcu DOUBLE DEFAULT -9999,
+ ec_value DOUBLE DEFAULT -9999,

CONSTRAINT line_data_cts_data_file_id_pk
FOREIGN KEY (data_file_id)
@@ -893,8 +899,8 @@ CREATE TABLE line_data_vl1l2
uvfobar DOUBLE,
uvffbar DOUBLE,
uvoobar DOUBLE,
- f_speed_bar DOUBLE,
- o_speed_bar DOUBLE,
+ f_speed_bar DOUBLE DEFAULT -9999,
+ o_speed_bar DOUBLE DEFAULT -9999,

CONSTRAINT line_data_vl1l2_data_file_id_pk
FOREIGN KEY (data_file_id)
@@ -931,6 +937,8 @@ CREATE TABLE line_data_val1l2
uvfoabar DOUBLE,
uvffabar DOUBLE,
uvooabar DOUBLE,
+ fa_speed_bar DOUBLE DEFAULT -9999,
+ oa_speed_bar DOUBLE DEFAULT -9999,

CONSTRAINT line_data_val1l2_data_file_id_pk
FOREIGN KEY (data_file_id)
@@ -1681,6 +1689,14 @@ CREATE TABLE line_data_vcnt
dir_abserr DOUBLE,
dir_abserr_bcl DOUBLE,
dir_abserr_bcu DOUBLE,
+ anom_corr DOUBLE DEFAULT -9999,
+ anom_corr_ncl DOUBLE DEFAULT -9999,
+ anom_corr_ncu DOUBLE DEFAULT -9999,
+ anom_corr_bcl DOUBLE DEFAULT -9999,
+ anom_corr_bcu DOUBLE DEFAULT -9999,
+ anom_corr_uncntr DOUBLE DEFAULT -9999,
+ anom_corr_uncntr_bcl DOUBLE DEFAULT -9999,
+ anom_corr_uncntr_bcu DOUBLE DEFAULT -9999,

CONSTRAINT line_data_vcnt_data_file_id_pk
FOREIGN KEY (data_file_id)
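The mv_mysql.sql changes only affect databases created from scratch with the new schema; an existing database needs the equivalent ALTER TABLE statements. The sketch below is a minimal illustration derived from the diff above, not part of this PR and not an official migration script: it only prints the SQL, which you can then run with whatever MySQL client you normally use.

# Tables and the columns this PR adds, all DOUBLE DEFAULT -9999 per the schema diff.
NEW_COLUMNS = {
    "line_data_ctc": ["ec_value"],
    "line_data_cts": ["hss_ec", "hss_ec_bcl", "hss_ec_bcu", "ec_value"],
    "line_data_val1l2": ["fa_speed_bar", "oa_speed_bar"],
    "line_data_vcnt": ["anom_corr", "anom_corr_ncl", "anom_corr_ncu",
                       "anom_corr_bcl", "anom_corr_bcu", "anom_corr_uncntr",
                       "anom_corr_uncntr_bcl", "anom_corr_uncntr_bcu"],
}

# Existing columns that only gain the -9999 default.
MODIFIED_COLUMNS = {
    "line_data_vl1l2": ["f_speed_bar", "o_speed_bar"],
}


def build_alter_statements():
    """Build ALTER TABLE statements mirroring the schema changes in this PR."""
    statements = []
    for table, columns in NEW_COLUMNS.items():
        for column in columns:
            statements.append(
                f"ALTER TABLE {table} ADD COLUMN {column} DOUBLE DEFAULT -9999;")
    for table, columns in MODIFIED_COLUMNS.items():
        for column in columns:
            statements.append(
                f"ALTER TABLE {table} MODIFY COLUMN {column} DOUBLE DEFAULT -9999;")
    return statements


if __name__ == "__main__":
    print("\n".join(build_alter_statements()))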
METdbLoad/ush/constants.py: 12 changes (8 additions, 4 deletions)
@@ -386,7 +386,7 @@
'si', 'si_bcl', 'si_bcu']

LINE_DATA_FIELDS[CTC] = TOT_LINE_DATA_FIELDS + \
- [FY_OY, FY_ON, FN_OY, FN_ON]
+ [FY_OY, FY_ON, FN_OY, FN_ON, EC_VALUE]

LINE_DATA_FIELDS[CTS] = ALPH_LINE_DATA_FIELDS + \
[BASER, 'baser_ncl', 'baser_ncu', 'baser_bcl', 'baser_bcu',
@@ -408,7 +408,8 @@
'seds', 'seds_ncl', 'seds_ncu', 'seds_bcl', 'seds_bcu',
'edi', 'edi_ncl', 'edi_ncu', 'edi_bcl', 'edi_bcu',
'sedi', 'sedi_ncl', 'sedi_ncu', 'sedi_bcl', 'sedi_bcu',
- 'bagss', 'bagss_bcl', 'bagss_bcu']
+ 'bagss', 'bagss_bcl', 'bagss_bcu', 'hss_ec', 'hss_ec_bcl',
+ 'hss_ec_bcu', EC_VALUE]

LINE_DATA_FIELDS[DMAP] = ALPH_LINE_DATA_FIELDS + \
['fy', 'oy', 'fbias', 'baddeley', 'hausdorff',
@@ -551,7 +552,7 @@

LINE_DATA_FIELDS[VAL1L2] = TOT_LINE_DATA_FIELDS + \
['ufabar', 'vfabar', 'uoabar', 'voabar', 'uvfoabar', 'uvffabar',
- 'uvooabar']
+ 'uvooabar', 'fa_speed_bar', 'oa_speed_bar']

LINE_DATA_FIELDS[VCNT] = ALPH_LINE_DATA_FIELDS + \
['fbar', 'fbar_bcl', 'fbar_bcu', 'obar', 'obar_bcl', 'obar_bcu',
@@ -568,7 +569,10 @@
'speed_err', 'speed_err_bcl', 'speed_err_bcu',
'speed_abserr', 'speed_abserr_bcl', 'speed_abserr_bcu',
'dir_err', 'dir_err_bcl', 'dir_err_bcu',
- 'dir_abserr', 'dir_abserr_bcl', 'dir_abserr_bcu']
+ 'dir_abserr', 'dir_abserr_bcl', 'dir_abserr_bcu',
+ 'anom_corr', 'anom_corr_ncl', 'anom_corr_ncu',
+ 'anom_corr_bcl', 'anom_corr_bcu', 'anom_corr_uncntr',
+ 'anom_corr_uncntr_bcl', 'anom_corr_uncntr_bcu']

COLUMNS[TCMPR] = ['total', 'index_pair', 'level', 'watch_warn', 'initials', 'alat',
'alon',
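The field lists in constants.py have to track the schema: the loader builds its per-line-type column lists from LINE_DATA_FIELDS, so any column added to mv_mysql.sql needs a matching entry here. A rough cross-check like the one below can catch a field added on one side but not the other; it is illustration only, not part of the PR, and the relative path plus the naive regex parsing of the DDL are assumptions.

import re
from pathlib import Path

# Assumed relative path; run from the repository root.
SQL_PATH = Path("METdbLoad/sql/mv_mysql.sql")

# Fields added in this PR, keyed by the table they belong to.
NEW_FIELDS = {
    "line_data_ctc": ["ec_value"],
    "line_data_cts": ["hss_ec", "hss_ec_bcl", "hss_ec_bcu", "ec_value"],
    "line_data_val1l2": ["fa_speed_bar", "oa_speed_bar"],
    "line_data_vcnt": ["anom_corr", "anom_corr_ncl", "anom_corr_ncu",
                       "anom_corr_bcl", "anom_corr_bcu", "anom_corr_uncntr",
                       "anom_corr_uncntr_bcl", "anom_corr_uncntr_bcu"],
}


def create_table_block(sql_text, table):
    """Return the CREATE TABLE statement for `table`, up to its closing semicolon."""
    match = re.search(rf"CREATE TABLE {table}\b.*?;", sql_text,
                      re.IGNORECASE | re.DOTALL)
    return match.group(0) if match else ""


def missing_columns(sql_text, table, fields):
    """List the fields that do not start a column definition line in the table block."""
    block = create_table_block(sql_text, table)
    return [field for field in fields
            if not re.search(rf"^\s*{field}\s", block, re.MULTILINE)]


if __name__ == "__main__":
    sql_text = SQL_PATH.read_text()
    for table, fields in NEW_FIELDS.items():
        gaps = missing_columns(sql_text, table, fields)
        print(f"{table}: {'OK' if not gaps else 'missing ' + ', '.join(gaps)}")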
METdbLoad/ush/read_data_files.py: 21 changes (12 additions, 9 deletions)
@@ -92,7 +92,7 @@ def read_data(self, load_flags, load_files, line_types):
# Add the code that describes what kind of file this is - stat, vsdb, etc
self.data_files[CN.DATA_FILE_LU_ID] = \
np.vectorize(self.get_lookup)(self.data_files[CN.FULL_FILE])

# Drop files that are not of a valid type
self.data_files.drop(self.data_files[self.data_files[CN.DATA_FILE_LU_ID] ==
CN.NO_KEY].index, inplace=True)
@@ -196,7 +196,7 @@ def read_data(self, load_flags, load_files, line_types):
# put space in front of hyphen between numbers in case space is missing
# FHO can have negative thresh - fix with regex, only between numbers
split_file.iloc[:, 1] = \
split_file.iloc[:, 1].str.replace(r'(\d)-(\d)', r'\1 -\2',
regex=True)

# merge the two halves together again
@@ -237,7 +237,7 @@ def read_data(self, load_flags, load_files, line_types):
# Process mode files
#
elif lu_id in (CN.MODE_CTS, CN.MODE_OBJ):

# Get the first line of the mode cts or obj file that has the headers
file_hdr = pd.read_csv(filename, delim_whitespace=True,
nrows=1)
@@ -1167,28 +1167,31 @@ def read_stat(self, filename, hdr_names):
Returns:
all the stat lines in a dataframe, with dates converted to datetime
"""
- # switched to python engine for python 3.8
+ # switched to python engine for python 3.8 and pandas 1.4.2
+ # switched back to c version for pandas 1.2.3
+ # added the low_memory=False option when getting a DtypeWarning
+ # to switch to python version: low_memory=False -> engine='python'
return pd.read_csv(filename, delim_whitespace=True,
names=hdr_names, skiprows=1,
parse_dates=[CN.FCST_VALID_BEG,
CN.FCST_VALID_END,
CN.OBS_VALID_BEG,
CN.OBS_VALID_END],
date_parser=self.cached_date_parser,
- keep_default_na=False, na_values='', engine='python')
+ keep_default_na=False, na_values='', low_memory=False)

def read_tcst(self, filename, hdr_names):
""" Read in all of the lines except the header of a tcst file.
Returns:
all the tcst lines in a dataframe, with dates converted to datetime
"""
- # switched to python engine for python 3.8
+ # added the low_memory=False option when getting a DtypeWarning
return pd.read_csv(filename, delim_whitespace=True,
names=hdr_names, skiprows=1,
parse_dates=[CN.INIT,
CN.VALID],
date_parser=self.cached_date_parser,
- keep_default_na=False, na_values='', engine='python')
+ keep_default_na=False, na_values='', low_memory=False)

def cached_date_parser(self, date_str):
""" if date is repeated and already converted, return that value.
@@ -1211,10 +1214,10 @@ def read_mode(self, filename, hdr_names):
Returns:
all the mode lines in a dataframe, with dates converted to datetime
"""
- # switched to python engine for python 3.8
+ # added the low_memory=False option when getting a DtypeWarning
return pd.read_csv(filename, delim_whitespace=True,
names=hdr_names, skiprows=1,
parse_dates=[CN.FCST_VALID,
CN.OBS_VALID],
date_parser=self.cached_date_parser,
- keep_default_na=False, na_values='', engine='python')
+ keep_default_na=False, na_values='', low_memory=False)
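The read_csv changes in this file all follow one pattern: drop engine='python' and let the default C engine read the file with low_memory=False. Reading a column in one pass lets pandas settle on a single dtype, which avoids the DtypeWarning that chunked type inference can raise on large .stat files with mixed-type columns; low_memory is also a C-engine-only option, so it cannot be combined with engine='python' (hence the new comment noting that switching back means swapping low_memory=False for engine='python'). Below is a small self-contained illustration of the same call pattern on a toy in-memory table; it is not project code.

import io

import pandas as pd

# Toy whitespace-delimited data with a column that mixes numbers and text,
# loosely mimicking the variable trailing columns of a MET .stat file.
TOY_STAT = io.StringIO(
    "COL_A COL_B COL_C\n"
    "1     2.5   NA\n"
    "2     text  7\n"
    "3     4.0   8\n"
)

# Same flavor of call as read_stat() above: C engine, whole-file dtype inference.
df = pd.read_csv(TOY_STAT, delim_whitespace=True,
                 keep_default_na=False, na_values='',
                 low_memory=False)

print(df.dtypes)  # COL_B comes back as object because of the mixed values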