-
Notifications
You must be signed in to change notification settings - Fork 0
/
lseg_qa_test_timeseries.R
232 lines (211 loc) · 7.2 KB
/
lseg_qa_test_timeseries.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
##### This script runs QA on land segment summary stat data. It generates a txt file with list of land segments it flags.
## Last Updated 4/26/22
## HARP Group
## To change metric and QA testing value alter lines 33 and 45
## Change the .XXX label at the end of the paste statement in line 36 to correspond to metric (ex: PRC = precipitation)
## Change the numeric condition in the if statement to fit needs in line 45 (for precipitation > 1 corresponds to > 1 in/hr)
# load packages
#.libPaths("/var/www/R/x86_64-pc-linux-gnu-library/")
basepath <- '/var/www/R';
source(paste(basepath,'config.R',sep='/'))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(sqldf))
#library(IHA)
#library(zoo)
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(hydrotools))
ds <- RomDataSource$new(site, rest_uname)
ds$get_token(rest_pw)
# load lseg_functions
nldas_site <- paste0(omsite,"/met/out/lseg_csv") # temporary cloud url
nldas_root=Sys.getenv(c('NLDAS_ROOT'))[1]
if(is.empty(nldas_root)) {
message("Can not locate env variable NLDAS_ROOT. Please set and tryagain (or run hspf_config to set)")
quit()
}
#source(paste(github_location,"HARParchive/HARP-2021-2022","lseg_functions.R", sep = "/"))
source(paste0(nldas_root, "/R/nldas_feature_dataset_prop.R"))
argst <- commandArgs(trailingOnly = T)
if (length(argst) < 4) {
message("Use: Rscript lseg_qa_test_timeseries.R landseg dataset landseg_ftype model_version_code ")
message("Ex: Rscript lseg_qa_test_timeseries.R A51011 1984010100-2020123123 cbp532_landseg cbp-5.3.2 ")
quit()
}
landseg = argst[1]
dataset = argst[2]
landseg_ftype = argst[3]
model_version_code = argst[4]
if (length(argst) > 4) {
# store as a run scenario?
as_scen = argst[5]
} else {
as_scen = 0
}
# Variable names
om_con <- 'om_class_Constant'
om_file <- 'external_file'
# instantiate data frames and variables for loops
i <- 1
print(paste0("current landsegment: ", landseg))
# get/set a model for this data
nldas_data <- nldas_feature_dataset_prop(ds, landseg, 'landunit', landseg_ftype, model_version_code, dataset, as_scen)
# read in lseg_csv
ts_file_url <- paste0(nldas_site,"/",dataset,"/",landseg,".PRC")
timeSeries <- fread(ts_file_url)
names(timeSeries) <- c('year','month','day','hour','tsvalue')
# code with correct input directory if running on deq machine
#timeSeries <- fread(paste0(dir, "out/lseg_csv/1984010100-2020123123/",landseg,".PRC"))
print(paste0(landseg," data read"))
data_file <- RomProperty$new(
ds,
list(
entity_type='dh_properties',propname='file',varkey=om_file,featureid=nldas_data$pid
),
TRUE
)
data_file$propcode <- ts_file_url
data_file$save(TRUE)
# line of code to help run even with incomplete lseg_csv
#timeSeries <- timeSeries[-nrow(timeSeries),]
# loops iterates through to check for abnormally values
j <- 1
allcount <- sqldf("select count(*) as num_anom from timeSeries")
ann_dat <- sqldf(
paste("
select year, round(max(tsvalue),2) as maxp, round(sum(tsvalue),2) as sump, count(*)/24 as days
from timeSeries
group by year
")
)
ann_sum <- sqldf(
"select max(sump), avg(sump), min(sump)
from ann_dat
where days >= 365
"
)
max_ann <- ann_sum$max
mean_ann <- ann_sum$avg
min_ann <- ann_sum$min
min_year <- as.integer(sqldf(paste("select year from ann_dat where sump =", min_ann)))
max_year <- as.integer(sqldf(paste("select year from ann_dat where sump =", max_ann)))
hecount <- sqldf("select count(*) as num_anom from timeSeries where tsvalue > 4.0")
decount <- sqldf(
"select count(*) as num_anom
from (
select year, month, day, sum(tsvalue) as tsvalue
from timeSeries
group by year, month, day
) as ts
where tsvalue > 20.0"
)
pcount <- sqldf("select count(*) as num_anom from timeSeries where tsvalue > 1.0")
message(paste(landseg, allcount, "values checked"))
data_status <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='status',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
# flag as bad if there are anomalous values, or no values
if ( (hecount > 0) || (decount > 0) || (allcount == 0)) {
data_status$propvalue <- 0
} else {
data_status$propvalue <- 1
}
data_status$save(TRUE)
data_flagged <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='PRC_anomaly_count',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_flagged$propvalue <- as.integer(pcount)
data_flagged$save(TRUE)
data_count <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='record_count',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_count$propvalue <- as.integer(allcount)
data_count$save(TRUE)
data_max <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='precip_annual_max_in',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_max$propvalue <- as.numeric(max_ann)
data_max$save(TRUE)
data_mean <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='precip_annual_mean_in',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_mean$propvalue <- as.numeric(mean_ann)
data_mean$save(TRUE)
data_min <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='precip_annual_min_in',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_min$propvalue <- as.numeric(min_ann)
data_min$save(TRUE)
data_min_yr <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='precip_annual_min_year',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_min_yr$propvalue <- as.numeric(min_year)
data_min_yr$save(TRUE)
data_max_yr <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='precip_annual_max_year',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
data_max_yr$propvalue <- as.numeric(max_year)
data_max_yr$save(TRUE)
de_count <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='PRC_daily_error_count',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
if (!is.na(de_count$pid)) {
# delete this so as not to save old years underneath of it
#de_count$delete()
# but we CAN'T cause we don't have a rest delete entity support yet
# so just warn the user that this needs to happen
message(paste("*** WARNING: User should delete the prior version of this property, with pid = ", de_count$pid, "in order to eliminate older QA counts"))
}
de_count$propvalue <- as.integer(decount)
de_count$save(TRUE)
# todo: put summary of years with errors/anomalies
ydecount <- sqldf(
"select year, count(*) as num_anom
from (
select year, month, day, sum(tsvalue) as tsvalue
from timeSeries
group by year, month, day
) as ts
where tsvalue > 20.0"
)
for (i in 1:nrow(ydecount)) {
dinfo <- ydecount[i,]
yde_rec <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname=paste('year',dinfo$year),varkey=om_con,featureid=de_count$pid),
TRUE
)
yde_rec$propvalue <- as.integer(dinfo$num_anom)
yde_rec$save(TRUE)
yde_year <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='year',varkey=om_con,featureid=yde_rec$pid),
TRUE
)
yde_year$propvalue <- as.integer(dinfo$year)
yde_year$save(TRUE)
}
he_count <- RomProperty$new(
ds,
list(entity_type='dh_properties',propname='PRC_hourly_error_count',varkey=om_con,featureid=nldas_data$pid),
TRUE
)
he_count$propvalue <- as.integer(hecount)
he_count$save(TRUE)