-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtwo_phase_sampling_area_estimation.R
571 lines (437 loc) · 25.7 KB
/
two_phase_sampling_area_estimation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
#############################################################################
## This function is for computing area estimates of two-phase sampling with data from Open Foris Arena.
#
# This function can be called in the Arena's data processing chain, after running 'persist-results.R'.
#
# Required R packages (with dependencies): tidyverse, rlang, forestinventory
#
# Created by: Lauri Vesa, FAO
# Anibal Cuchietti, FAO
#
# Last update: 6.9.2024
#
#############################################################################
ronak_func <- function( df, joint_category_column, report_categories ) {
# https://stackoverflow.com/questions/67996444/how-do-i-create-a-binary-column-based-off-characters-in-another-column-in-r
df <- df %>%
mutate( TEMP_phase2 = ifelse( phase != 2 , NA, !!sym( joint_category_column) )) %>%
mutate( col = paste0( "joint_ph2_", sub( '\\d+\\.', '', TEMP_phase2))) %>%
pivot_wider( names_from = col, values_from = col,
values_fn = length, values_fill = 0) %>%
dplyr::select( -TEMP_phase2 ) %>%
# For each joint category type this is a dummy variable as an indicator of the
# number of plots of each joint (e.g. land use) category according to the 1st phase (N1)
mutate(col = paste0( "joint_ph1_", sub( '\\d+\\.', '', !!sym(joint_category_column) ))) %>%
pivot_wider( names_from = col, values_from = col,
values_fn = length, values_fill = 0) %>%
as.data.frame()
if ( all(report_categories != "" )) {
for (s_column in report_categories) {
df <- df %>%
mutate( TEMP_phase2 = ifelse( phase != 2 , NA, !!sym( s_column ))) %>%
mutate( col = paste0( "report_ph2_", s_column, "__", sub( '\\d+\\.', '', TEMP_phase2))) %>%
pivot_wider( names_from = col, values_from = col,
values_fn = length, values_fill = 0) %>%
dplyr::select( -TEMP_phase2 ) %>%
as.data.frame()
}
}
df
}
arenaTwoPhaseSampling_area_estimates <- function(arena.analyze, arena.chainSummary) {
# PACKAGES
usePackage("tidyverse")
usePackage("forestinventory")
arena.analyze$stratification <- ifelse(( arena.chainSummary$samplingStrategy == 3 | arena.chainSummary$samplingStrategy == 4 | arena.chainSummary$samplingStrategy == 5 ) & arena.chainSummary$stratumAttribute != "", TRUE, FALSE)
#######################################
# Results out as a list
Results_out <- list("ok", "Two-phase sampling: area estimation completed!", NA, NA)
#######################################
#
# Read commonAttribute (= joint category)
joint_category_ <- arena.chainSummary$commonAttribute
# If Joint category is missing, return error msg
if ( is.null(joint_category_) ) {
Results_out <- list("failed", "Two-phase sampling: Failed! Common Attribute is missing.", NA, NA)
return( Results_out )}
if ( joint_category_ == "") {
Results_out <- list("failed", "Two-phase sampling: Failed! Common Attribute is missing.", NA, NA)
return( Results_out )}
# Note, take parentCode from arena.schemaSummary in order to select the correct level!
cat_level <- arena.schemaSummary %>%
filter( name == joint_category_) %>%
select( categoryName, parentCode) %>%
mutate( level = ifelse( is.na(parentCode) | parentCode == "", 1, as.integer( stringr::str_sub( categoryName, -4, -2)))) %>%
select( level) %>%
pull()
if (cat_level > 1) print(paste0("WARNING: the common attribute is in a hierarchical table at level ", cat_level, ". It is assumed that all that category level codes are unique!"))
# get joint category label
joint_category_label_ <- paste(joint_category_, "label", sep = "_")
joint_category_Tablename <- arena.schemaSummary$categoryName[ arena.schemaSummary$name == joint_category_ ]
# take out [level_code]
joint_category_Tablename <- trimws( stringr::word( joint_category_Tablename, 1, sep= "\\["))
joint_category_Table <- categories[[ joint_category_Tablename ]] %>%
select(-uuid) %>%
filter( level == cat_level) %>%
mutate( code = as.character(code)) # Note: here we assume that all codes are unique !
rm( joint_category_Tablename )
##########################################################################
# get domain/stratum attribute
DomainAttribute_phase1 <- ""
df_Domain <- NA
if (arena.chainSummary$stratumAttribute != "" ) {
df_Domain <- categories[[ arena.chainSummary$stratumAttributeCategory ]] %>%
select( -uuid)
if ('code_joint' %in% names( df_Domain)) {
df_Domain$Domain_code = as.character( df_Domain$code_joint)
} else {
df_Domain$Domain_code = as.character( df_Domain$code)
}
# get variable name in 1st phase sample data (sampling_point_data)
domainDefaultValueExpression <- arena.schemaSummary %>%
filter(name == arena.chainSummary$stratumAttribute) %>%
select( defaultValue) %>%
pull() %>% trimws()
# Get domain attribute name in phase1 table. Note. This method does not work if domain is on level 2 or above! It must on level 1.
if ( stringr::str_detect( domainDefaultValueExpression, "categoryItemProp")) {
domainDefaultValueExpression = stringr::str_sub( domainDefaultValueExpression, 18, )
st = stringr::str_split( domainDefaultValueExpression, ",")[[1]][[2]]
st = trimws( gsub("'", '', st)) # remove single quotation marks
DomainAttribute_phase1 <- st
rm(st); rm(domainDefaultValueExpression)
} else {
DomainAttribute_phase1 <- arena.chainSummary$stratumAttribute
}
}
#########################################################################
# Get phase 1 data
data_phase1 <- categories[[ arena.chainSummary$phase1Category ]]
if ( is.null( data_phase1)) {
Results_out <- list("failed", "Two-phase sampling: Failed! Missing 1st phase category table!", NA, NA)
return( Results_out )
}
if ('code_joint' %in% colnames(data_phase1)) {
data_phase1$plot_id_ = data_phase1$code_joint
} else {
data_phase1$plot_id_ = data_phase1$level_1_code
}
data_phase1 <- data_phase1 %>%
filter(as.numeric(level_2_code) >= 1) %>%
dplyr::select(level_1_code,
level_2_code,
phase,
plot_id_,
all_of( joint_category_),
any_of( DomainAttribute_phase1)) %>%
mutate(across(everything(), as.character), phase = as.integer(phase))
if (DomainAttribute_phase1 %in% names(data_phase1)) {
data_phase1$DOMAIN <- data_phase1[[ DomainAttribute_phase1 ]]
data_phase1[[ DomainAttribute_phase1 ]] <- NULL
}
cluster_key_ <- NA
if (arena.chainSummary$clusteringEntity != "") cluster_key_ <- arena.chainSummary$clusteringEntityKeys
baseunit_key_ <- setdiff( arena.chainSummary$baseUnitEntityKeys, cluster_key_ )
if ( all( is.na(cluster_key_))) cluster_key_ <- ""
# Get active quantitative area-based categorical result variables from the Arena survey
result_attribute_names <- arena.chainSummary$resultVariables %>%
filter( active == TRUE & type == "Q" & areaBased == TRUE) %>%
mutate( name = ifelse(type == "Q", paste( name, "ha", sep = "_"), name)) %>%
select( name) %>%
pull()
reporting_categories <- arena.analyze$dimensions # arena.chainSummary$analysis$dimensions
reporting_categories_labels <- ""
computed_result_categories <- data.frame()
if (all( reporting_categories != "")) {
reporting_categories_labels <- paste( reporting_categories, "label", sep = "_")
computed_result_categories <- arena.chainSummary$resultVariables %>%
filter( active == TRUE & type == "C") %>%
filter( name %in% reporting_categories) %>%
select( name, table = categoryName )
}
# Get phase 2 data
data_phase2 <- get( arena.analyze$entity )
# Two-phase sampling, use combined attribute as stratum (Stratum__Common attribute)
if (arena.chainSummary$samplingStrategy == 5 & arena.analyze$stratification) {
strat_attribute_name <- paste( arena.chainSummary$stratumAttribute, arena.chainSummary$commonAttribute, sep = "__")
data_phase2[[strat_attribute_name]] <- paste( data_phase2[[arena.chainSummary$stratumAttribute]], data_phase2[[arena.chainSummary$commonAttribute]], sep = "__")
}
if ( arena.chainSummary$stratumAttribute != "") data_phase2$DOMAIN <- data_phase2[[ arena.chainSummary$stratumAttribute ]]
# Add labels for computed categorical result variables. These are not in the inputted data.
if ( nrow( computed_result_categories) > 0) {
for (i in(1:nrow( computed_result_categories) )) {
n = computed_result_categories$name[i]
t = categories[[ computed_result_categories$table[i] ]] %>%
dplyr::select( !!quo_name(n) := code, label) %>% # https://stackoverflow.com/questions/49650394/how-to-rename-a-variable-using-a-dynamic-name-and-dplyr
rename( !!paste0(colnames(.)[1], "_label") := 2) %>% # https://stackoverflow.com/questions/72957517/how-to-rename-a-column-using-string-from-another-column-in-r-dplyr
mutate( across( everything(), as.character))
n2 = colnames(t)[2]
if ((n %in% colnames(data_phase2)) & !(n2 %in% colnames(data_phase2))) {
data_phase2 <- data_phase2 %>%
dplyr::left_join( t, by = colnames(t)[1])
}
rm(n); rm(n2); rm(t)
}
}
# Copy cluster key attribute into "level_1_code"
if ( all( cluster_key_ != "")) {
cluster_attribute <- "level_1_code"
data_phase2 <- data_phase2 %>% unite(col = level_1_code, all_of( cluster_key_), sep = "*", remove = FALSE, na.rm = TRUE)
} else {
cluster_attribute <- NA
data_phase2$level_1_code <- ""
}
# Create "level_2_code"
data_phase2 <- data_phase2 %>% unite(col = level_2_code, all_of( baseunit_key_), sep = "*", remove = FALSE, na.rm = TRUE)
data_phase2 <- data_phase2 %>%
dplyr::select(level_1_code, level_2_code,
all_of( result_attribute_names),
any_of( reporting_categories),
any_of( reporting_categories_labels),
any_of( "DOMAIN"),
any_of( arena.chainSummary$stratumAttribute),
all_of( joint_category_),
all_of( joint_category_label_ )) %>%
mutate(phase = as.integer(2), plot_id_ = paste(level_1_code, level_2_code, sep = "*"))
# Merge the data of both phases
data_complete <- merge( data_phase1, data_phase2,
by = c("level_1_code", "level_2_code", "phase", "plot_id_", joint_category_, "DOMAIN"),
all.x = TRUE, all.y = TRUE)
######################
#### Two-phase land use type area ####
# Creation of the dummy variables for each land use
# Data frame field plots and selected attributes only
if (arena.chainSummary$stratumAttribute != "") {
cat( paste("Domain/Stratum column in 1st phase table: ", DomainAttribute_phase1), "\n")
cat( paste("Domain/Stratum column in 2nd phase table: ", arena.chainSummary$stratumAttribute), "\n")
cat( "Only 2-phase area estimates are currently verified to work. Rest of the results will be tested still.", "\n")
cat( "Processing..")
cat( "\n", "\n")
}
DomainClasses <- ""
if (DomainAttribute_phase1 != "") DomainClasses <- sort( unique( data_complete$DOMAIN ))
if (arena.chainSummary$stratumAttribute == "") {
DomainClasses <- c("0")
df_Domain <- data.frame(
Domain_code = "0",
area = 100.0,
label = "Unstratified")
if (arena.analyze$reportingArea != "") df_Domain$area <- as.numeric( arena.analyze$reportingArea)
}
jointCategoryClasses <- sort( unique( data_complete[[ joint_category_ ]]))
twophase_data <- data_complete
twophase_data <- twophase_data %>%
mutate( across( where(is.character), ~ replace_na(.x, "NA")))
twophase_data <- ronak_func( twophase_data, joint_category_, reporting_categories )
# names(twophase_data)
#### AREA ####
# Estimation of area proportions
i_count = 0
for (i_Domain in (1: length( DomainClasses))) {
Domain_area <- df_Domain %>%
filter(Domain_code == DomainClasses[i_Domain]) %>%
select( area ) %>%
as.numeric()
twophase_data_subset <- twophase_data
# get subset (i.e. Domain data)
if (arena.chainSummary$stratumAttribute != "") twophase_data_subset <- twophase_data %>% filter(DOMAIN == DomainClasses[i_Domain])
length( jointCategoryClasses)
two_phase_area <- list()
for (i in (1: length( jointCategoryClasses)) ) {
i_count <- i_count + 1
col_ph1 <- paste("joint_ph1", jointCategoryClasses[i], sep = "_")
col_ph2 <- paste("joint_ph2", jointCategoryClasses[i], sep = "_")
analysisExpression <- as.formula( paste( col_ph2, "~", col_ph1)) # https://stackoverflow.com/questions/52413017/create-formula-call-from-character-string
# compute count of samples in the land category class
samples_n1 <- sum( twophase_data_subset[[col_ph1]])
two_phase_area[[i]] <- forestinventory::twophase( formula = analysisExpression,
data = twophase_data_subset,
list(phase.col = "phase", terrgrid.id = 2), cluster = cluster_attribute)
# Summary of results
sum_Area <- two_phase_area[[i]]$estimation %>%
mutate( Report_label = joint_category_Table %>% filter(code == jointCategoryClasses[i]) %>% select(label) %>% pull()) %>%
mutate( Cover_percent = estimate * 100) %>%
mutate( Cover_SE = sqrt((estimate * (1 - estimate)/n1)) * 100) %>%
mutate( Total_area_ha = (Cover_percent/100) * Domain_area) %>%
mutate( Area_SE = sqrt(g_variance * Domain_area^2)) %>%
mutate( Domain_code = DomainClasses[i_Domain]) %>%
mutate( Report_code = jointCategoryClasses[i]) %>%
left_join( df_Domain %>% select(Domain_code, Domain_label = label), by = "Domain_code")
sum_Area = sum_Area[ , c(12:14, 7:11)]
sum_Area = cbind( sum_Area, two_phase_area[[i]]$samplesizes)
sum_Area$samples_n1 = samples_n1
if (i_count == 1) {
result_Area <- sum_Area
} else {
result_Area <- bind_rows( result_Area, sum_Area)
}
rm(col_ph1); rm(col_ph2); rm(analysisExpression)
}
}
row.names(result_Area) <- NULL
AREA_Results <- result_Area
# drop out not informative columns (n1_clust, n2_clust, n1, n2)
AREA_Results <- AREA_Results[ -c(9:12)]
names(two_phase_area) <- jointCategoryClasses
# format data similarly as in main estimation code
DF_aoi <- AREA_Results %>%
mutate( uuid = "0",
code = paste(Domain_code, Report_code, sep = "__"),
code_joint = code,
level = 1,
label = paste(Domain_label, Report_label, sep = " -- "),
area = Total_area_ha,
design_psu = 0,
design_ssu = 0) %>%
select(uuid, code, code_joint, level, label, area, design_psu, design_ssu)
# add a new category table: combined Stratum and Common attribute data, with area
categories[[ paste( arena.chainSummary$stratumAttribute, arena.chainSummary$commonAttribute, sep = "__") ]] <- DF_aoi
Results_out[[3]] <- DF_aoi
Results_out[[4]] <- twophase_data
Results_out[[5]] <- two_phase_area
rm( DF_aoi )
print( AREA_Results )
# define a folder for output files
if ( !exists('user_file_path')) user_file_path <- './user_output/'
# create a folder for files to be exported
if ( !dir.exists( user_file_path )) dir.create( user_file_path, showWarnings = FALSE )
out_path <- "area_estimates_(two-phase)/"
dir.create( paste0( user_file_path, "/", out_path), showWarnings = FALSE )
out_file_name <- paste0(user_file_path, out_path, "Arena_2phase_resuls_by_common_attribute.csv")
tryCatch({if (exists('user_file_path') & exists("AREA_Results")) write.csv(AREA_Results, out_file_name, row.names = F)},
warning = function( w ) { cat("No output - AREA_Results") },
error = function( e ) { cat("No output - AREA_Results")
})
# avoid using common, domain, clustering and base unit attribute as dimensions. Take these out.
reporting_categories <- reporting_categories[!reporting_categories %in% c(joint_category_, arena.chainSummary$stratumAttribute, baseunit_key_, cluster_key_)]
if (length( reporting_categories) == 0) reporting_categories = ""
return( Results_out )
# the rest of the code is experimental. Not used!
if ( all( reporting_categories != "")) {
# Estimation of area proportions by reporting categories
i_count <- 0
# loop across strata
for (i_Domain in (1: length( DomainClasses))) {
if (arena.chainSummary$stratumAttribute != "") twophase_data_subset <- twophase_data_subset %>% filter(DOMAIN == DomainClasses[i_Domain])
twophase_data_subset2 <- twophase_data_subset[ twophase_data_subset$DOMAIN == DomainClasses[i_Domain], ]
# get subset (i.e. Domain data)
Domain_label <- df_Domain %>% filter(Domain_code == DomainClasses[i_Domain]) %>% select(label) %>% pull()
# loop across (common) land categories in Domain [i_Domain]
for (i in (1: length( jointCategoryClasses ))) {
reportingCategoryClasses <- twophase_data_subset %>%
select( all_of( reporting_categories ))
jointCategory_label <- joint_category_Table %>% filter(code == jointCategoryClasses[i]) %>% select(label) %>% pull()
land_use_area <- AREA_Results$Total_area_ha[ AREA_Results$Domain_code == DomainClasses[i_Domain] & AREA_Results$Report_code == jointCategoryClasses[i] ]
land_use_cover <- AREA_Results$Cover_percent[ AREA_Results$Domain_code == DomainClasses[i_Domain] & AREA_Results$Report_code == jointCategoryClasses[i] ]
# loop across dimensions in land category i
for (j in (1: length( reporting_categories ))) {
Out_message <- paste0(jointCategory_label, " -- ", reporting_categories[j])
if (arena.chainSummary$stratumAttribute != "") Out_message <- paste0( Domain_label, " -- ", Out_message)
cat( paste0("Computing dimension: ", Out_message, "\n"))
unique_values_by_col <- sort( unique( reportingCategoryClasses[[j]], na.last = T))
# loop over classes in dimension j
for (k in (1:length( unique_values_by_col ))) {
i_count <- i_count + 1
attribute_label <- twophase_data_subset %>% filter( eval( parse( text = reporting_categories[j])) == unique_values_by_col[k] ) %>%
select(any_of(reporting_categories_labels[j])) %>% unique() %>% pull()
col_ph1 <- paste("joint_ph1", jointCategoryClasses[i], sep = "_")
joint_ph2 <- paste("joint_ph2", jointCategoryClasses[i], sep = "_")
col_ph2 <- paste("report_ph2_", reporting_categories[j], "__", unique_values_by_col[k], sep = "")
twophase_data_subset$col_phase2 <- twophase_data_subset[[ col_ph2 ]] * twophase_data_subset[[ joint_ph2 ]]
analysisExpression <- as.formula( paste( 'col_phase2 ~', col_ph1))
n_count <- sum( twophase_data_subset$col_phase2 )
b_small_area_estimation <- FALSE
twophase_data_subset$issmall <- NULL
# global estimator
if (!b_small_area_estimation) two_phase_area <- forestinventory::twophase( formula = analysisExpression,
data = twophase_data_subset,
list(phase.col = "phase", terrgrid.id = 2), cluster = cluster_attribute)
# Summary of results
sum_Area <- two_phase_area$estimation %>%
mutate( Cover_percent = ((estimate * 100) * 100) / land_use_cover) %>%
mutate( Cover_SE = suppressWarnings( sqrt((estimate * (1 - estimate)/n1)) * 100)) %>%
mutate( Total_area_ha = (Cover_percent/100) * land_use_area) %>%
mutate( Area_SE = suppressWarnings( sqrt(g_variance * land_use_area^2))) %>%
mutate( Domain_code = DomainClasses[i_Domain]) %>%
mutate( Report_code = jointCategoryClasses[i]) %>%
mutate( Dimension_code = unique_values_by_col[k]) %>%
mutate( Domain_label = Domain_label,
Report_label = jointCategory_label,
Dimension_label = attribute_label,
Domain_attribute_name = arena.chainSummary$stratumAttribute,
Report_attribute_name = joint_category_,
Dimension_attribute_name = reporting_categories[j])
sum_Area = sum_Area[ , c(17:19, 11:16, 7:10)]
sum_Area$n2_count = n_count
if (i_count == 1) {
result_Area <- sum_Area
} else {
result_Area <- bind_rows( result_Area, sum_Area)
}
rm(col_ph1); rm(col_ph2); rm(analysisExpression)
}
}
}
}
row.names(result_Area) <- NULL
result_Area <- result_Area %>% arrange(Domain_attribute_name, Dimension_attribute_name)
Results_out[[4]] <- result_Area
out_file_name <- paste0(user_file_path, out_path, "Arena_2phase_resuls_by_dimensions.csv")
tryCatch({if (exists('user_file_path') & exists("result_Area")) write.csv(result_Area, out_file_name, row.names = F)},
warning = function( w ) { cat("No output - Results by dimensions") },
error = function( e ) { cat("No output - Results by dimensions")
})
} # if (all( reporting_categories != ""))
return( Results_out )
}
#### Post stratification results with statistical estimators of variance
# To properly estimate the proportion of Forest estimators together with another
# post stratification variable (as ownership, stand origin, naturalness, etc) an
# extra estimate defined as the ratio of the corresponding two-phase estimates
# area needs to be produced in order to be able to present the improved two-phase
# estimates of total forest area partitioned by the used post stratification
# variable with the corresponding statistical estimates of shares of the variable
# categories on total forest area. As this statistical procedure was not available
# under R forest inventory package (in 2024), it's necessary to apply the following function
# that was build thanks to the contribution of Mr Radim Adolt, Forest specialist
# consultant from FAO.
#
# Double sampling for POST-STRATIFICATION estimator
fn_two_phase_ratio_estimator <- function(nominator, denominator)
{
t1 <- (nominator$estimation)$estimate
t2<- (denominator$estimation)$estimate
R12 <- t1/t2
u <- nominator$Rc_x_hat - denominator$mean_Rc_x_hat*R12
nominator_variable <-
substring(toString(nominator$input$formula),
gregexpr(" " , toString(nominator$input$formula))[[1]][1]+1,
gregexpr(", " , toString(nominator$input$formula))[[1]][2]-1)
nominator_variable_index <-
match(nominator_variable, names(nominator$input$data))
denominator_variable <-
substring(toString(denominator$input$formula),
gregexpr(" " , toString(denominator$input$formula))[[1]][1]+1,
gregexpr(", " , toString(denominator$input$formula))[[1]][2]-1)
denominator_variable_index <-
match(denominator_variable, names(denominator$input$data))
nominator2phase_data <-
nominator$input$data[nominator$input$data$phase==2,]
denominator2phase_data <-
denominator$input$data[denominator$input$data$phase==2,]
y1 <- nominator2phase_data[, nominator_variable_index]
y2 <- denominator2phase_data[, denominator_variable_index]
cluster <- nominator2phase_data$level_1_code
sum_y1_per_cluster <- aggregate(y1, by=list(cluster=cluster), sum)[,2]
sum_y2_per_cluster <- aggregate(y2, by=list(cluster=cluster), sum)[,2]
nplots_per_cluster <- aggregate(y2, by=list(cluster=cluster), length)[,2]
y1c <- sum_y1_per_cluster / nplots_per_cluster
y2c <- sum_y2_per_cluster / nplots_per_cluster
n1 <- (nominator$estimation)$n1
n2 <- (nominator$estimation)$n2
v1 <- 1 / t2^2 * (1 - n2/n1) / n2 / (n2-1) / mean(nplots_per_cluster)^2 *
sum(nplots_per_cluster^2 * (u - sum(u*nplots_per_cluster) /
sum(nplots_per_cluster))^2)
v2 <- 1 / t2^2 / n1 / (n2 - 1) / mean(nplots_per_cluster)^2 *
sum(nplots_per_cluster^2 * (y1c - R12*y2c)^2)
return (list(R12 = R12, var = v1 + v2, stderr = sqrt(v1 + v2)))
}