From 2c3b1677d660579d119909be9dadb084f0ca6297 Mon Sep 17 00:00:00 2001
From: Dongdong <kongdd.sysu@gmail.com>
Date: Mon, 23 Jul 2018 18:08:24 +1000
Subject: [PATCH] second test whittaker lambda formula

---
 DESCRIPTION                                   |   2 +-
 NEWS.md                                       |   6 +
 R/tools.R                                     |  35 +++-
 man/GOF.Rd                                    |  13 ++
 test/07_whit/03_evaluate_Whittaker_GOF.R      |  59 ++++--
 test/07_whit/04_read_GEE_whitMatrix.R         |  29 +--
 test/07_whit/main_phenofit_test.R             | 195 +++++++++++++-----
 .../whit_lambda/00_resample_points_1e3.R      |  19 ++
 .../07_whit/whit_lambda/02_whit_lambda_main.R |   5 +-
 test/GEE/read_gee.R                           |   9 +
 test/data/d01_GPPobs.R                        |  40 ++--
 test/stable/load_pkgs.R                       | 100 ++++++++-
 test/test-whit/whit/test-vcurve.R             |   3 -
 13 files changed, 374 insertions(+), 141 deletions(-)
 create mode 100644 NEWS.md
 create mode 100644 test/07_whit/whit_lambda/00_resample_points_1e3.R
 create mode 100644 test/GEE/read_gee.R
 delete mode 100644 test/test-whit/whit/test-vcurve.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 9d32954a..35dd8cea 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,7 +8,7 @@ Description: A remote sensing vegetation index phenology extraction package.
 License: MIT | file LICENSE
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.0.1.9000
+RoxygenNote: 6.0.1
 LinkingTo: Rcpp, RcppArmadillo
 Imports: Rcpp, RcppArmadillo, 
     tibble, dplyr, purrr, stringr, tidyr, ggplot2, lubridate, 
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 00000000..fcbcfbcd
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,6 @@
+# phenofit 0.1.2
+
+* Added a `NEWS.md` file to track changes to the package.
+
+
+
diff --git a/R/tools.R b/R/tools.R
index d36ba37d..7835a7b4 100644
--- a/R/tools.R
+++ b/R/tools.R
@@ -199,22 +199,39 @@ cv_coef <- function(x, w){
 #' @param Y_sim Numeric vector, corresponding simulated values
 #' @param w Numeric vector, weights of every points
 #' @param include.cv If true, cv will be returned. 
+#' 
+#' @return 
+#' \itemize{
+#' \item \code{RMSE} root mean square error
+#' \item \code{NSE} NASH coefficient
+#' \item \code{R2} correlation of determination
+#' \item \code{Bias} bias
+#' \item \code{Bias_perc} bias percentage 
+#' \item \code{MAE} mean absolute error 
+#' \item \code{pvalue} pvalue of \code{R}
+#' \item \code{n_sim} number of valid obs.
+#' \item \code{R} pearson correlation
+#' }
 #' @export
 GOF <- function(Y_obs, Y_sim, w, include.cv = FALSE){
     if (missing(w)) w <- rep(1, length(Y_obs))
 
-    # remove NA values in Y_sim, Y_obs and w
-    I <- which(!(is.na(Y_sim) | is.na(Y_obs) | is.na(w)))
+    # remove NA and Inf values in Y_sim, Y_obs and w
+    valid <- function(x) !is.na(x) & is.finite(x) 
+    
+    I <- which(valid(Y_sim) & valid(Y_obs) & valid(w))
     # n_obs <- length(Y_obs)
     n_sim <- length(I)
 
     Y_sim <- Y_sim[I]
     Y_obs <- Y_obs[I]
+    w     <- w[I]
 
     if (include.cv) CV <- cv_coef(Y_obs, w)
     if (is_empty(Y_obs)){
-        out <- c(Bias = NA, MAE = NA,RMSE = NA, NSE = NA, R2 = NA,
-             pvalue = NA, n_sim = NA, R = NA)
+        out <- c(RMSE = RMSE, NSE = NSE, R2 = R2, MAE = MAE, 
+            Bias = Bias, Bias_perc = Bias_perc, 
+            R = NA, pvalue = NA, n_sim = NA)
         if (include.cv) out <- c(out, CV)
         return(out) #R = R,
     }
@@ -228,10 +245,12 @@ GOF <- function(Y_obs, Y_sim, w, include.cv = FALSE){
 
     SSR    <- sum( (Y_sim - y_mean)^2 * w) 
     SST    <- sum( (Y_obs - y_mean)^2 * w)
-    R2     <- SSR / SST
+    # R2     <- SSR / SST
+    R2     <- summary(lm(Y_sim ~ Y_obs))$r.squared
 
     RE     <- Y_sim - Y_obs
     Bias   <- sum ( w*RE)     /sum(w)                     # bias
+    Bias_perc <- Bias/y_mean                              # bias percentage        
     MAE    <- sum ( w*abs(RE))/sum(w)                     # mean absolute error
     RMSE   <- sqrt( sum(w*(RE)^2)/sum(w) )                # root mean sqrt error
 
@@ -251,13 +270,15 @@ GOF <- function(Y_obs, Y_sim, w, include.cv = FALSE){
         message(e$message)
     })
 
-    out <- c(Bias = Bias, MAE = MAE,RMSE = RMSE, NSE = NSE, R2 = R2,
-             pvalue = pvalue, n_sim = n_sim, R = R)
+    out <- c(RMSE = RMSE, NSE = NSE, R2 = R2, MAE = MAE, 
+             Bias = Bias, Bias_perc = Bias_perc, 
+             R = R, pvalue = pvalue, n_sim = n_sim)
     if (include.cv) out <- c(out, CV)
     return(out)
 }
 
 
+
 #' Determinated correlation critical value
 #' 
 #' @param n length of observation.
diff --git a/man/GOF.Rd b/man/GOF.Rd
index 746c1f45..0b107ac5 100644
--- a/man/GOF.Rd
+++ b/man/GOF.Rd
@@ -15,6 +15,19 @@ GOF(Y_obs, Y_sim, w, include.cv = FALSE)
 
 \item{include.cv}{If true, cv will be returned.}
 }
+\value{
+\itemize{
+\item \code{RMSE} root mean square error
+\item \code{NSE} NASH coefficient
+\item \code{R2} correlation of determination
+\item \code{Bias} bias
+\item \code{Bias_perc} bias percentage 
+\item \code{MAE} mean absolute error 
+\item \code{pvalue} pvalue of \code{R}
+\item \code{n_sim} number of valid obs.
+\item \code{R} pearson correlation
+}
+}
 \description{
 Good of fitting
 }
diff --git a/test/07_whit/03_evaluate_Whittaker_GOF.R b/test/07_whit/03_evaluate_Whittaker_GOF.R
index 950d2ce7..9232a91b 100644
--- a/test/07_whit/03_evaluate_Whittaker_GOF.R
+++ b/test/07_whit/03_evaluate_Whittaker_GOF.R
@@ -19,6 +19,7 @@ if (!file.exists(file)){
 }else{
     load(file)
 }
+
 # load("D:/Documents/GoogleDrive/phenofit.rda")
 
 methods <- c('AG', 'BECK', 'ELMORE', 'ZHANG', 'whit_R', 'whit_gee')[-5]
@@ -29,35 +30,58 @@ prefix <- c("phenoflux", "phenocam")[i]
 df <- if(i == 1) df_flux else df_cam
 st <- if(i == 1) st_flux else st_cam
 
-df      <- df[iters == "iter2"]
 st$IGBPname %<>% factor(IGBPnames_006)
+st <- st[order(IGBPname,site), ] # reorder according to IGBP
+st[site %in% sites_sel, titlestr := sprintf("(%s) %s, %s", letters[1:.N],site, IGBPname)]
 
 # make sure different curve fitting methods have the same length fitting
-formula <- if(i == 1) formula(site+date+t+y+GPP_NT+GPP_DT+SummaryQA~meth) else
-    formula(site+date+t+y+gcc+vci+SummaryQA~meth)
-
-over_perform(df, formula, prefix)
+formula <- if(i == 1) formula(site+date+t+y+GPP_NT+GPP_DT+SummaryQA+iters~meth) else
+    formula(site+date+t+y+gcc+vci+SummaryQA+iters~meth)
+IGBP.all = F
+if (i == 1){
+    over_perform(df[iters == "iter2"], formula, prefix, IGBP.all = IGBP.all)
+} else{
+    over_perform(df[iters == "iter2"], formula, prefix, ylim2 = c(64, 100), IGBP.all = IGBP.all)
+}
 
 # site figure data input
-
 df_trim <- dcast(df, formula, value.var = "value", fun.aggregate = mean) # %>% na.omit()
 df_trim$SummaryQA %<>% factor(qc_levels)
 # df_trim <- melt(df_trim, measure.vars = methods, variable.name = "meth")
 
-sites <- unique(df$site)
+sites    <- st$site
+# sites <- unique(df$site)
 sitename <- sites[100]
 
-vars <- c("get_range", "save_pdf", "lgd_vci", "lgd_gpp",
-          "qc_levels", "qc_colors", "qc_shapes", "methods")
-cl <- cluster_Init(pkgs = c("data.table", "ggplot2", "magrittr"))
-clusterExport(cl, vars)
-res <- parLapplyLB(cl, sites, plot_whit,
-                   df_trim, st, prefix_fig = paste0("whit_", prefix))
-for (i in seq_along(sites)){
-    runningId(i)
-    sitename <- sites[i]
-    plot_whit(sitename, df_trim, st, prefix_fig = paste0("whit_", prefix))
+# vars <- c("get_range", "save_pdf", "lgd_vci", "lgd_gpp",
+#           "qc_levels", "qc_colors", "qc_shapes", "methods")
+# cl <- cluster_Init(pkgs = c("data.table", "ggplot2", "magrittr"))
+# clusterExport(cl, vars)
+# res <- parLapplyLB(cl, sites, plot_whit,
+#                    df_trim, st, prefix_fig = paste0("whit_", prefix))
+
+plot_whits <- function(sites, df_trim, file){
+    ps <- list()
+    for (i in seq_along(sites)){
+        runningId(i)
+        sitename <- sites[i]
+        # for single method, ggplot obj return
+        ps[[i]] <- plot_methods(sitename, df_trim, st, prefix_fig = paste0("whit_", prefix), methods = "whit_gee", show.legend = F)
+        # print(p)
+        # for all methods, grob obj return
+        # p <- plot_methods(sitename, df_trim, st, prefix_fig = paste0("whit_", prefix), methods)
+    }
+
+    ylab_r  <- expression("GPP ( gC "*mm^-1*d^-1*" )")
+    # ylab_r  <- "VCI"
+    FigsToPages(ps, lgd_gpp, ylab_r, file, width = 10)
 }
+plot_whits(sites, df_trim, "gee_whit_flux166.pdf") # all sites
+
+## 2. select representative points for fluxnet
+sites_sel <- c("RU-Fyo", "FR-Pue", "US-UMB", "BE-Vie", "US-KS2", "US-Whs",
+    "AU-How", "AU-Dry", "CH-Fru", "US-Los", "DE-Geb")
+plot_whits(sites_sel, df_trim, "gee_whit_flux11.pdf") # all sites
 
 # merge_pdf('../whit_phenoflux166.pdf', indir = "./")
 # merge_pdf('whit_phenoflux166.pdf', indir = "Figure/")
@@ -77,6 +101,5 @@ merge_pdf('whit_phenoflux166.pdf', indir = "Figure", "whit_phenoflux.*.pdf", del
 #                                   "snow/ice" = "#F8766D", "cloud" = "#C77CFF"), drop = F) +
 #     scale_shape_manual(values = c(19, 15, 4, 17), drop = F) +
 #     scale_y_continuous(lim = lim_raw)
-
 # p3 <- ggplot_dual_axis(p1, p2) #%>% as.ggplot()
 # p <- ggplot_dual_axis(p3, p0, add_yaxis_r = F)
diff --git a/test/07_whit/04_read_GEE_whitMatrix.R b/test/07_whit/04_read_GEE_whitMatrix.R
index 763d0a7e..d08389e8 100644
--- a/test/07_whit/04_read_GEE_whitMatrix.R
+++ b/test/07_whit/04_read_GEE_whitMatrix.R
@@ -26,7 +26,7 @@ lgd   <- phenofit:::make_legend(linename = c("iter1", "iter2"),
                                 linecolor = c("blue", "red"))
 
 file <- "whit_GEE.pdf"
-Cairo::CairoPDF(file, 10, 4)
+# Cairo::CairoPDF(file, 10, 4)
 # par(mfrow = c(4, 1), mar = c(1, 2, 3, 1), mgp = c(1.5, 0.6, 0))
 ## sometimes sample and reduceRegions result is different
 ps  <- list()
@@ -64,30 +64,7 @@ for (i in seq_along(sites)){
     # grid.draw(p2)
 }
 
-dev.off()
-file.show(file)
+# dev.off()
+# file.show(file)
 
 file = "Fig3_whit_point_example.pdf"
-Cairo::CairoPDF(file, 10, nrow*1.6)
-
-nrow  <- 6
-npage <- ceiling(length(ps)/nrow)
-for (i in 1:npage){
-    runningId(i)
-
-    I_beg <- (i - 1) * nrow + 1
-    I_end <- min(i*nrow, length(ps))
-
-    x  <- c(ps[I_beg:I_end], list(lgd))
-    nx <- length(x)
-    x[[nx - 1]] <- x[[nx - 1]] +
-        theme(axis.title.x = element_text(),
-              axis.text.x = element_text())
-    p2 <- gridExtra::arrangeGrob(grobs = x, nrow = nx, ncol = 1,
-                                 heights = c(rep(5, nx - 2), 5.5, 1),padding = unit(1, "line"),
-                                 left = textGrob("EVI", rot = 90, gp=gpar(fontsize=14)))
-    if (i != 1) grid.newpage();
-    grid.draw(p2)
-}
-dev.off()
-file.show(file)
diff --git a/test/07_whit/main_phenofit_test.R b/test/07_whit/main_phenofit_test.R
index 403e54d6..8f555492 100644
--- a/test/07_whit/main_phenofit_test.R
+++ b/test/07_whit/main_phenofit_test.R
@@ -6,9 +6,14 @@ fontsize = 14
 save_pdf <- function(file = "Rplot.pdf", width = 10, height = 5, p, open = F){
     if (missing(p)) p <- last_plot()
 
-    FUN <- print
-    if ("grob" %in% class(p)) FUN <- grid::grid.draw
 
+    if ("grob" %in% class(p)) {
+        FUN <- grid::grid.draw
+    } else{
+        FUN <- print
+    }
+    print(FUN)
+    # Cairo::CairoPDF, if only one figure cairo_pdf is the best
     Cairo::CairoPDF(file, width = width, height = height)
     FUN(p)
     dev.off()
@@ -68,13 +73,18 @@ agree_index <- function(Y_obs, Y_sim){
 }
 
 # boxplot for over all correlation and agreement index
-boxplot <- function(p, width = 0.9){
+boxplot <- function(p, width = 0.95){
+    # width  <- 0.95
+    width2 <- width - 0.15
+    dodge <- position_dodge(width = width)
+
     p + stat_summary(fun.data = box_qtl,
-                 position = position_dodge(width = width),
-                 geom = "errorbar", width = width) +
-    geom_boxplot2(coef = 0, width = width,
+                     position = dodge,
+                     geom = "errorbar", width = width2) +
+        geom_boxplot2(coef = 0,
+                  width = width2,
                  lwd = 0.3,
-                 notch = F, outlier.shape = NA) +
+                 notch = F, outlier.shape = NA, position=dodge) +
     theme_light(base_size = fontsize, base_family = "Arial") +
     theme(legend.position = c(1-0.01, 0.01), legend.justification = c(1, 0),
           panel.grid.major = element_line(linetype = 2),
@@ -133,30 +143,60 @@ get_phenofit_result <- function(infile){
 methods <- c('AG', 'BECK', 'ELMORE', 'ZHANG', 'whit_R', 'whit_gee')[-5]
 #' @examples
 #' over_perform(df, formula, prefix)
-over_perform <- function(df, formula, prefix){
+over_perform <- function(d, formula, prefix, ylim2, IGBP.all = F){
     # only period when all curve fitting methods have result is kept.
-    df_trim <- dcast(df, formula, value.var = "value", fun.aggregate = mean) %>% na.omit()
-    df_trim <- melt(df_trim, measure.vars = methods, variable.name = "meth")
+    df_trim <- dcast(d, formula, value.var = "value", fun.aggregate = mean) %>% na.omit()
+    df_trim[, raw := y]
+    df_trim <- melt(df_trim, measure.vars = c( "raw", methods), variable.name = "meth")
+
+    new_levels <- c("raw ", "AG ", "Beck ", "Elmore ", "Zhang ", "WH")
+    cols <- hue_pal()(5) %>% c("black", .)%>% set_names(new_levels)
 
+    df_trim$meth %<>% mapvalues(c("raw", methods), new_levels)
     # visualization
-    info_ai <- df_trim[SummaryQA == "good", .(ai = agree_index(y, value)), .(site, meth)] %>% merge(st)
+    info_ai <- df_trim[SummaryQA == "good" & meth != "raw ",
+                       .(ai = agree_index(y, value)), .(site, meth)] %>% merge(st)
     if(i == 1){
         info_r  <- df_trim[, .(R = stat_fun(value, GPP_DT)), .(site, meth)] %>% merge(st)
     }else{
         info_r  <- df_trim[, .(R = stat_fun(value, vci)), .(site, meth)] %>% merge(st)
     }
 
-    # 1. show correlation
-    p1 <- ggplot(info_r, aes(IGBPname, R, colour = meth), position = "dodge") %>%
-        boxplot() %>% `+`(labs(x = "IGBP", y = "Correlation (r)"))
+    # add a column 'all', independent of IGBP
+    add_IGBPall <- . %>% {
+        temp <- .; temp$IGBPname <- "all"
+        rbind(., temp)
+    }
+    if (IGBP.all){
+        info_ai %<>% add_IGBPall()
+        info_r  %<>% add_IGBPall()
+    }
 
-    p2 <- ggplot(info_ai, aes(IGBPname, ai, colour = meth), position = "dodge") %>%
+    # 1. show correlation, , alpha = 0.6, fill = meth
+    p1 <- { ggplot(info_r, aes(IGBPname, R, color = meth), position = "dodge") +
+                scale_colour_manual(values = cols,
+                                    guide = guide_legend(direction = "horizontal", nrow = 1, keywidth = 1)) +
+                # scale_fill_manual(values = cols) +
+                labs(x = "IGBP", y = "Correlation (r)")
+          } %>% boxplot()
+    p1 <- p1 +
+        # geom_text(data = info_r[1, ], aes(fontface = "bold"), x = -Inf, y = -Inf,
+        #           label = c("(a)"), hjust = -1, vjust = -2.6,
+        #           show.legend = F, size = 4) +
+        theme(axis.text.x = element_blank(),
+              axis.title.x = element_blank(),
+              plot.margin = margin(3, 3, 2, 3))
+
+    p2 <- {ggplot(info_ai, aes(IGBPname, ai, colour = meth), position = "dodge") +
+        scale_colour_manual(values = cols) } %>%
         boxplot() %>% `+`(labs(x = "IGBP", y = "Agreement Index (AI)"))
-    p2 <-p2 + theme(legend.position = 'none')
+    p2 <-p2 + theme(legend.position = 'none',
+                    plot.margin = margin(3, 3, 2, 3))
 
-    p <- gridExtra::arrangeGrob(p1, p2, nrow = 2, heights = c(1, 1), padding = unit(1, "line"))
-
-    save_pdf(sprintf("valid_%s.pdf", prefix), 9, 8, p)
+    if (!missing(ylim2)) p2 <- p2 + coord_cartesian(ylim = ylim2)
+    p <- gridExtra::arrangeGrob(p1, p2, nrow = 2, heights = c(0.9, 1), padding = unit(1, "line"))
+    # grid.draw(p)
+    save_pdf(sprintf("valid_%s.pdf", prefix), 9, 8, p, open = T)
     # save_pdf(sprintf("valid_%s_AI.pdf", prefix), 12, 5, p2)
 }
 
@@ -200,36 +240,69 @@ get_range <- function(d, alpha = c(0, 1)){
     unlist(res)
 }
 
-lgd_vci <- phenofit:::make_legend(linename = c("Curve fitting", "VCI"),
-                   linecolor = c("black", "blue"))
-lgd_gpp <- phenofit:::make_legend(linename = c("Curve fitting", "GPP"),
-                   linecolor = c("black", "blue"))
-
+# re-select colors
+cols <- colors()[c(74, 134, 50)]
+cols <- c("blue", "red", cols[3]) #"#B2B302"
+color_valid <- cols[3] #"green4"
+
+lgd_vci <- phenofit:::make_legend(linename = c("iter1", "iter2", "VCI"),
+                   linecolor = cols)
+lgd_gpp <- phenofit:::make_legend(linename = c("iter1", "iter2", "GPP"),
+                   linecolor = cols)
+
+#' plot_methods
+#'
+#' plot curve fitting series and validation data (i.e.GPP or VCI ) to check the
+#' smoothing performance
+#'
+#' @param df_trim A data.table of two iters curve fitting result.
+#' For MODIS product, the current year last value maybe same as
+#' the coming year first value. Need to remove the duplicated data. Besides,
+#' We don't constrain the equal length of different curve fitting series as
+#' performance index part.
+#' @param st A dataframe of station information, ID, site, IGBPname, lat, lon.
+#' @param methods one of 'AG', 'BECK', 'ELMORE', 'ZHANG', 'whit_R' and 'whit_gee'.
+#'
 #' @examples
+#' \dontrun{
 #' plot_whit(sitename, df_trim, st, prefix_fig = "whit")
-plot_whit <- function(sitename, df_trim, st, prefix_fig = "whit"){
+#' }
+plot_methods <- function(sitename, df_trim, st, prefix_fig = "whit", methods, show.legend = T){
     ## figure title and filename
     sp    <- st[site == sitename, ] # station point
-    titlestr <- with(sp, sprintf('[%03d,%s] %s, lat = %5.2f, lon = %6.2f',
+    # titlestr <- with(sp, sprintf('[%03d,%s] %s, lat = %5.2f, lon = %6.2f',
+    #                                  ID, site, IGBPname, lat, lon))
+    titlestr <- sp$titlestr
+    if ( length(titlestr) == 0 || is.na(titlestr) ){
+        titlestr <- with(sp, sprintf('[%03d,%s] %s, lat = %5.2f, lon = %6.2f',
                                      ID, site, IGBPname, lat, lon))
+    }
     file_pdf <- sprintf('Figure/%s_[%03d]_%s.pdf', prefix_fig, sp$ID[1], sp$site[1])
 
     ##
-    x <- df_trim[site == sitename, ]
+    x <- df_trim[site == sitename , ]
     if (all(is.na(x$whit_gee))) return()
     # d <- melt(x, measure.vars = methods, variable.name = "meth")
-    d <- melt(x, measure.vars = c(colnames(x)[c(4, 5:6)], methods), variable.name = "meth")
-    ## scale validation variable (e.g. GPP or VCI), to keep the same range as
-    # `whit_gee`
-    lim_fit   <- get_range(d[ grep("whit_gee", meth)], alpha = c(0.01, 0.99))
-    lim_valid <- get_range(d[ grep("GPP|vci", meth)], alpha = c(0.01, 0.99))
-
+    d <- melt(x,
+              measure.vars = c(contain(x, "^y$|GPP_NT|GPP_DT|vci|gcc"), methods),
+              variable.name = "meth")
+    pdat  <- d[meth %in% methods]
+
+    ## scale validation variable (e.g. GPP or VCI)
+    # The range of validation variable and \code{whit_gee} should be equal.
+    d_valid_scale <- d[meth %in% c("whit_gee", "GPP_DT", "vci") & iters == "iter2"] %>%
+        dcast(site+date~meth, value.var = "value") %>% na.omit() %>%
+        melt(id.vars = c("site", "date"), variable.name = "meth")
+    lim_fit   <- get_range(d_valid_scale[ grep("whit_gee", meth)], alpha = c(0.01, 0.99))
+    lim_valid <- get_range(d_valid_scale[ grep("GPP|vci", meth)], alpha = c(0.01, 0.99))
     lim_raw   <- get_range(d[ grep("y", meth)]) # ylim
 
     # r <- phenofit:::.normalize(y_fit, y_raw)
     # lim_valid_adj <- lm(lim_valid~r) %>% predict(data.frame(r = c(0, 1)))
     coef <- lm(lim_valid~lim_fit) %>% coef() # coefs used to rescale validation data
 
+    ## only keep iter2
+    x <- df_trim[site == sitename & iters == "iter2", ]
     if ("vci" %in% colnames(x)){
         d_valid <- x[, .(valid = (vci - coef[1])/coef[2]), .(site, date, t)]
         ylab_r  <- "VCI"
@@ -244,34 +317,58 @@ plot_whit <- function(sitename, df_trim, st, prefix_fig = "whit"){
 
     ## ggplot, not only whit_gee, I also need to know all curve fitting methods
     #  performance
-    p1 <- ggplot(d[-grep("y|GPP|vci|gcc", meth)], aes(date, value)) +
-        geom_line(color = "black", size = 0.9) +
-        geom_line(data = d_valid, aes(date, valid), size = 0.9, color = "blue") +
-        geom_point(data = d_raw, aes(date, y, shape = SummaryQA, color = SummaryQA), size = 1.2) +
+    # y|GPP|vci|gcc|
+
+    IsSingle <- length(methods) == 1
+    if (IsSingle){
+        d_lab <- data.frame(meth = methods, lab = titlestr)
+    }else{
+        d_lab <- data.frame(meth = methods,
+                         lab = sprintf("(%s) %-8s", letters[1:length(methods)], methods))
+    }
+    lwd <- 0.65
+    # color_valid <- "green4" # set as global variable
+    p1 <- ggplot(pdat, aes(date, value)) +
+        geom_vline(xintercept = ymd(0101 + (2001:2017)*1e4),
+                   color = "white", linetype = 3, size = 0.4) +
+        geom_point(data = d_raw, aes(date, y, shape = SummaryQA, color = SummaryQA), size = 1.4) +
+        geom_line(data = pdat[iters == "iter1"], color = "blue", size = lwd) +
+        geom_line(data = pdat[iters == "iter2"], color = "red", size = lwd) +
+        geom_line(data = d_valid, aes(date, valid), size = lwd, color = color_valid) +
         labs(y = "EVI") +
         theme(legend.position = "none",
-              axis.text.y.right = element_text(color = "blue"),
-              axis.title.y.right = element_text(color = "blue"),
+              # axis.text.y.left = element_text(color = cols[2]), #iter2 color
+              # axis.title.y.left = element_text(color = cols[2]),
+              axis.text.y.right = element_text(color = color_valid),
+              axis.title.y.right = element_text(color = color_valid),
               plot.margin = margin(t = 4, r = 2, b = 0, l = 2, unit = "pt"),
               legend.margin = margin(),
+              strip.text = element_blank(),
+              panel.grid.minor = element_blank(),
+              # panel.grid.major.x = element_blank(),
+              # panel.grid.major.y = element_line(size = 0.2),
+              panel.grid = element_line(size = 0.4) #linetype = 2
               # axis.ticks.y.right = element_text(color = "blue"),
               ) +
         scale_y_continuous(lim = lim_raw,
                            sec.axis = sec_axis(~.*coef[2]+coef[1], name = ylab_r)) +
+        scale_x_date(breaks = ymd(0101 + seq(2001, 2017, 4)*1e4)) +
         facet_wrap(~meth, ncol = 1) +
         scale_color_manual(values = qc_colors, drop = F) +
         scale_shape_manual(values = qc_shapes, drop = F) +
-        ggtitle(titlestr)
-
-    df_lab <- data.frame(meth = methods,
-                         lab = sprintf("(%s) %-8s", letters[1:length(methods)], methods))
+        geom_text(data = d_lab, aes(label = lab), fontface = "bold",
+            x = -Inf, y =Inf, vjust = 1.5, hjust = -0.08)
 
-    p1 <- p1 + geom_text(data = df_lab, x = -Inf, y =Inf, vjust = 1.5, hjust = -0.08,
-                   aes(label = lab), fontface = "bold") +
-        theme(strip.text = element_blank())
+    if (!IsSingle) p1 <- p1 + ggtitle(titlestr)
 
-    #
-    p1 <- gridExtra::arrangeGrob(p1, lgd, nrow = 2, heights = c(20, 1), padding = unit(0.5, "line")) #return,
+    if (show.legend)
+        p1 <- gridExtra::arrangeGrob(p1, lgd, nrow = 2,
+            heights = c(20, 1), padding = unit(0.5, "line")) #return,
 
-    save_pdf(file_pdf, 11, 7, p = p1)
+    if (!IsSingle) save_pdf(file_pdf, 11, 7, p = p1)
+    p1
 }
+
+
+
+
diff --git a/test/07_whit/whit_lambda/00_resample_points_1e3.R b/test/07_whit/whit_lambda/00_resample_points_1e3.R
new file mode 100644
index 00000000..36e33a59
--- /dev/null
+++ b/test/07_whit/whit_lambda/00_resample_points_1e3.R
@@ -0,0 +1,19 @@
+library(Ipaper)
+library(magrittr)
+library(maptools)
+library(rgdal)
+
+## point files
+file <- "F:/Github/lc_005/shp/st_1000.shp"
+sp <- readOGR(file) #%>% st_sfc()
+sp@data <- data.frame(site = 1:nrow(sp), IGBPcode_005deg = sp$IGBPcode)
+
+writePointsShape(sp, "st_1e3.shp")
+## raster
+file_left = "F:/Github/lc_005/grid/MCD12Q1_2010_006_20_left.tif"
+r <- read_stars(file_left) %>% st_as_sfc
+
+
+res <- over(sp[, "IGBPcode"], r)
+
+r <- rgdal::readGDAL(file_left)
diff --git a/test/07_whit/whit_lambda/02_whit_lambda_main.R b/test/07_whit/whit_lambda/02_whit_lambda_main.R
index 039f791c..e659f6e8 100644
--- a/test/07_whit/whit_lambda/02_whit_lambda_main.R
+++ b/test/07_whit/whit_lambda/02_whit_lambda_main.R
@@ -20,10 +20,11 @@ nptperyear <- 23
 
 optim_lambda <- function(sitename, df, IsPlot = F){
     # sitename <- sites[i]#; grp = 1
-    d    <- df[site == sitename]
+    d     <- df[site == sitename]
+    dnew  <- add_HeadTail(d) # 
     cat(sprintf('site: %s ...\n', sitename))
 
-    IGBP <- d$IGBPcode[1]
+    IGBP  <- d$IGBPcode[1]
     INPUT <- check_input(d$t, d$y, d$w, trim = T, maxgap = nptperyear / 4, alpha = 0.02)
 
     # optim lambda by group
diff --git a/test/GEE/read_gee.R b/test/GEE/read_gee.R
new file mode 100644
index 00000000..ec2d56ef
--- /dev/null
+++ b/test/GEE/read_gee.R
@@ -0,0 +1,9 @@
+# read GEE json data, clipped by buffered points
+source("test/stable/load_pkgs.R")
+
+indir <- paste0(dir_flush, "ET&GPP/fluxnet212")
+df <- read_jsons.gee(indir, pattern = ".*MOD16A2.*.geojson", IsSave = T)  # ET
+df <- read_jsons.gee(indir, pattern = ".*MOD17A2H.*.geojson", IsSave = T) # GPP
+df <- read_jsons.gee(indir, pattern = ".*MOD13A1.*.geojson", IsSave = T)  # 500m EVI & NDVI
+df <- read_jsons.gee(indir, pattern = ".*MOD13Q1.*.geojson", IsSave = T)  # 250m EVI & NDVI
+df <- read_jsons.gee(indir, pattern = ".*MCD15A3H.*.geojson", IsSave = T) # 500m, 4-day, LAI
diff --git a/test/data/d01_GPPobs.R b/test/data/d01_GPPobs.R
index 11987263..0748873b 100644
--- a/test/data/d01_GPPobs.R
+++ b/test/data/d01_GPPobs.R
@@ -1,23 +1,9 @@
 rm(list = ls())
-
-reorder_name <- function (d,
-                          headvars = c("site", "date", "year", "doy", "d8", "d16"),
-                          tailvars = "")
-{
-    headvars %<>% intersect(colnames(d))
-    tailvars %<>% intersect(colnames(d))
-    varnames <- c(headvars, setdiff(colnames(d), union(headvars,
-                                                       tailvars)), tailvars)
-    if (is.data.table(d)) {
-        d[, ..varnames]
-    } else {
-        d[, varnames]
-    }
-}
-# prepare gpp input data, 04 March, 2018
 source("test/stable/load_pkgs.R")
 stations <- fread("F:/Github/MATLAB/PML/data/flux_166.csv")
 
+# prepare gpp input data, 04 March, 2018
+
 # 1. fluxsite observations -----------------------------------------------------
 df_raw <- fread("F:/Github/PML_v2/fluxsites_tidy/data/fluxsites166_official_dd.csv")
 df_raw <- merge(df_raw, stations[, .(site, lat, IGBP)])
@@ -52,14 +38,20 @@ save("d_obs", file = "Y:/R/phenofit/data/phenofit_INPUT_flux136_obs.rda")
 
 rm(list = ls())
 
-# 2. zhang yao, 2017, scientific data, VPMGPP, 8day -------------------------------
-files <- dir("C:/Users/kon055/Desktop/VPMGPP", full.names = T) %>%
-    set_names(gsub(".csv","", basename(.)))
-d_vpm <- ldply(files, fread, .id = "site")[, c(1, 3, 4)] %>%
-    set_names(c("site", "date", "GPP")) %>% as.data.table()
-d_vpm[, date := as.Date(date, "%Y%j")]
-d_vpm %<>% set_colnames(c("site", "date", "GPP_vpm"))
-d_vpm$w <- 1
+# 2. zhang yao, 2017, scientific data, VPMGPP, 8day ----------------------------
+indir <- paste0(dir_flush, "ET&GPP/fluxnet212/GPP_vpm")
+file_vpm <- paste0(dirname(indir), "/flux212_GPP_vpm(zhang2017).csv")
+if (!file.exists(file_vpm)){
+    files <- dir(indir, full.names = T) %>% set_names(gsub(".csv","", basename(.)))
+    d_vpm <- ldply(files, fread, .id = "site")[, c(1, 3, 4)] %>%
+        set_names(c("site", "date", "GPP")) %>% as.data.table()
+    d_vpm[, date := as.Date(as.character(date), "%Y%j")]
+    d_vpm %<>% set_colnames(c("site", "date", "GPP_vpm"))
+    d_vpm$w <- 1
+    fwrite(d_vpm, file_vpm)
+} else{
+    d_vpm <- fread(file_vpm)
+}
 
 # main functions ----------------------------------------------------------
 clip_selectedSite <- function(INPUT){
diff --git a/test/stable/load_pkgs.R b/test/stable/load_pkgs.R
index c57e54d2..9e7bd5ac 100644
--- a/test/stable/load_pkgs.R
+++ b/test/stable/load_pkgs.R
@@ -20,6 +20,8 @@ library(pbmcapply)
 library(MASS)
 library(broom)
 
+fontsize  <- 14
+
 qc_values <- c("0", "1", "2", "3")
 qc_levels <- c("good", "margin", "snow/ice", "cloud")
 qc_colors <- c("grey60", "#00BFC4", "#F8766D", "#C77CFF") %>% set_names(qc_levels)
@@ -34,7 +36,7 @@ file_st_flux <- paste0(dir_data, "st_phenoflux166.csv")
 file_flux <- paste0(dir_data, "phenoflux166_MOD13A1_INPUT.csv")
 file_cam  <- paste0(dir_data, "phenocam133_MOD13A1_INPUT.csv")
 
-## 
+##
 if (.Platform$OS.type == "unix"){
     dir_climate <- "/OSM/CBR/CoRE/working/timeseries/Climate/"
     dir_flush   <- "/flush1/kon055/"
@@ -46,12 +48,12 @@ if (.Platform$OS.type == "unix"){
 # MCD12Q1.006 land cover 1-17 and 255, IGBP scheme
 IGBPnames_006 <- c("ENF", "EBF", "DNF", "DBF", "MF" , "CSH",
                    "OSH", "WSA", "SAV", "GRA", "WET", "CRO",
-                   "URB", "CNV", "SNOW", "BSV", "water", "UNC")
+                   "URB", "CNV", "SNO", "BSV", "water", "UNC")
 # MCD12Q1.005 land cover 0-16 and 254, IGBP scheme
 # https://code.earthengine.google.com/dataset/MODIS/051/MCD12Q1
 IGBPnames_005 <- c("water", "ENF", "EBF", "DNF", "DBF", "MF" , "CSH",
                    "OSH", "WSA", "SAV", "GRA", "WET", "CRO",
-                   "URB", "CNV", "SNOW", "BSV", "UNC")
+                   "URB", "CNV", "SNO", "BSV", "UNC")
 
 get_phenofit <- function(sitename, df, st, prefix_fig = 'phenofit_v3'){
     d     <- df[site == sitename, ] # get the first site data
@@ -124,17 +126,17 @@ get_phenofit <- function(sitename, df, st, prefix_fig = 'phenofit_v3'){
 # rename phenofit phenology metrics names
 fix_level <- function(x){
     phenophase <- c(
-        'TRS1.sos', 'TRS2.sos', 'ZHANG.Greenup', 'GU.UD', 
+        'TRS1.sos', 'TRS2.sos', 'ZHANG.Greenup', 'GU.UD',
         'TRS5.sos', 'TRS6.sos', 'DER.sos',
-        'ZHANG.Maturity','GU.SD', 'DER.pop', 
-        'ZHANG.Senescence', 'GU.DD', 
+        'ZHANG.Maturity','GU.SD', 'DER.pop',
+        'ZHANG.Senescence', 'GU.DD',
         'TRS5.eos', 'TRS6.eos','DER.eos',
         rev(c('TRS1.eos', 'TRS2.eos', 'ZHANG.Dormancy', 'GU.RD')))
     phenophase_spl <- c(
-        'TRS1.SOS', 'TRS2.SOS', 'Greenup', 'UD', 
+        'TRS1.SOS', 'TRS2.SOS', 'Greenup', 'UD',
         'TRS5.SOS', 'TRS6.SOS', 'DER.SOS',
-        'Maturity', 'SD', 'POP', 
-        'Senescence', 'DD', 
+        'Maturity', 'SD', 'POP',
+        'Senescence', 'DD',
         'TRS5.EOS', 'TRS6.EOS','DER.EOS',
         rev(c('TRS1.EOS', 'TRS2.EOS', 'Dormancy', 'RD')))
     factor(x, phenophase) %>% mapvalues(phenophase, phenophase_spl)#return
@@ -200,7 +202,7 @@ read_whitMat.gee <- function(file){
 
 
 #' read gee_phenofit whittaker from multiple json files
-#' 
+#'
 #' @examples
 #' indir <- "D:/Document/GoogleDrive/phenofit/data/gee_phenofit/v2/"
 #' files <- dir(indir, "*.geojson", full.names = T)
@@ -217,7 +219,7 @@ read_whitMats.gee <- function(files){
 }
 
 #' read gee whit matrix
-#' 
+#'
 readwhitMAT <- function(indir, prefix){
     pattern <- paste0(prefix, "_.*.geojson")
     files   <- dir(indir, pattern, full.names = T)
@@ -225,6 +227,82 @@ readwhitMAT <- function(indir, prefix){
     df
 }
 
+#' FigsToPages
+#'
+#' Subplots xlab and ylab are unified, and only keet singe one.
+#' Currently, only support ggplot figures; And only support arrange figures
+#' into rows (nrow*1).
+#'
+#' @param ps A list of ggplot figure objects. And
+#' @param lgd A grid grob object, legend to show in the bottom.
+#' @param ylab y label title
+#' @param width
+#' @param height
+#'
+#' @param
+FigsToPages <- function(ps, lgd, ylab.right, file, width = 10, height){
+    nrow  <- 6
+    npage <- ceiling(length(ps)/nrow)
+    if (missing(height)) height = nrow*1.6
+
+    ylab.left        <- ps[[1]]$labels$y
+    ylab.left.color <- ps[[1]]$theme$axis.title.y.left$colour
+    ylab.right.color <- ps[[1]]$theme$axis.title.y.right$colour
+
+    params <- list(ncol = 1, padding = unit(1, "line"),
+        left  = textGrob(ylab.left , rot = 90, 
+                         gp=gpar(fontsize=14, col=ylab.left.color)) )
+
+    # parameters for arrangeGrob
+    if (!missing(ylab.right))
+        params$right = textGrob(ylab.right, rot = 270,
+                                gp=gpar(fontsize=14, col=ylab.right.color))
+
+    Cairo::CairoPDF(file, width, height)
+    for (i in 1:npage){
+        runningId(i)
+        I_beg <- (i - 1) * nrow + 1
+        I_end <- min(i*nrow, length(ps))
+
+        I  <- I_beg:I_end
+        n  <- length(I)
+
+        ps_i <- ps[I]
+        for (j in seq_along(I)){
+            theme_j <- theme(
+                axis.text.x = element_blank(),
+                axis.title = element_blank(),
+                axis.title.y.right = element_blank(),
+                axis.title.y.left  = element_blank()
+            )
+            if (j == n)
+                theme_j <- theme(
+                    axis.title.y.right = element_blank(),
+                    axis.title.y.left  = element_blank() )
+            ps_i[[j]] <- ps_i[[j]] + theme_j
+        }
+
+        ps_i  <- c(ps_i, list(lgd))
+        nx    <- length(ps_i)
+
+        params$grobs <- ps_i
+        params$nrow  <- nx
+
+        if (missing(lgd)){
+            params$heights <- c(rep(5, nx - 1), 5.5)
+        } else{
+            params$heights <- c(rep(5, nx - 2), 5.5, 1)
+        }
+
+        g <- do.call(gridExtra::arrangeGrob, params)
+
+        if (i != 1) grid.newpage();
+        grid::grid.draw(g)
+    }
+    dev.off()
+    file.show(file)
+}
+
 # nptperyear = 46
 # # df <- fread("data/lc006/PMLv2_flux112_sgfitw&TSM.csv")
 # df <- fread("PMLv2_flux112_CV.csv")
diff --git a/test/test-whit/whit/test-vcurve.R b/test/test-whit/whit/test-vcurve.R
deleted file mode 100644
index 542cea96..00000000
--- a/test/test-whit/whit/test-vcurve.R
+++ /dev/null
@@ -1,3 +0,0 @@
-## test V-curve
-system.time(whitV(x, IsPlot = TRUE))
-system.time(v_curve(INPUT$y, INPUT$w, llas = seq(1, 6, 0.1),  d = 2, show = T))