From 693d230dfa9f82f971ae8c9dd815aeb51815b570 Mon Sep 17 00:00:00 2001
From: meetagrawal09
Date: Sun, 6 Aug 2023 10:43:05 +0530
Subject: [PATCH 1/5] added integration tests for AmerifluxLBL

---
 .../test.download.AmerifluxLBL.R              | 165 ++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R

diff --git a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
new file mode 100644
index 00000000000..144db86dea2
--- /dev/null
+++ b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
@@ -0,0 +1,165 @@
+library(testthat)
+
+tmpdir <- tempfile(pattern = "amerifluxData")
+dir.create(tmpdir)
+on.exit(teardown(unlink(tmpdir, recursive = TRUE)))
+
+sitename <- "US-Akn"
+# outfolder <- "./amerifluxData"
+outfolder <- tmpdir
+start_date <- "2011-01-01"
+end_date <- "2011-12-31"
+username <- "pecan"
+useremail <- "@"
+data_product <- "BASE-BADM"
+data_policy <- "CCBY4.0"
+overwrite <- FALSE
+verbose <- FALSE
+
+
+start_date <- as.POSIXlt(start_date, tz = "UTC")
+end_date <- as.POSIXlt(end_date, tz = "UTC")
+
+start_year <- lubridate::year(start_date)
+end_year <- lubridate::year(end_date)
+
+
+site <- sub(".* \\((.*)\\)", "\\1", sitename)
+
+# make sure output folder exists
+if (!file.exists(outfolder)) {
+  dir.create(outfolder, showWarnings = FALSE, recursive = TRUE)
+}
+
+repeat {
+  tout <- options("timeout")
+  zip_file <- try(amerifluxr::amf_download_base(
+    user_id = username,
+    user_email = useremail,
+    site_id = site,
+    data_product = data_product,
+    data_policy = data_policy,
+    agree_policy = TRUE,
+    intended_use = "model",
+    intended_use_text = "PEcAn download",
+    verbose = verbose,
+    out_dir = outfolder
+  ))
+  if (!inherits(zip_file, "try-error")) {
+    break
+  } else if (tout$timeout > 250) {
+    PEcAn.logger::logger.severe("Download takes too long, check your connection.")
+    break
+  }
+  PEcAn.logger::logger.info("Added 100 seconds before the download timeouts")
+  options(timeout = tout$timeout + 100)
+}
+
+# Path to created zip-file
+ftplink <- zip_file
+if (!grepl(".zip", ftplink)) {
+  PEcAn.logger::logger.info("Not able to download a zip-file. Check download.AmerifluxLBL inputs")
+}
+
+# get zip and csv filenames
+outfname <- strsplit(ftplink, "/")
+outfname <- outfname[[1]][length(outfname[[1]])]
+output_zip_file <- file.path(outfolder, outfname)
+file_timestep_hh <- "HH"
+file_timestep_hr <- "HR"
+file_timestep <- file_timestep_hh
+
+endname <- strsplit(outfname, "_")
+endname <- endname[[1]][length(endname[[1]])]
+endname <- gsub("\\..*", "", endname)
+outcsvname <- paste0(substr(outfname, 1, 15), "_", file_timestep_hh, "_", endname, ".csv")
+output_csv_file <- file.path(outfolder, outcsvname)
+outcsvname_hr <- paste0(substr(outfname, 1, 15), "_", file_timestep_hr, "_", endname, ".csv")
+output_csv_file_hr <- file.path(outfolder, outcsvname_hr)
+
+download_file_flag <- TRUE
+extract_file_flag <- TRUE
+if (!overwrite && file.exists(output_zip_file)) {
+  PEcAn.logger::logger.debug("File '", output_zip_file, "' already exists, skipping download")
+  download_file_flag <- FALSE
+}
+if (!overwrite && file.exists(output_csv_file)) {
+  PEcAn.logger::logger.debug("File '", output_csv_file, "' already exists, skipping extraction.")
+  download_file_flag <- FALSE
+  extract_file_flag <- FALSE
+  file_timestep <- "HH"
+} else {
+  if (!overwrite && file.exists(output_csv_file_hr)) {
+    PEcAn.logger::logger.debug("File '", output_csv_file_hr, "' already exists, skipping extraction.")
+    download_file_flag <- FALSE
+    extract_file_flag <- FALSE
+    file_timestep <- "HR"
+    outcsvname <- outcsvname_hr
+    output_csv_file <- output_csv_file_hr
+  }
+}
+
+if (download_file_flag) {
+  extract_file_flag <- TRUE
+  PEcAn.utils::download_file(ftplink, output_zip_file, method)
+  if (!file.exists(output_zip_file)) {
+    PEcAn.logger::logger.severe("FTP did not download ", output_zip_file, " from ", ftplink)
+  }
+}
+if (extract_file_flag) {
+  avail_file <- utils::unzip(output_zip_file, list = TRUE)
+  if (length(grep("HH", avail_file)) > 0) {
+    file_timestep <- "HH"
+  } else {
+    if (length(grep("HR", avail_file)) > 0) {
+      file_timestep <- "HR"
+      output_csv_file <- output_csv_file_hr
+      outcsvname <- outcsvname_hr
+    } else {
+      PEcAn.logger::logger.severe("Half-hourly or Hourly data file was not found in ", output_zip_file)
+    }
+  }
+  utils::unzip(output_zip_file, outcsvname, exdir = outfolder)
+  if (!file.exists(output_csv_file)) {
+    PEcAn.logger::logger.severe("ZIP file ", output_zip_file, " did not contain CSV file ", outcsvname)
+  }
+}
+
+dbfilename <- paste0(substr(outfname, 1, 15), "_", file_timestep, "_", endname)
+
+# get start and end year of data from file
+firstline <- system(paste0("head -4 ", output_csv_file), intern = TRUE)
+firstline <- firstline[4]
+lastline <- system(paste0("tail -1 ", output_csv_file), intern = TRUE)
+
+firstdate_st <- paste0(
+  substr(firstline, 1, 4), "-",
+  substr(firstline, 5, 6), "-",
+  substr(firstline, 7, 8), " ",
+  substr(firstline, 9, 10), ":",
+  substr(firstline, 11, 12)
+)
+firstdate <- as.POSIXlt(firstdate_st)
+lastdate_st <- paste0(
+  substr(lastline, 1, 4), "-",
+  substr(lastline, 5, 6), "-",
+  substr(lastline, 7, 8), " ",
+  substr(lastline, 9, 10), ":",
+  substr(lastline, 11, 12)
+)
+lastdate <- as.POSIXlt(lastdate_st)
+
+syear <- lubridate::year(firstdate)
+eyear <- lubridate::year(lastdate)
+
+if (start_year > eyear) {
+  PEcAn.logger::logger.severe("Start_Year", start_year, "exceeds end of record ", eyear, " for ", site)
+}
+if (end_year < syear) {
+  PEcAn.logger::logger.severe("End_Year", end_year, "precedes start of record ", syear, " for ", site)
+}
+
+test_that("Downloaded data files are present at the desired location", {
+  expect_true(file.exists(paste0(output_csv_file)))
+  expect_true(file.exists(paste0(output_zip_file)))
+})
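The loop in the test above retries amerifluxr::amf_download_base() with a progressively larger timeout. A condensed, standalone sketch of that retry pattern, where download_fn is a hypothetical stand-in for any downloader that honours options("timeout"):

    retry_with_longer_timeout <- function(download_fn, step = 100, max_timeout = 250) {
      repeat {
        tout <- options("timeout")
        result <- try(download_fn(), silent = TRUE)
        if (!inherits(result, "try-error")) {
          return(result)
        }
        if (tout$timeout > max_timeout) {
          stop("Download takes too long, check your connection.")
        }
        # widen the timeout and try again
        options(timeout = tout$timeout + step)
      }
    }
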
From f23fe001c548a48790bd7ba0150c0e01c1785f3f Mon Sep 17 00:00:00 2001
From: meetagrawal09
Date: Mon, 21 Aug 2023 14:15:19 +0530
Subject: [PATCH 2/5] updated download function

---
 .../test.download.AmerifluxLBL.R              | 192 +++-----------------
 1 file changed, 29 insertions(+), 163 deletions(-)

diff --git a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
index 144db86dea2..3ab07a98939 100644
--- a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
+++ b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
@@ -1,165 +1,31 @@
-library(testthat)
-
-tmpdir <- tempfile(pattern = "amerifluxData")
-dir.create(tmpdir)
-on.exit(teardown(unlink(tmpdir, recursive = TRUE)))
-
-sitename <- "US-Akn"
-# outfolder <- "./amerifluxData"
-outfolder <- tmpdir
-start_date <- "2011-01-01"
-end_date <- "2011-12-31"
-username <- "pecan"
-useremail <- "@"
-data_product <- "BASE-BADM"
-data_policy <- "CCBY4.0"
-overwrite <- FALSE
-verbose <- FALSE
-
-
-start_date <- as.POSIXlt(start_date, tz = "UTC")
-end_date <- as.POSIXlt(end_date, tz = "UTC")
-
-start_year <- lubridate::year(start_date)
-end_year <- lubridate::year(end_date)
-
-
-site <- sub(".* \\((.*)\\)", "\\1", sitename)
-
-# make sure output folder exists
-if (!file.exists(outfolder)) {
-  dir.create(outfolder, showWarnings = FALSE, recursive = TRUE)
-}
-
-repeat {
-  tout <- options("timeout")
-  zip_file <- try(amerifluxr::amf_download_base(
-    user_id = username,
-    user_email = useremail,
-    site_id = site,
-    data_product = data_product,
-    data_policy = data_policy,
-    agree_policy = TRUE,
-    intended_use = "model",
-    intended_use_text = "PEcAn download",
-    verbose = verbose,
-    out_dir = outfolder
-  ))
-  if (!inherits(zip_file, "try-error")) {
-    break
-  } else if (tout$timeout > 250) {
-    PEcAn.logger::logger.severe("Download takes too long, check your connection.")
-    break
-  }
-  PEcAn.logger::logger.info("Added 100 seconds before the download timeouts")
-  options(timeout = tout$timeout + 100)
-}
-
-# Path to created zip-file
-ftplink <- zip_file
-if (!grepl(".zip", ftplink)) {
-  PEcAn.logger::logger.info("Not able to download a zip-file. Check download.AmerifluxLBL inputs")
-}
-
-# get zip and csv filenames
-outfname <- strsplit(ftplink, "/")
-outfname <- outfname[[1]][length(outfname[[1]])]
-output_zip_file <- file.path(outfolder, outfname)
-file_timestep_hh <- "HH"
-file_timestep_hr <- "HR"
-file_timestep <- file_timestep_hh
-
-endname <- strsplit(outfname, "_")
-endname <- endname[[1]][length(endname[[1]])]
-endname <- gsub("\\..*", "", endname)
-outcsvname <- paste0(substr(outfname, 1, 15), "_", file_timestep_hh, "_", endname, ".csv")
-output_csv_file <- file.path(outfolder, outcsvname)
-outcsvname_hr <- paste0(substr(outfname, 1, 15), "_", file_timestep_hr, "_", endname, ".csv")
-output_csv_file_hr <- file.path(outfolder, outcsvname_hr)
-
-download_file_flag <- TRUE
-extract_file_flag <- TRUE
-if (!overwrite && file.exists(output_zip_file)) {
-  PEcAn.logger::logger.debug("File '", output_zip_file, "' already exists, skipping download")
-  download_file_flag <- FALSE
-}
-if (!overwrite && file.exists(output_csv_file)) {
-  PEcAn.logger::logger.debug("File '", output_csv_file, "' already exists, skipping extraction.")
-  download_file_flag <- FALSE
-  extract_file_flag <- FALSE
-  file_timestep <- "HH"
-} else {
-  if (!overwrite && file.exists(output_csv_file_hr)) {
-    PEcAn.logger::logger.debug("File '", output_csv_file_hr, "' already exists, skipping extraction.")
-    download_file_flag <- FALSE
-    extract_file_flag <- FALSE
-    file_timestep <- "HR"
-    outcsvname <- outcsvname_hr
-    output_csv_file <- output_csv_file_hr
-  }
-}
-
-if (download_file_flag) {
-  extract_file_flag <- TRUE
-  PEcAn.utils::download_file(ftplink, output_zip_file, method)
-  if (!file.exists(output_zip_file)) {
-    PEcAn.logger::logger.severe("FTP did not download ", output_zip_file, " from ", ftplink)
-  }
-}
-if (extract_file_flag) {
-  avail_file <- utils::unzip(output_zip_file, list = TRUE)
-  if (length(grep("HH", avail_file)) > 0) {
-    file_timestep <- "HH"
-  } else {
-    if (length(grep("HR", avail_file)) > 0) {
-      file_timestep <- "HR"
-      output_csv_file <- output_csv_file_hr
-      outcsvname <- outcsvname_hr
-    } else {
-      PEcAn.logger::logger.severe("Half-hourly or Hourly data file was not found in ", output_zip_file)
-    }
-  }
-  utils::unzip(output_zip_file, outcsvname, exdir = outfolder)
-  if (!file.exists(output_csv_file)) {
-    PEcAn.logger::logger.severe("ZIP file ", output_zip_file, " did not contain CSV file ", outcsvname)
-  }
-}
-
-dbfilename <- paste0(substr(outfname, 1, 15), "_", file_timestep, "_", endname)
-
-# get start and end year of data from file
-firstline <- system(paste0("head -4 ", output_csv_file), intern = TRUE)
-firstline <- firstline[4]
-lastline <- system(paste0("tail -1 ", output_csv_file), intern = TRUE)
-
-firstdate_st <- paste0(
-  substr(firstline, 1, 4), "-",
-  substr(firstline, 5, 6), "-",
-  substr(firstline, 7, 8), " ",
-  substr(firstline, 9, 10), ":",
-  substr(firstline, 11, 12)
-)
-firstdate <- as.POSIXlt(firstdate_st)
-lastdate_st <- paste0(
-  substr(lastline, 1, 4), "-",
-  substr(lastline, 5, 6), "-",
-  substr(lastline, 7, 8), " ",
-  substr(lastline, 9, 10), ":",
-  substr(lastline, 11, 12)
+# putting logger to debug mode
+PEcAn.logger::logger.setUseConsole(TRUE, FALSE)
+on.exit(PEcAn.logger::logger.setUseConsole(TRUE, TRUE), add = TRUE)
+PEcAn.logger::logger.setLevel("DEBUG")
+
+
+# mocking functions
+mockery::stub(PEcAn.DB::convert_input, 'dbfile.input.check', data.frame())
+mockery::stub(PEcAn.DB::convert_input, 'db.query', data.frame(id = 1))
+
+# calling download function
+PEcAn.DB::convert_input(
+  input.id = NA,
+  outfolder = "testAmerifluxData",
+  formatname = NULL,
+  mimetype = NULL,
+  site.id = 1,
+  start_date = "2011-01-01",
+  end_date = "2011-12-31",
+  pkg = 'PEcAn.data.atmosphere',
+  fcn = 'download.AmerifluxLBL',
+  con = NULL,
+  host = data.frame(name = "localhost"),
+  browndog = NULL,
+  write = FALSE,
+  lat.in = 40,
+  lon.in = -88,
+  sitename = 'US-Akn'
 )
-lastdate <- as.POSIXlt(lastdate_st)
 
-syear <- lubridate::year(firstdate)
-eyear <- lubridate::year(lastdate)
-
-if (start_year > eyear) {
-  PEcAn.logger::logger.severe("Start_Year", start_year, "exceeds end of record ", eyear, " for ", site)
-}
-if (end_year < syear) {
-  PEcAn.logger::logger.severe("End_Year", end_year, "precedes start of record ", syear, " for ", site)
-}
-
-test_that("Downloaded data files are present at the desired location", {
-  expect_true(file.exists(paste0(output_csv_file)))
-  expect_true(file.exists(paste0(output_zip_file)))
-})
+# checking the downloaded files
From acb23b4e9f73db64ad97467d15fe2b903c9865b1 Mon Sep 17 00:00:00 2001
From: meetagrawal09
Date: Wed, 23 Aug 2023 16:21:06 +0530
Subject: [PATCH 3/5] moved test to function, using withr for tempfile

---
 .../test.download.AmerifluxLBL.R              | 62 +++++++++++--------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
index 3ab07a98939..4a0c4703b9f 100644
--- a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
+++ b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
@@ -1,31 +1,43 @@
-# putting logger to debug mode
-PEcAn.logger::logger.setUseConsole(TRUE, FALSE)
-on.exit(PEcAn.logger::logger.setUseConsole(TRUE, TRUE), add = TRUE)
-PEcAn.logger::logger.setLevel("DEBUG")
+library(testthat)
 
+test_download_AmerifluxLBL <- function(start_date, end_date, sitename, lat.in, lon.in) {
+  # putting logger to debug mode
+  PEcAn.logger::logger.setUseConsole(TRUE, FALSE)
+  on.exit(PEcAn.logger::logger.setUseConsole(TRUE, TRUE), add = TRUE)
+  PEcAn.logger::logger.setLevel("DEBUG")
 
-# mocking functions
-mockery::stub(PEcAn.DB::convert_input, 'dbfile.input.check', data.frame())
-mockery::stub(PEcAn.DB::convert_input, 'db.query', data.frame(id = 1))
+  # mocking functions
+  mockery::stub(PEcAn.DB::convert_input, 'dbfile.input.check', data.frame())
+  mockery::stub(PEcAn.DB::convert_input, 'db.query', data.frame(id = 1))
 
-# calling download function
-PEcAn.DB::convert_input(
-  input.id = NA,
-  outfolder = "testAmerifluxData",
-  formatname = NULL,
-  mimetype = NULL,
-  site.id = 1,
+  withr::with_dir(tempdir(), {
+    tmpdir <- getwd()
+    # calling download function
+    PEcAn.DB::convert_input(
+      input.id = NA,
+      outfolder = tmpdir,
+      formatname = NULL,
+      mimetype = NULL,
+      site.id = 1,
+      start_date = start_date,
+      end_date = end_date,
+      pkg = 'PEcAn.data.atmosphere',
+      fcn = 'download.AmerifluxLBL',
+      con = NULL,
+      host = data.frame(name = "localhost"),
+      browndog = NULL,
+      write = FALSE,
+      lat.in = lat.in,
+      lon.in = lon.in,
+      sitename = sitename
+    )
+  })
+}
+
+test_download_AmerifluxLBL(
   start_date = "2011-01-01",
   end_date = "2011-12-31",
-  pkg = 'PEcAn.data.atmosphere',
-  fcn = 'download.AmerifluxLBL',
-  con = NULL,
-  host = data.frame(name = "localhost"),
-  browndog = NULL,
-  write = FALSE,
+  sitename = 'US-Akn',
   lat.in = 40,
-  lon.in = -88,
-  sitename = 'US-Akn'
-)
-
-# checking the downloaded files
+  lon.in = -88
+)
\ No newline at end of file
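Patches 2 and 3 avoid a live PEcAn database by stubbing helpers called inside PEcAn.DB::convert_input() with mockery. A minimal sketch of how mockery::stub(where, what, how) redirects a call made inside another function; count_rows() and fetch_rows() are toy names for illustration, not PEcAn functions:

    library(mockery)

    fetch_rows <- function() stop("would need a live database")
    count_rows <- function() nrow(fetch_rows())

    # calls to fetch_rows() made inside count_rows() now return the canned data frame
    stub(count_rows, "fetch_rows", data.frame(id = 1))
    count_rows()  # 1, no database connection needed
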
From a009c1ad3079a4ff28b2abe0dfe40eff2d0c067b Mon Sep 17 00:00:00 2001
From: meetagrawal09
Date: Wed, 23 Aug 2023 18:50:41 +0530
Subject: [PATCH 4/5] added test : check if file exists

---
 .../inst/integrationTests/test.download.AmerifluxLBL.R       | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
index 4a0c4703b9f..24401898180 100644
--- a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
+++ b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
@@ -32,6 +32,11 @@ test_download_AmerifluxLBL <- function(start_date, end_date, sitename, lat.in, l
       sitename = sitename
     )
   })
+
+  # checking if the file is downloaded
+  test_that("Downloaded files are present in the desired location", {
+    expect_true(file.exists(paste0(tmpdir, "/AMF_US-Akn_BASE_HH_6-5.csv")))
+  })
 }
 
 test_download_AmerifluxLBL(
From 6c7988ef1e67d6bf1b1f7b2609219ded10e42fc0 Mon Sep 17 00:00:00 2001
From: meetagrawal09
Date: Thu, 24 Aug 2023 18:56:07 +0530
Subject: [PATCH 5/5] added tests : check files, check units

---
 .../test.download.AmerifluxLBL.R              | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
index 24401898180..e46d1d4830f 100644
--- a/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
+++ b/modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R
@@ -13,7 +13,7 @@ test_download_AmerifluxLBL <- function(start_date, end_date, sitename, lat.in, l
   withr::with_dir(tempdir(), {
     tmpdir <- getwd()
     # calling download function
-    PEcAn.DB::convert_input(
+    res <- PEcAn.DB::convert_input(
       input.id = NA,
       outfolder = tmpdir,
       formatname = NULL,
@@ -34,9 +34,21 @@ test_download_AmerifluxLBL <- function(start_date, end_date, sitename, lat.in, l
   })
 
   # checking if the file is downloaded
-  test_that("Downloaded files are present in the desired location", {
+  test_that("Downloaded data files are present at the desired location", {
     expect_true(file.exists(paste0(tmpdir, "/AMF_US-Akn_BASE_HH_6-5.csv")))
   })
+
+  test_that("Downloaded data files have the right format", {
+    firstline <- system(paste0("head -4 ", paste0(tmpdir, "/AMF_US-Akn_BASE_HH_6-5.csv")), intern = TRUE)
+    lastline <- system(paste0("tail -1 ", paste0(tmpdir, "/AMF_US-Akn_BASE_HH_6-5.csv")), intern = TRUE)
+
+    # checking if first line of CSV has the sitename
+    expect_true(grepl(sitename, firstline[1]))
+
+    # fourth and last row checked to contain non-alphabetical data since these are used to verify start and end dates
+    expect_false(grepl("[A-Za-z]", firstline[4]))
+    expect_false(grepl("[A-Za-z]", lastline[1]))
+  })
 }
 
 test_download_AmerifluxLBL(
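One way to run the finished integration test by hand, assuming a PEcAn source checkout with PEcAn.DB, PEcAn.data.atmosphere, mockery, withr and testthat installed; the Ameriflux download itself is not mocked, so network access and an accepted data policy are still required:

    source("modules/data.atmosphere/inst/integrationTests/test.download.AmerifluxLBL.R")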