-
Notifications
You must be signed in to change notification settings - Fork 173
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Pull request #282: enl/feature/chunk-rename
Merge in JGCRI/gcam-core from enl/feature/chunk-rename to master Squashed commit of the following: commit 141cfdd2bbe03d38f421280ca273a609a8cd9cd4 Author: enlochner <[email protected]> Date: Sun Jun 4 16:13:09 2023 -0500 Remove successive mutates commit 39924b6d553cfb0729f1039616689b4f8f0935ce Author: enlochner <[email protected]> Date: Sun Jun 4 15:23:50 2023 -0500 Rename macro chunks commit 2a2158fc47f6114a3e713bfc2f72e632cf4502b0 Merge: cb5b30a31 2b55555 Author: enlochner <[email protected]> Date: Sun Jun 4 14:57:03 2023 -0500 Merge branch 'master' into enl/feature/chunk-rename commit cb5b30a312a319131c69e32213cd10610e28a223 Author: enlochner <[email protected]> Date: Sun Jun 4 14:46:24 2023 -0500 Add export line to script commit 89656b4206be23a31fff8ba88f0c66816e535141 Author: enlochner <[email protected]> Date: Sun Jun 4 11:33:20 2023 -0500 Rename gcamdata chunks commit beac6e8745667a04de25e265ab7349954a963134 Merge: d1ec8dc4d bdfb2c8 Author: enlochner <[email protected]> Date: Sat Jun 3 15:29:34 2023 -0500 Merge branch 'master' into enl/feature/chunk-rename commit d1ec8dc4d0f5fea7df2d1387cdf57f8fe8d3771e Author: enlochner <[email protected]> Date: Mon May 8 13:30:24 2023 -0500 Small updates to chunk rename function commit ba6cb95ec3af14986a92e399a9c4a39ce2a83769 Author: enlochner <[email protected]> Date: Sat May 6 16:32:07 2023 -0500 Add chunk rename function to data system commit a7e4cd5e9997989835dbe646a6ee43a9c72492da Author: enlochner <[email protected]> Date: Sat May 6 15:22:32 2023 -0500 Add test that checks if chunks need to be renamed commit 21848d08f4f04b869c208e8b398f358ba778d1fd Author: enlochner <[email protected]> Date: Thu Jan 5 12:29:51 2023 -0600 Update GCAM_DATA_MAP commit 885f55cb8850337cdda99689a5d23830046313a5 Author: enlochner <[email protected]> Date: Thu Jan 5 09:26:32 2023 -0600 gcamdata changes to get timeshift test to pass commit eec7fc3660775d409f1c7d250223126beb476ed9 Author: enlochner <[email protected]> Date: Tue Nov 1 13:21:58 
2022 -0500 Updates to get tests to pass including removing consecutive mutates commit 6b3c93726cfbe55315f72cceb77681940fda909a Author: enlochner <[email protected]> Date: Wed Oct 26 11:35:16 2022 -0500 Fix capitalization errors in constants commit ea8c28f53ecc288631a743aa49a2aa10b7b733fe Author: enlochner <[email protected]> Date: Mon Oct 24 22:08:34 2022 -0500 Update xml script module names commit 4d69d080e044c785b59c654519e1d0055a5de8a4 Author: enlochner <[email protected]> Date: Mon Oct 24 21:02:49 2022 -0500 Rename xml chunks commit ea4fad8398da5d9bb29dfd712879d53d9b2616d9 Author: enlochner <[email protected]> Date: Mon Oct 24 15:25:55 2022 -0500 Update module names for gcamusa chunks commit f969d2d49bf41496da1aaff2de03aa4c8faf2773 Author: enlochner <[email protected]> Date: Mon Oct 24 13:54:21 2022 -0500 Rename gcamusa files with zgcamusa in front, and remove _USA for simplicity commit f804dd092cf27208829071b077185a4437928457 Author: enlochner <[email protected]> Date: Mon Oct 24 13:52:53 2022 -0500 Remove successive mutates from R chunks commit a26cf19dfeefe38fff75abb334bbb521cf5b1870 Author: enlochner <[email protected]> Date: Mon Oct 24 10:08:12 2022 -0500 Remove module_aglu_L151.ag_MIRCA_ctry_C_GLU_irr from exported functions commit 4c0ca482286ec12c9d028680d65b43936af30c5d Author: enlochner <[email protected]> Date: Mon Oct 24 09:59:15 2022 -0500 Documentation updates for file renaming ... and 6 more commits
- Loading branch information
Showing
1,308 changed files
with
71,076 additions
and
71,124 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,254 @@ | ||
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. | ||
|
||
#' module_aglu_L100.0_LDS_preprocessing
#'
#' Read in and process LDS (Land Data System) files.
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs,
#' a vector of output names, or (if \code{command} is "MAKE") all
#' the generated outputs: \code{object}. The corresponding file in the
#' original data system was \code{LA100.0_LDS_preprocessing.R} (aglu level1).
#' @details Read in the various LDS datasets; regularize their column names and
#' GLU (Geographic Land Unit) codes; sum \code{value} over rows that share all other
#' columns (every table with an \code{iso} column except the vegetation carbon table);
#' make the LDS_ag_HA_ha and LDS_ag_prod_t tables consistent with each other; and apply
#' a set of hard-coded adjustments to Taiwan ("twn") crops in GLU078/GLU103 and to
#' Polish ("pol") FrgProdNES. See Di Vittorio, A., P. Kyle, and W. Collins. 2016.
#' What are the effects of Agro-Ecological Zones and land use region boundaries on
#' land resource projection using the Global Change Assessment Model? Environmental
#' Modelling & Software 85, 246-265. http://dx.doi.org/10.1016/j.envsoft.2016.08.016.
#' @importFrom assertthat assert_that
#' @importFrom dplyr filter mutate semi_join summarise
#' @author BBL March 2017
module_aglu_L100.0_LDS_preprocessing <- function(command, ...) {

  namelist <- c("Land_type_area_ha",
                "LDS_ag_HA_ha",
                "LDS_ag_prod_t",
                "LDS_value_milUSD",
                "MIRCA_irrHA_ha",
                "MIRCA_rfdHA_ha",
                "Mueller_yield_levels",
                "Ref_veg_carbon_Mg_per_ha",
                "Water_footprint_m3")
  dirname <- "aglu/LDS/"

  if(command == driver.DECLARE_INPUTS) {
    x <- paste0(dirname, namelist)
    names(x) <- rep("FILE", length(x))
    return(x)
  } else if(command == driver.DECLARE_OUTPUTS) {
    return(paste0("L100.", namelist))
  } else if(command == driver.MAKE) {

    . <- value <- iso <- GTAP_crop <- GLU <- MIRCA_crop <- NULL # silence package check.
    L100.Land_type_area_ha <- L100.LDS_value_milUSD <- L100.MIRCA_irrHA_ha <-
      L100.MIRCA_rfdHA_ha <- L100.Mueller_yield_levels <-
      L100.Ref_veg_carbon_Mg_per_ha <- L100.Water_footprint_m3 <- NULL

    all_data <- list(...)[[1]]

    # Local helpers ----
    # Logical row mask for one Taiwan crop in one GLU. It is always built from the
    # table it will be applied to, so a mask can never be borrowed from another table.
    twn_crop_glu_rows <- function(d, crop, glu) {
      d$iso == "twn" & d$GTAP_crop == crop & d$GLU == glu
    }

    # Sum of `value` over the rows selected by twn_crop_glu_rows(). Returns 0 when no
    # row matches, so the result is always a length-one number that is safe in `if`.
    twn_crop_glu_total <- function(d, crop, glu) {
      sum(d$value[which(twn_crop_glu_rows(d, crop, glu))])
    }

    # Set `value` to a nominal 1 for one Taiwan crop in GLU078
    set_twn_glu078_nominal <- function(d, crop) {
      d$value[twn_crop_glu_rows(d, crop, "GLU078")] <- 1
      d
    }

    # Load required inputs ----
    LDSfiles <- list()
    for(nm in namelist) {
      LDSfiles[[nm]] <- get_data(all_data, paste0(dirname, nm))
    }

    # Go through all data frames and...
    for(nm in namelist) {

      # Regularize data frame names
      names(LDSfiles[[nm]]) %>%
        sub("ctry_iso", "iso", .) %>%
        sub("reglr_iso", "GTAP_region", .) %>%
        sub("glu_code", aglu.GLU, .) %>%
        sub("land_type", "land_code", .) %>%
        sub("SAGE_crop", "GTAP_crop", .) %>%
        sub("mirca_crop", "MIRCA_crop", .) %>%
        sub("use_sector", "GTAP_use", .) ->
        names(LDSfiles[[nm]])

      # Replace numerical GLU code with a concatenation of "GLU" and the
      # three-digit code (padded with zeroes as necessary)
      if(aglu.GLU %in% names(LDSfiles[[nm]])) {
        LDSfiles[[nm]][[aglu.GLU]] <- paste(aglu.GLU, sprintf("%03d", LDSfiles[[nm]][[aglu.GLU]]), sep = aglu.GLU_NAME_DELIMITER)
      }

      # Historical note (GPK 3/31/17): Taiwan used to be re-mapped to mainland China here,
      # because the FAOSTAT data in the data system pre-dated Taiwan's inclusion. That
      # re-mapping is no longer performed (see the comment attached to the data below).
      # What remains is the aggregation step: quantity tables are summed over any rows
      # that share all non-value columns, while the vegetation carbon table (a
      # characteristic variable, not a quantity) is passed through unaggregated.
      if("iso" %in% names(LDSfiles[[nm]])) {
        d <- LDSfiles[[nm]]
        if(nm != "Ref_veg_carbon_Mg_per_ha") {
          d %>%
            # group by everything EXCEPT for value and sum up
            dplyr::group_by_at(dplyr::vars(-value)) %>%
            summarise(value = sum(value)) %>%
            ungroup() %>%
            # summarise() produces a new tibble, but we don't want to lose file info
            same_attributes_as(d) %>%
            add_comments("Since 2015 BY update, data available for Taiwan as an agricultural region.") ->
            LDSfiles[[nm]]
        } else {
          # Carbon contents: no aggregation, only the comment is attached
          d %>%
            add_comments("Since 2015 BY update, data available for Taiwan as an agricultural region.") ->
            LDSfiles[[nm]]
        }
      }
    }

    # Add necessary legacy and precursor information and assign to environment
    for(nm in namelist) {
      legacy_name <- paste0("L100.", nm)
      LDSfiles[[nm]] %>%
        add_legacy_name(legacy_name) %>%
        add_precursors(paste0(dirname, nm)) ->
        df
      assign(legacy_name, df)
    }

    # The production and harvested area tables have values <1 clipped, resulting
    # in some country/glu/crops present in one but not the other. For now these will
    # simply be dropped; in the future, we may want to add a digit of rounding in the lds
    L100.LDS_ag_HA_ha %>%
      semi_join(L100.LDS_ag_prod_t, by = c("iso", aglu.GLU, "GTAP_crop")) ->
      L100.LDS_ag_HA_ha
    L100.LDS_ag_prod_t %>%
      semi_join(L100.LDS_ag_HA_ha, by = c("iso", aglu.GLU, "GTAP_crop")) ->
      L100.LDS_ag_prod_t

    ## 9/30/2019 modification (gpk, kbn)
    # Taiwan is now included but has some discrepancies between FAOSTAT and Monfreda that cause issues in one of the
    # land use regions. Specifically, Flax Fiber and Tow has about 1000 ha in FAOSTAT, and 1 in Monfreda, which is assigned
    # to the smaller land use region. Also, wheat harvested area has a significant dip in the years around 2000; the FAOSTAT
    # estimate in the trough years is ~50 whereas otherwise it is about 1000. Monfreda allocates 100% of this area to the smaller
    # land use region. The steps below insert quantities for harvested area and production...
    # - Flax fiber and tow: assign a value to GLU103 of similar magnitude to the data in FAOSTAT
    # - Wheat: re-assign the production from GLU078 to GLU103

    # 1. Adjustment for wheat ----
    # a. Get Taiwan's GLU078 wheat harvested area and production. Totals are used (rather than
    #    the raw column subset) so that zero or multiple matching rows cannot break the `if`.
    wheat_HA <- twn_crop_glu_total(L100.LDS_ag_HA_ha, "Wheat", "GLU078")
    wheat_prod <- twn_crop_glu_total(L100.LDS_ag_prod_t, "Wheat", "GLU078")

    # b. Transfer data into GLU103, assign a small seed value to GLU078 to avoid null values.
    #    MIRCA also bifurcates arable land by irrigated and rainfed; both are adjusted below.
    if(isTRUE(wheat_HA > 0)) {

      L100.LDS_ag_HA_ha <- add_row(L100.LDS_ag_HA_ha, iso = "twn", GLU = "GLU103", GTAP_crop = "Wheat", value = wheat_HA)
      L100.LDS_ag_HA_ha <- L100.LDS_ag_HA_ha[!twn_crop_glu_rows(L100.LDS_ag_HA_ha, "Wheat", "GLU078"), ]
      L100.LDS_ag_HA_ha <- add_row(L100.LDS_ag_HA_ha, iso = "twn", GLU = "GLU078", GTAP_crop = "Wheat", value = 1)

      # Production moved to GLU103 is the GLU078 production (tonnes), not the harvested area (ha)
      L100.LDS_ag_prod_t <- add_row(L100.LDS_ag_prod_t, iso = "twn", GLU = "GLU103", GTAP_crop = "Wheat", value = wheat_prod)
      L100.LDS_ag_prod_t <- L100.LDS_ag_prod_t[!twn_crop_glu_rows(L100.LDS_ag_prod_t, "Wheat", "GLU078"), ]
      L100.LDS_ag_prod_t <- add_row(L100.LDS_ag_prod_t, iso = "twn", GLU = "GLU078", GTAP_crop = "Wheat", value = 1)

      # NOTE(review): the rainfed MIRCA area placed in GLU103 is the LDS harvested area, as before.
      # An earlier version looked up the MIRCA rainfed value for GLU078 but never used it - confirm
      # which quantity is intended here.
      L100.MIRCA_rfdHA_ha <- add_row(L100.MIRCA_rfdHA_ha, iso = "twn", GLU = "GLU103", MIRCA_crop = 1, value = wheat_HA)
      L100.MIRCA_rfdHA_ha <- L100.MIRCA_rfdHA_ha[!(L100.MIRCA_rfdHA_ha$iso == "twn" &
                                                     L100.MIRCA_rfdHA_ha$GLU == "GLU078" &
                                                     L100.MIRCA_rfdHA_ha$MIRCA_crop == 1), ]
      L100.MIRCA_rfdHA_ha <- add_row(L100.MIRCA_rfdHA_ha, iso = "twn", GLU = "GLU078", MIRCA_crop = 1, value = 1)

      L100.MIRCA_irrHA_ha <- add_row(L100.MIRCA_irrHA_ha, iso = "twn", GLU = "GLU078", MIRCA_crop = 1, value = 0)
    }

    # 2. Adjustment for Flax ----
    # Add rows for production and harvested area for GLU103 with values that are commensurate with FAOSTAT.
    flax_HA <- twn_crop_glu_total(L100.LDS_ag_HA_ha, "FlaxFibr_Tow", "GLU078")

    if(isTRUE(flax_HA > 0)) {
      L100.LDS_ag_HA_ha <- add_row(L100.LDS_ag_HA_ha, iso = "twn", GLU = "GLU103", GTAP_crop = "FlaxFibr_Tow", value = 2000)
      L100.LDS_ag_prod_t <- add_row(L100.LDS_ag_prod_t, iso = "twn", GLU = "GLU103", GTAP_crop = "FlaxFibr_Tow", value = 1000)
    }

    # 3. Adjustment for Rapeseed and Barley ----
    # Re-label every Taiwan row of these crops as GLU103, in both harvested area and production
    for(crop in c("Rapeseed", "Barley")) {
      L100.LDS_ag_HA_ha$GLU[L100.LDS_ag_HA_ha$iso == "twn" &
                              L100.LDS_ag_HA_ha$GTAP_crop == crop] <- "GLU103"
      L100.LDS_ag_prod_t$GLU[L100.LDS_ag_prod_t$iso == "twn" &
                               L100.LDS_ag_prod_t$GTAP_crop == crop] <- "GLU103"
    }

    # 4. Adjustment for crops whose GLU078 quantities are set to a nominal value ----
    # - Soybeans: production in the 1970's was >100x the production in ~2000; using the 2000-era
    #   GLU shares leads to too much land required in GLU078
    # - SweetPotato: production in the 1970's was >20x the production in ~2000; GLU-wise
    #   allocation from ~2000 causes issues in GLU078
    # - GrndntWShll and VgtbFrshNES: same treatment
    # Each table is masked with its own columns; the production table was previously masked
    # with the harvested-area table's GLU column, which is only valid if row order is identical.
    for(crop in c("Soybeans", "SweetPotato", "GrndntWShll", "VgtbFrshNES")) {
      L100.LDS_ag_HA_ha <- set_twn_glu078_nominal(L100.LDS_ag_HA_ha, crop)
      L100.LDS_ag_prod_t <- set_twn_glu078_nominal(L100.LDS_ag_prod_t, crop)
    }

    # 5. Adjustment for small yield crops in small region ----
    # yield too small: GTAP_crop == "FrgProdNES", GLU %in% c("GLU049", "GLU021"), iso == "pol"
    # adjust using average yield in the region doesn't help
    # simply filter out both land and prod
    L100.LDS_ag_prod_t %>%
      filter(!(iso == "pol" & GTAP_crop == "FrgProdNES" & GLU %in% c("GLU049", "GLU021"))) ->
      L100.LDS_ag_prod_t
    L100.LDS_ag_HA_ha %>%
      filter(!(iso == "pol" & GTAP_crop == "FrgProdNES" & GLU %in% c("GLU049", "GLU021"))) ->
      L100.LDS_ag_HA_ha

    # And we're done
    return_data(L100.Land_type_area_ha,
                L100.LDS_ag_HA_ha,
                L100.LDS_ag_prod_t,
                L100.LDS_value_milUSD,
                L100.MIRCA_irrHA_ha,
                L100.MIRCA_rfdHA_ha,
                L100.Mueller_yield_levels,
                L100.Ref_veg_carbon_Mg_per_ha,
                L100.Water_footprint_m3)
  } else {
    stop("Unknown command")
  }
}
Oops, something went wrong.