Pull request #282: enl/feature/chunk-rename

Merge in JGCRI/gcam-core from enl/feature/chunk-rename to master Squashed commit of the following: commit 141cfdd2bbe03d38f421280ca273a609a8cd9cd4 Author: enlochner <[email protected]> Date: Sun Jun 4 16:13:09 2023 -0500 Remove successive mutates commit 39924b6d553cfb0729f1039616689b4f8f0935ce Author: enlochner <[email protected]> Date: Sun Jun 4 15:23:50 2023 -0500 Rename macro chunks commit 2a2158fc47f6114a3e713bfc2f72e632cf4502b0 Merge: cb5b30a31 2b55555 Author: enlochner <[email protected]> Date: Sun Jun 4 14:57:03 2023 -0500 Merge branch 'master' into enl/feature/chunk-rename commit cb5b30a312a319131c69e32213cd10610e28a223 Author: enlochner <[email protected]> Date: Sun Jun 4 14:46:24 2023 -0500 Add export line to script commit 89656b4206be23a31fff8ba88f0c66816e535141 Author: enlochner <[email protected]> Date: Sun Jun 4 11:33:20 2023 -0500 Rename gcamdata chunks commit beac6e8745667a04de25e265ab7349954a963134 Merge: d1ec8dc4d bdfb2c8 Author: enlochner <[email protected]> Date: Sat Jun 3 15:29:34 2023 -0500 Merge branch 'master' into enl/feature/chunk-rename commit d1ec8dc4d0f5fea7df2d1387cdf57f8fe8d3771e Author: enlochner <[email protected]> Date: Mon May 8 13:30:24 2023 -0500 Small updates to chunk rename function commit ba6cb95ec3af14986a92e399a9c4a39ce2a83769 Author: enlochner <[email protected]> Date: Sat May 6 16:32:07 2023 -0500 Add chunk rename function to data system commit a7e4cd5e9997989835dbe646a6ee43a9c72492da Author: enlochner <[email protected]> Date: Sat May 6 15:22:32 2023 -0500 Add test that checks if chunks need to be renamed commit 21848d08f4f04b869c208e8b398f358ba778d1fd Author: enlochner <[email protected]> Date: Thu Jan 5 12:29:51 2023 -0600 Update GCAM_DATA_MAP commit 885f55cb8850337cdda99689a5d23830046313a5 Author: enlochner <[email protected]> Date: Thu Jan 5 09:26:32 2023 -0600 gcamdata changes to get timeshift test to pass commit eec7fc3660775d409f1c7d250223126beb476ed9 Author: enlochner <[email protected]> Date: Tue Nov 1 13:21:58 
2022 -0500 Updates to get tests to pass including removing consecutive mutates commit 6b3c93726cfbe55315f72cceb77681940fda909a Author: enlochner <[email protected]> Date: Wed Oct 26 11:35:16 2022 -0500 Fix capitalization errors in constants commit ea8c28f53ecc288631a743aa49a2aa10b7b733fe Author: enlochner <[email protected]> Date: Mon Oct 24 22:08:34 2022 -0500 Update xml script module names commit 4d69d080e044c785b59c654519e1d0055a5de8a4 Author: enlochner <[email protected]> Date: Mon Oct 24 21:02:49 2022 -0500 Rename xml chunks commit ea4fad8398da5d9bb29dfd712879d53d9b2616d9 Author: enlochner <[email protected]> Date: Mon Oct 24 15:25:55 2022 -0500 Update module names for gcamusa chunks commit f969d2d49bf41496da1aaff2de03aa4c8faf2773 Author: enlochner <[email protected]> Date: Mon Oct 24 13:54:21 2022 -0500 Rename gcamusa files with zgcamusa in front, and remove _USA for simplicity commit f804dd092cf27208829071b077185a4437928457 Author: enlochner <[email protected]> Date: Mon Oct 24 13:52:53 2022 -0500 Remove successive mutates from R chunks commit a26cf19dfeefe38fff75abb334bbb521cf5b1870 Author: enlochner <[email protected]> Date: Mon Oct 24 10:08:12 2022 -0500 Remove module_aglu_L151.ag_MIRCA_ctry_C_GLU_irr from exported functions commit 4c0ca482286ec12c9d028680d65b43936af30c5d Author: enlochner <[email protected]> Date: Mon Oct 24 09:59:15 2022 -0500 Documentation updates for file renaming ... and 6 more commits
JGCRI · Jun 4, 2023 · 26cbbbd · 26cbbbd
1 parent 2039103
commit 26cbbbd
Show file tree

Hide file tree

Showing 1,308 changed files with 71,076 additions and 71,124 deletions.
diff --git a/input/gcamdata/NAMESPACE b/input/gcamdata/NAMESPACE
@@ -11,7 +11,6 @@ export(approx_fun_constant)
 export(change_iso_code)
 export(chunk_inputs)
 export(chunk_outputs)
-export(chunk_readylist)
 export(cmp_xml_files)
 export(create_xml)
 export(driver)
@@ -26,7 +25,6 @@ export(info)
 export(inputs_of)
 export(left_join_error_no_match)
 export(load_from_cache)
-export(module_aglu_LB151.ag_MIRCA_ctry_C_GLU_irr)
 export(outputs_of)
 export(parse_csv_header)
 export(protect_integer_cols)

diff --git a/input/gcamdata/R/admin.R b/input/gcamdata/R/admin.R
diff --git a/input/gcamdata/R/constants.R b/input/gcamdata/R/constants.R
@@ -818,7 +818,7 @@ emissions.ZERO_EM_TECH  <- c("electricity", "Electric", "BEV","FCEV","district h
 emissions.HIGH_EM_FACTOR_THRESHOLD <- 1000  # All emission factors above this threshold are replaced with the global median of emission factors.
 emissions.GFED_NODATA <- c("ala","bes","blm","ggy","jey","maf","xad","xko","xnc")  # GFED LULC dataset does not contain data for these isos. These get filtered out so we can use the left_join_error_no_match.
 emissions.UNMGD_LAND_AVG_YRS <- 30 # Years for climatological average for the GFED LULC data.
-emissions.CEDS_scale    <- "usa" # iso's that will be scaled to CEDS emissions
+emissions.CEDS_SCALE    <- "usa" # iso's that will be scaled to CEDS emissions
 emissions.CH4.GWP.AR4 <- 25 # used for EPA non-CO2 scaling, the 2019 EPA non-CO2 report uses AR4 GWPs
 emissions.N2O.GWP.AR4 <- 298 # used for EPA non-CO2 scaling, the 2019 EPA non-CO2 report uses AR4 GWPs
 
@@ -1066,7 +1066,7 @@ gcamusa.TRN_MARKAL_EMISSION_YEARS <- seq(2005,2050, 5)
 gcamusa.TRN_EMISSION_YEARS <- seq(2005,2100, 5)
 
 # emission factor timestep
-gcamusa.TRN_EF_timestep <- 5
+gcamusa.TRN_EF_TIMESTEP <- 5
 
 # GCAM-USA StubTranTech missing lifetime
 gcamusa.STUBTRANTECH_LIFETIME_2045V <- 25 # lifetime for missing vehicles vintages 2045 and earlier
@@ -1084,10 +1084,10 @@ gcamusa.INTL_SHIP_PM_RATIO <- 0.92 # this is the ratio of PM2.5 to PM10 for inte
 gcamusa.IND_PROC_EM_NEI_GCAM_SECTORS <- c("industry_processes", "solvents")
 gcamusa.URB_PROC_EM_NEI_GCAM_SECTORS <- c("landfills", "wastewater", "waste_incineration")
 gcamusa.CEMENT_NEI_GCAM_SECTORS <- c("cement")
-gcamusa.NONGHG_PROC_SECTORS.missing_pollutants <- c("PM2.5", "PM10", "NH3")
-gcamusa.NONGHG_PROC_SECTORS.missing_subsectors <- c("wastewater")
-gcamusa.NONGHG_PROC_SECTORS.gdp_max_reduction <- 30
-gcamusa.NONGHG_PROC_SECTORS.gdp_steepness <- 3.5
+gcamusa.NONGHG_PROC_SECTORS.MISSING_POLLUTANTS <- c("PM2.5", "PM10", "NH3")
+gcamusa.NONGHG_PROC_SECTORS.MISSING_SUBSECTORS <- c("wastewater")
+gcamusa.NONGHG_PROC_SECTORS.GDP_MAX_REDUCTION <- 30
+gcamusa.NONGHG_PROC_SECTORS.GDP_STEEPNESS <- 3.5
 
 gcamusa.PROC_DEFAULT_SECTOR <- "industrial processes"
 gcamusa.PROC_DEFAULT_S_T <- "other industrial processes"

diff --git a/input/gcamdata/R/module-helpers.R b/input/gcamdata/R/module-helpers.R
@@ -1166,3 +1166,94 @@ compute_BC_OC_elc <- function(df, BC_OC_assumptions) {
   return (df)
 
 }
+
+
+#' join.gdp.ts
+#'
+#' Splice a future GDP time series onto a past one without leaving a seam.
+#'
+#' Two GDP time series rarely agree in the year where they overlap (the "base
+#' year").  To graft them together, the future series is used only through the
+#' ratios of its values to its own base-year value.  Those ratios are
+#' multiplied by the base-year value taken from \code{past}, and the rescaled
+#' future values are appended to the past series.
+#'
+#' The base year is determined automatically as the latest year present in
+#' both data sets.
+#'
+#' The past is typically a single time series while the future is typically a
+#' set of scenarios, so \code{past} is assumed to have no scenario column; it
+#' is replicated once per future scenario.  If \code{future} has no column
+#' named (lower-case) 'scenario', a dummy one is added internally and removed
+#' again before the result is returned.
+#'
+#' The ratios are computed within groups, normally by country ('iso') or by
+#' GCAM region ('GCAM_region_ID'); the name of the grouping column is supplied
+#' through the \code{grouping} argument.
+#'
+#' Because only the ratios to the base year are taken from \code{future}, any
+#' series with the correct ratios can be supplied.  For instance, a series of
+#' growth rates can be converted to ratios with \code{\link[base]{cumprod}} and
+#' passed as the future series.  For the same reason the two series may be in
+#' different units (e.g., different dollar-years, or PPP vs. MER).  The output
+#' is in the units of \code{past}.
+#'
+#' @param past Tibble with the past time series (year, gdp, and grouping).
+#' @param future Tibble with the future data (year, gdp, scenario, and
+#' grouping).
+#' @param grouping Name of the grouping column (generally either 'iso' or
+#' 'GCAM_region_ID', but could be anything).
+#' @return Time series with the past and future joined as described in details.
+join.gdp.ts <- function(past, future, grouping) {
+
+  # silence notes on package check.
+  year <- gdp <- base.gdp <- gdp.ratio <- . <- scenario <- NULL
+
+  ## Give the future a placeholder scenario when it has none, so that every
+  ## join below can always include 'scenario' among its keys.
+  drop.scenario <- !('scenario' %in% names(future))
+  if(drop.scenario) {
+    future$scenario <- 'scen'
+  }
+
+  ## The base year is the latest year the two series have in common
+  base.year <- max(intersect(past$year, future$year))
+  assert_that(is.finite(base.year))
+
+  join.cols <- c('scenario', grouping)
+
+  ## Future series: base-year gdp, then ratios of later years to it
+  future.base <- future %>%
+    filter(year == base.year) %>%
+    rename(base.gdp = gdp) %>%
+    select(-year)
+
+  future.ratios <- future %>%
+    filter(year > base.year) %>%
+    left_join_error_no_match(future.base, by = join.cols) %>%
+    mutate(gdp.ratio = gdp / base.gdp) %>%
+    select('scenario', grouping, 'year', 'gdp.ratio')
+
+  ## Past series: replicate once per future scenario, then pull the base year
+  past.by.scen <- tidyr::crossing(past, scenario = unique(future.ratios[['scenario']]))
+  past.base <- past.by.scen %>%
+    filter(year == base.year) %>%
+    rename(base.gdp = gdp) %>%
+    select(-year)
+
+  ## Scale the future ratios by the past base-year gdp
+  future.scaled <- past.base %>%
+    left_join(future.ratios, by = join.cols) %>%
+    mutate(gdp = base.gdp * gdp.ratio) %>%
+    select('scenario', grouping, 'year', 'gdp')
+
+  ## Past rows first, rescaled future rows after
+  rslt <- bind_rows(past.by.scen, future.scaled)
+
+  if(drop.scenario) {
+    return(select(rslt, -scenario))
+  }
+  rslt
+}
diff --git a/input/gcamdata/R/zaglu_L100.0_LDS_preprocessing.R b/input/gcamdata/R/zaglu_L100.0_LDS_preprocessing.R
@@ -0,0 +1,254 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+#' module_aglu_L100.0_LDS_preprocessing
+#'
+#' Read in and process LDS (Land Data System) files.
+#'
+#' @param command API command to execute
+#' @param ... other optional parameters, depending on command
+#' @return Depends on \code{command}: either a vector of required inputs,
+#' a vector of output names, or (if \code{command} is "MAKE") all
+#' the generated outputs: \code{L100.Land_type_area_ha}, \code{L100.LDS_ag_HA_ha},
+#' \code{L100.LDS_ag_prod_t}, \code{L100.LDS_value_milUSD}, \code{L100.MIRCA_irrHA_ha},
+#' \code{L100.MIRCA_rfdHA_ha}, \code{L100.Mueller_yield_levels},
+#' \code{L100.Ref_veg_carbon_Mg_per_ha}, \code{L100.Water_footprint_m3}. The corresponding file in the
+#' original data system was \code{LA100.0_LDS_preprocessing.R} (aglu level1).
+#' @details Read in the various LDS datasets; regularize their column names and
+#' GLU (Geographic Land Unit) codes; sum \code{value} over rows that share all other
+#' columns (every table with an \code{iso} column except the vegetation carbon table); make the
+#' LDS_ag_HA_ha and LDS_ag_prod_t tables consistent; and apply a set of manual adjustments
+#' to harvested area and production for Taiwan (iso "twn") and Poland (iso "pol").
+#' See Di Vittorio, A., P. Kyle, and W. Collins. 2016.
+#' What are the effects of Agro-Ecological Zones and land use region boundaries on
+#' land resource projection using the Global Change Assessment Model? Environmental
+#' Modelling & Software 85, 246-265. http://dx.doi.org/10.1016/j.envsoft.2016.08.016.
+#' @importFrom assertthat assert_that
+#' @importFrom dplyr filter mutate semi_join summarise
+#' @author BBL March 2017
+module_aglu_L100.0_LDS_preprocessing <- function(command, ...) {
+
+  # Base names of the LDS tables; inputs live under `dirname`, outputs are
+  # the same names prefixed with "L100."
+  namelist <- c("Land_type_area_ha",
+                "LDS_ag_HA_ha",
+                "LDS_ag_prod_t",
+                "LDS_value_milUSD",
+                "MIRCA_irrHA_ha",
+                "MIRCA_rfdHA_ha",
+                "Mueller_yield_levels",
+                "Ref_veg_carbon_Mg_per_ha",
+                "Water_footprint_m3")
+  dirname <- "aglu/LDS/"
+
+  if(command == driver.DECLARE_INPUTS) {
+    x <- paste0(dirname, namelist)
+    names(x) <- rep("FILE", length(x))
+    return(x)
+  } else if(command == driver.DECLARE_OUTPUTS) {
+    return(paste0("L100.", namelist))
+  } else if(command == driver.MAKE) {
+
+    . <- value <- iso <- GTAP_crop <- GLU <- MIRCA_crop <- NULL             # silence package check.
+    L100.Land_type_area_ha <- L100.LDS_value_milUSD <- L100.MIRCA_irrHA_ha <-
+        L100.MIRCA_rfdHA_ha <- L100.Mueller_yield_levels <-
+        L100.Ref_veg_carbon_Mg_per_ha <- L100.Water_footprint_m3 <- NULL
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+    LDSfiles <- list()
+    for(nm in namelist) {
+      LDSfiles[[nm]] <- get_data(all_data, paste0(dirname, nm))
+    }
+
+    # Go through all data frames and...
+    for(nm in namelist) {
+
+      # Regularize data frame names
+      names(LDSfiles[[nm]]) %>%
+        sub("ctry_iso", "iso", .) %>%
+        sub("reglr_iso", "GTAP_region", .) %>%
+        sub("glu_code", aglu.GLU, .) %>%
+        sub("land_type", "land_code", .) %>%
+        sub("SAGE_crop", "GTAP_crop", .) %>%
+        sub("mirca_crop", "MIRCA_crop", .) %>%
+        sub("use_sector", "GTAP_use", .) ->
+        names(LDSfiles[[nm]])
+
+      # Replace numerical GLU code with a concatenation of "GLU" and the
+      # three-digit code (padded with zeroes as necessary)
+      if(aglu.GLU %in% names(LDSfiles[[nm]])) {
+        LDSfiles[[nm]][[aglu.GLU]] <- paste(aglu.GLU, sprintf("%03d", LDSfiles[[nm]][[aglu.GLU]]), sep = aglu.GLU_NAME_DELIMITER)
+      }
+
+      # Historical note (GPK 3/31/17): Taiwan used to be excluded as an aglu region, because
+      # (a) it was excluded from SAGE/HYDE in an earlier aggregation, and (b) it was missing from
+      # the FAOSTAT data queried for the data system at the time. This step therefore used to
+      # re-map the ISO code of Taiwan back to China and aggregate anything that needed aggregation
+      # (e.g., quantity variables like land cover totals, but not characteristic variables like
+      # vegetative carbon densities).
+      #
+      # The ISO re-mapping is no longer performed here (see the comment attached to the data
+      # below). What remains is the aggregation: for every table with an iso column, except the
+      # vegetation carbon table, `value` is summed over rows that share all other columns.
+      if("iso" %in% names(LDSfiles[[nm]])) {
+        d <- LDSfiles[[nm]]
+        if(nm != "Ref_veg_carbon_Mg_per_ha") {
+          d %>%
+            # group by everything EXCEPT for value and sum up
+            dplyr::group_by_at(dplyr::vars(-value)) %>%
+            summarise(value = sum(value)) %>%
+            ungroup() %>%
+            # summarise() produces a new tibble, but we don't want to lose file info
+            same_attributes_as(d) %>%
+            add_comments("Since 2015 BY update, data available for Taiwan as an agricultural region.")->
+            LDSfiles[[nm]]
+        } else {
+          # Carbon contents are characteristics, not quantities: no summing, only the comment
+          d %>%
+            add_comments("Since 2015 BY update, data available for Taiwan as an agricultural region.") ->
+            LDSfiles[[nm]]
+        }
+      }
+    }
+
+    # Add necessary legacy and precursor information and assign to environment
+    for(nm in namelist) {
+      legacy_name <- paste0("L100.", nm)
+      LDSfiles[[nm]] %>%
+        add_legacy_name(legacy_name) %>%
+        add_precursors(paste0(dirname, nm)) ->
+        df
+      assign(legacy_name, df)
+    }
+
+    # The production and harvested area tables have values <1 clipped, resulting
+    # in some country/glu/crops present in one but not the other. For now these will
+    # simply be dropped; in the future, we may want to add a digit of rounding in the lds
+    L100.LDS_ag_HA_ha %>%
+      semi_join(L100.LDS_ag_prod_t, by = c("iso", aglu.GLU, "GTAP_crop")) ->
+      L100.LDS_ag_HA_ha
+    L100.LDS_ag_prod_t %>%
+      semi_join(L100.LDS_ag_HA_ha, by = c("iso", aglu.GLU, "GTAP_crop")) ->
+      L100.LDS_ag_prod_t
+
+    ## 9/30/2019 modification (gpk, kbn)
+    # Taiwan is now included but has some discrepancies between FAOSTAT and Monfreda that cause issues in one of the
+    # land use regions. Specifically, Flax Fiber and Tow has about 1000 ha in FAOSTAT, and 1 in Monfreda, which is assigned
+    # to the smaller land use region. Also, wheat harvested area has a significant dip in the years around 2000; the FAOSTAT
+    # estimate in the trough years is ~50 whereas otherwise it is about 1000. Monfreda allocates 100% of this area to the smaller
+    # land use region. The steps below insert quantities for harvested area and production...
+    #  - Flax fiber and tow: assign a value to GLU103 of similar magnitude to the data in FAOSTAT
+    #  - Wheat: re-assign the production from GLU078 to GLU103
+
+
+    #1. Adjustment for wheat ----
+    #a. Get Taiwan's data for GLU078 for wheat for production, harvested area and MIRCA
+    GLUDataforWheat = subset(L100.LDS_ag_HA_ha,GTAP_crop=='Wheat'& GLU=='GLU078'& iso=="twn")
+    Value = GLUDataforWheat$value
+
+    GLUProdDataforWheat = subset(L100.LDS_ag_prod_t,GTAP_crop=='Wheat'& GLU=='GLU078'& iso=="twn")
+    ProdValue=GLUProdDataforWheat$value
+
+    GLUIRRDataforWheat=subset(L100.MIRCA_rfdHA_ha,MIRCA_crop==1 & GLU=='GLU078'& iso=="twn")
+    irrValue=GLUIRRDataforWheat$value
+    # NOTE(review): ProdValue and irrValue are computed but never used; the production and
+    # MIRCA rows inserted below all carry the harvested-area `Value`. Left unchanged because
+    # switching to ProdValue / irrValue would alter the generated data -- confirm the intent.
+
+    #b. Transfer data into GLU103, assign a small seed value to GLU078 to avoid null values.MIRCA also bifurcates arable land by irrigated and rainfed.
+    #We have adjusted both below.
+    # isTRUE() skips the adjustment (instead of raising an error) when the subset above
+    # matched no row, so that `Value` is empty.
+    if (isTRUE(Value > 0)){
+
+      L100.LDS_ag_HA_ha<-add_row(L100.LDS_ag_HA_ha,iso="twn",GLU="GLU103",GTAP_crop='Wheat',value=Value)
+      L100.LDS_ag_HA_ha<-L100.LDS_ag_HA_ha[!(L100.LDS_ag_HA_ha$iso =="twn" & L100.LDS_ag_HA_ha$GLU=="GLU078" & L100.LDS_ag_HA_ha$GTAP_crop=="Wheat"),]
+      L100.LDS_ag_HA_ha<-add_row(L100.LDS_ag_HA_ha,iso="twn",GLU="GLU078",GTAP_crop='Wheat',value=1)
+
+      L100.LDS_ag_prod_t<-add_row(L100.LDS_ag_prod_t,iso="twn",GLU="GLU103",GTAP_crop='Wheat',value=Value)
+      L100.LDS_ag_prod_t<-L100.LDS_ag_prod_t[!(L100.LDS_ag_prod_t$iso =="twn" & L100.LDS_ag_prod_t$GLU=="GLU078" & L100.LDS_ag_prod_t$GTAP_crop=="Wheat"),]
+      L100.LDS_ag_prod_t<-add_row(L100.LDS_ag_prod_t,iso="twn",GLU="GLU078",GTAP_crop='Wheat',value=1)
+
+      L100.MIRCA_rfdHA_ha<-add_row(L100.MIRCA_rfdHA_ha,iso="twn",GLU="GLU103",MIRCA_crop=1,value=Value)
+      L100.MIRCA_rfdHA_ha<-L100.MIRCA_rfdHA_ha[!(L100.MIRCA_rfdHA_ha$iso =="twn" & L100.MIRCA_rfdHA_ha$GLU=="GLU078" & L100.MIRCA_rfdHA_ha$MIRCA_crop==1),]
+      L100.MIRCA_rfdHA_ha<-add_row(L100.MIRCA_rfdHA_ha,iso="twn",GLU="GLU078",MIRCA_crop=1,value=1)
+
+      L100.MIRCA_irrHA_ha<-add_row(L100.MIRCA_irrHA_ha,iso="twn",GLU="GLU078",MIRCA_crop=1,value=0)
+
+    }
+
+    #2. Adjustment for Flax ----
+    #Add rows for production and harvested area for GLU103 with values that are commensurate with FAOSTAT.
+    GLUDataforFlax = subset(L100.LDS_ag_HA_ha,GTAP_crop=='FlaxFibr_Tow'& GLU=='GLU078'& iso=="twn")
+    Value = GLUDataforFlax$value
+
+    # Same empty-subset guard as for wheat
+    if (isTRUE(Value > 0)){
+
+      L100.LDS_ag_HA_ha<-add_row(L100.LDS_ag_HA_ha,iso="twn",GLU="GLU103",GTAP_crop='FlaxFibr_Tow',value=2000)
+      L100.LDS_ag_prod_t<-add_row(L100.LDS_ag_prod_t,iso="twn",GLU="GLU103",GTAP_crop='FlaxFibr_Tow',value=1000)
+    }
+
+    #3. Adjustment for Rapeseed and Barley ----
+    # Move all harvested area and production from GLU078 to GLU103
+    L100.LDS_ag_HA_ha$GLU[L100.LDS_ag_HA_ha$iso == "twn" &
+                            L100.LDS_ag_HA_ha$GTAP_crop == "Rapeseed"] <- "GLU103"
+    L100.LDS_ag_prod_t$GLU[L100.LDS_ag_prod_t$iso == "twn" &
+                             L100.LDS_ag_prod_t$GTAP_crop == "Rapeseed"] <- "GLU103"
+
+    L100.LDS_ag_HA_ha$GLU[L100.LDS_ag_HA_ha$iso == "twn" &
+                            L100.LDS_ag_HA_ha$GTAP_crop == "Barley"] <- "GLU103"
+    L100.LDS_ag_prod_t$GLU[L100.LDS_ag_prod_t$iso == "twn" &
+                             L100.LDS_ag_prod_t$GTAP_crop == "Barley"] <- "GLU103"
+
+    #4. Adjustment for Soybeans, SweetPotato, GrndntWShll and VgtbFrshNES ----
+    # Soybean: production in the 1970's was >100x the production in ~2000; using the 2000-era GLU shares
+    # leads to too much land required in GLU078.
+    # Sweet potatoes: production in the 1970's was >20x the production in ~2000. GLU-wise allocation
+    # from ~2000 causes issues in GLU078.
+    # For each of these crops, set the harvested area and production in GLU078 to a nominal value.
+    # Each table is indexed with a mask built from its OWN columns: the two tables are not
+    # guaranteed to share row order or length, so a mask from one must not index the other.
+    for(crop in c("Soybeans", "SweetPotato", "GrndntWShll", "VgtbFrshNES")) {
+      L100.LDS_ag_HA_ha$value[L100.LDS_ag_HA_ha$iso == "twn" &
+                                L100.LDS_ag_HA_ha$GTAP_crop == crop &
+                                L100.LDS_ag_HA_ha$GLU == "GLU078"] <- 1
+      L100.LDS_ag_prod_t$value[L100.LDS_ag_prod_t$iso == "twn" &
+                                 L100.LDS_ag_prod_t$GTAP_crop == crop &
+                                 L100.LDS_ag_prod_t$GLU == "GLU078"] <- 1
+    }
+
+
+    #5. Adjustment for small yield crops in small region ----
+    # yield too small: GTAP_crop == "FrgProdNES", GLU %in% c("GLU049", "GLU021"), iso == "pol"
+    # adjust using average yield in the region doesn't help
+    # simply filter out both land and prod
+
+    L100.LDS_ag_prod_t %>% filter(!(iso == "pol" & GTAP_crop == "FrgProdNES" & GLU %in% c("GLU049", "GLU021"))) ->
+      L100.LDS_ag_prod_t
+    L100.LDS_ag_HA_ha %>% filter(!(iso == "pol" & GTAP_crop == "FrgProdNES" & GLU %in% c("GLU049", "GLU021"))) ->
+      L100.LDS_ag_HA_ha
+
+
+    # And we're done
+    return_data(L100.Land_type_area_ha,
+                L100.LDS_ag_HA_ha,
+                L100.LDS_ag_prod_t,
+                L100.LDS_value_milUSD,
+                L100.MIRCA_irrHA_ha,
+                L100.MIRCA_rfdHA_ha,
+                L100.Mueller_yield_levels,
+                L100.Ref_veg_carbon_Mg_per_ha,
+                L100.Water_footprint_m3)
+  } else {
+    stop("Unknown command")
+  }
+}