diff --git a/.Rbuildignore b/.Rbuildignore index bbe2579..7c11360 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,14 @@ ^docs$ ^pkgdown$ ^\.github$ +^README\.Rmd$ +^LICENSE\.md$ +^doc$ +^Meta$ +^CODE_OF_CONDUCT\.md$ +^CONTRIBUTING\.md$ +^paper$ +^.vscode$ +^citation.cff$ +^vignettes/articles$ +^cran-comments\.md$ diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..e050312 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,61 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 3f35f7e..773cf32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: greenAlgoR Title: Compute ecological footprint in R -Version: 0.0.0.9000 +Version: 0.1 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: This package computes ecological footprint in R (based on [green-algorithms](https://calculator.green-algorithms.org/). - greenAlgoR also made it simple to compute ecological footprint of {[targets](https://github.com/ropensci/targets)} pipelines. + greenAlgoR also made it simple to compute ecological footprint of \{[targets](https://github.com/ropensci/targets)\} pipelines. License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 9063147..570565c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,4 +3,9 @@ export(csv_from_url_ga) export(ga_footprint) export(ga_targets) +export(round_conditionaly) export(session_runtime) +import(benchmarkme) +import(ggplot2) +import(targets) +importFrom(utils,read.csv) diff --git a/NEWS.md b/NEWS.md index 0801202..d228ab6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,4 @@ # greenAlgoR 0.1 -* Initial CRAN submission. +* Initial github submission. +* Creation of function `ga_footprint()` and `ga_targets()` diff --git a/R/ga_footprint.R b/R/ga_footprint.R index 9230c4b..3494500 100644 --- a/R/ga_footprint.R +++ b/R/ga_footprint.R @@ -16,7 +16,9 @@ #' - TDP_per_core: https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/TDP_cpu.csv #' - power_draw_per_gb: https://onlinelibrary.wiley.com/doi/10.1002/advs.202100707 #' -#' Text from the [green-algorithms](https://github.com/GreenAlgorithms/green-algorithms-tool) website: +#' Description of the algorithm from the +#' [green-algorithms](https://github.com/GreenAlgorithms/green-algorithms-tool) +#' website: #' #' """ #' @@ -41,18 +43,18 @@ #' #' The Carbon Intensity depends on the location and the technologies used #' to produce electricity. But note that the "energy needed" -#' [...] is independent of the location. +#' \[...\] is independent of the location. #' #' """ #' #' @param runtime_h Run time in hours (int). If runtime_h == "session", #' the runtime is compute using the actual R session #' @param location_code (character list of country or region available in ) -#' @param PUE (int) Power usage effectiveness of the server. -#' See https://github.com/GreenAlgorithms/green-algorithms-tool/blob/master/data/v2.2/defaults_PUE.csv -#' for example of values. If you are using your personal computer, set PUE to 1. -#' @param TDP_per_core (int. in Watt, default 12). Find your cpu TDP and your nb of cpu on -#' https://www.techpowerup.com/cpu-specs/ or in +#' @param PUE (int) Power usage effectiveness of the server. See +#' https://github.com/GreenAlgorithms/green-algorithms-tool/blob/master/data/v2.2/defaults_PUE.csv +#' for example of values. If you are using your personal computer, set PUE to 1. +#' @param TDP_per_core (int. in Watt, default 12). Find your cpu TDP and your +#' nb of cpu on https://www.techpowerup.com/cpu-specs/ or in #' http://calculator.green-algorithms.org/ if available. #' Owerwrite by cpu_model param. #' @param n_cores (int, default 1) Number of cores. @@ -94,17 +96,20 @@ #' A dataframe with `location` and `carbonIntensity` #' columns. Set to carbon_intensity_internal if NULL. #' carbon_intensity_internal is set using command line -#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/CI_aggregated.csv") +#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/ +#' green-algorithms-tool/refs/heads/master/data/v2.2/CI_aggregated.csv") #' @param TDP_cpu (default NULL). Advanced users only. #' A dataframe with `model`, `n_cores` and `TDP_per_core` #' columns. Set to TDP_cpu_internal if NULL. #' TDP_cpu_internal is set using command line -#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/TDP_cpu.csv") +#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/ +#' green-algorithms-tool/refs/heads/master/data/v2.2/TDP_cpu.csv") #' @param ref_value (default NULL). Advanced users only. #' A dataframe with `variable` and `value` #' columns. Set to ref_value_internal if NULL. #' ref_value_internal is set using command line -#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/referenceValues.csv") +#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/ +#' green-algorithms-tool/refs/heads/master/data/v2.2/referenceValues.csv") #' #' @return A list of values #' - `runtime_h`: the input run time in hours @@ -158,18 +163,44 @@ #' #' ggplot(res_ga$ref_value, aes(y = variable, x = as.numeric(value), fill = log10(prop_footprint))) + #' geom_col() + -#' geom_col(data = data.frame(variable = "Total", value = res_ga$carbon_footprint_total_gCO2), fill = "grey30") + -#' geom_col(data = data.frame(variable = "Cores", value = res_ga$carbon_footprint_cores), fill = "darkred") + -#' geom_col(data = data.frame(variable = "Memory", value = res_ga$carbon_footprint_memory), fill = "orange") + -#' geom_col(data = data.frame(variable = "Mass storage", value = res_ga$carbon_footprint_storage), fill = "violet") + -#' scale_x_continuous(trans = "log1p") + -#' geom_vline(xintercept = res_ga$carbon_footprint_total_gCO2, col = "grey30", lwd = 1.2) + -#' geom_label(aes(label = round_conditionaly(prop_footprint)), fill = "grey90",position = position_stack(vjust = 1.1)) + -#' labs(title="Carbon footprint of the analysis", -#' subtitle = paste0("(", res_ga$carbon_footprint_total_gCO2, " g CO2",")"), -#' caption = "Please cite Lannelongue et al. 2021 (10.1002/advs.202100707)") + +#' geom_col(data = data.frame( +#' variable = "Total", +#' value = res_ga$carbon_footprint_total_gCO2 +#' ), fill = "grey30") + +#' geom_col(data = data.frame( +#' variable = "Cores", +#' value = res_ga$carbon_footprint_cores +#' ), fill = "darkred") + +#' geom_col(data = data.frame( +#' variable = "Memory", +#' value = res_ga$carbon_footprint_memory +#' ), fill = "orange") + +#' geom_col(data = data.frame( +#' variable = "Mass storage", +#' value = res_ga$carbon_footprint_storage +#' ), fill = "violet") + +#' scale_x_continuous( +#' trans = "log1p", +#' breaks = c(0, 10^c(1:max(log1p(as.numeric(res_ga$ref_value$value))))) +#' ) + +#' geom_vline( +#' xintercept = res_ga$carbon_footprint_total_gCO2, +#' col = "grey30", lwd = 1.2 +#' ) + +#' geom_label(aes(label = round_conditionaly(prop_footprint)), +#' fill = "grey90", position = position_stack(vjust = 1.1) +#' ) + +#' labs( +#' title = "Carbon footprint of the analysis", +#' subtitle = paste0( +#' "(", res_ga$carbon_footprint_total_gCO2, +#' " g CO2", ")" +#' ), +#' caption = "Please cite Lannelongue et al. 2021 (10.1002/advs.202100707)" +#' ) + #' xlab("Carbon footprint (g CO2) in log10") + -#' ylab("Modality") +#' ylab("Modality") + +#' theme(legend.position = "none") ga_footprint <- function(runtime_h = NULL, location_code = "WORLD", PUE = 1.67, @@ -269,7 +300,7 @@ ga_footprint <- function(runtime_h = NULL, if (add_ref_values) { if (is.null(ref_value)) { - ref_value <- ref_value_internal[order(as.numeric(ref_value_internal$value)),] + ref_value <- ref_value_internal[order(as.numeric(ref_value_internal$value)), ] rownames(ref_value) <- NULL } res[["ref_value"]] <- rbind( @@ -291,7 +322,7 @@ ga_footprint <- function(runtime_h = NULL, res[["energy_needed_kWh"]] <- res[["energy_needed_kWh"]] + res[["power_draw_storage_kWh"]] res[["carbon_footprint_total_gCO2"]] <- res[["carbon_footprint_cores"]] + res[["carbon_footprint_memory"]] + res[["carbon_footprint_storage"]] - if (add_ref_values) { + if (add_ref_values) { res[["ref_value"]] <- rbind( c("Total", res$carbon_footprint_total_gCO2, NA), c("Cores", res$carbon_footprint_cores, NA), @@ -308,9 +339,9 @@ ga_footprint <- function(runtime_h = NULL, # to force ggplot to keep row order res[["ref_value"]]$variable <- factor(res[["ref_value"]]$variable, - levels = res[["ref_value"]]$variable ) - -} + levels = res[["ref_value"]]$variable + ) + } return(res) } diff --git a/R/ga_targets.R b/R/ga_targets.R index 3a67d2c..5b4977a 100644 --- a/R/ga_targets.R +++ b/R/ga_targets.R @@ -1,4 +1,4 @@ -#' Compute footprint in grams of CO2 for {targets} pipelines +#' Compute footprint in grams of CO2 for \{targets\} pipelines #' #' @description #' @@ -40,7 +40,10 @@ #' command = Sys.sleep(2), #' description = "Sleep 2 seconds" #' ), -#' tar_target(x, writeLines(targets::tar_option_get("error"), "error.txt")) +#' tar_target(x, writeLines( +#' targets::tar_option_get("error"), +#' "error.txt" +#' )) #' ) #' }, #' ask = FALSE @@ -59,14 +62,41 @@ #' add_storage_estimation = TRUE #' ) #' -#' ggplot(res_gat$ref_value, aes(y = reorder(variable, as.numeric(value)), x = as.numeric(value), fill = log10(prop_footprint))) + +#' ggplot(res_gat$ref_value, aes( +#' y = reorder(variable, as.numeric(value)), +#' x = as.numeric(value), fill = log10(prop_footprint) +#' )) + #' geom_col() + -#' geom_col(data = data.frame(variable = "Total ", value = res_gat$carbon_footprint_gCO2), fill = "grey30") + -#' geom_col(data = data.frame(variable = "Cores", value = res_gat$carbon_intensity * res_gat$power_draw_for_cores_kWh), fill = "darkred") + -#' geom_col(data = data.frame(variable = "Memory", value = res_gat$carbon_intensity * res_gat$power_draw_for_memory_kWh), fill = "orange") + -#' geom_col(data = data.frame(variable = "Storage", value = res_gat$carbon_intensity * res_gat$power_draw_per_gb), fill = "violet") + +#' geom_col(data = data.frame( +#' variable = "Total ", +#' value = res_gat$carbon_footprint_total_gCO2 +#' ), fill = "grey30") + +#' geom_col( +#' data = data.frame( +#' variable = "Cores", +#' value = res_gat$carbon_intensity * res_gat$power_draw_for_cores_kWh +#' ), +#' fill = "darkred" +#' ) + +#' geom_col( +#' data = data.frame( +#' variable = "Memory", +#' value = res_gat$carbon_intensity * res_gat$power_draw_for_memory_kWh +#' ), +#' fill = "orange" +#' ) + +#' geom_col( +#' data = data.frame( +#' variable = "Storage", +#' value = res_gat$carbon_intensity * res_gat$power_draw_per_gb +#' ), +#' fill = "violet" +#' ) + #' scale_x_continuous(trans = "log1p") + -#' geom_vline(xintercept = res_gat$carbon_footprint_gCO2, col = "grey30", lwd = 1.2) + +#' geom_vline( +#' xintercept = res_gat$carbon_footprint_total_gCO2, +#' col = "grey30", lwd = 1.2 +#' ) + #' geom_label(aes(label = round(prop_footprint, 1)), fill = "grey90") + #' xlab("g CO^2") + #' ylab("Modality") @@ -121,8 +151,8 @@ ga_targets <- function(names = NULL, df_meta <- tar_meta_raw } - runtime_targets <- sum(df_meta$seconds, na.rm = TRUE) / 3600 - power_draw_stocks <- sum(df_meta$bytes, na.rm = TRUE) / 10^9 + runtime_targets <- sum(as.numeric(df_meta$seconds), na.rm = TRUE) / 3600 + power_draw_stocks <- sum(as.numeric(df_meta$bytes), na.rm = TRUE) / 10^9 res <- ga_footprint( runtime_h = as.numeric(runtime_targets), diff --git a/R/greenAlgo-package.R b/R/greenAlgo-package.R new file mode 100644 index 0000000..8239760 --- /dev/null +++ b/R/greenAlgo-package.R @@ -0,0 +1,7 @@ +#' \code{greenAlgoR} package +#' +#' This package computes ecological footprint in R (based on [green-algorithms](https://calculator.green-algorithms.org/). +#' greenAlgoR also made it simple to compute ecological footprint of {[targets](https://github.com/ropensci/targets)} pipelines.. +#' @name greenAlgoR-package +#' @import ggplot2 targets benchmarkme +NULL diff --git a/R/utils.R b/R/utils.R index 2aed245..c6b437e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -13,21 +13,19 @@ #' #' @return a data.frame #' @export +#' @importFrom utils read.csv #' @author Adrien Taudière #' @examples -#' carbon_intensity_internal <- csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/CI_aggregated.csv") -# TDP_cpu_internal <- csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/TDP_cpu.csv") -# ref_value_internal <- csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/referenceValues.csv") -# -# # usethis::use_data(carbon_intensity_internal, TDP_cpu_internal, ref_value_internal, internal = TRUE,overwrite = TRUE) -csv_from_url_ga <- function(url, remove_first_line = TRUE){ +#' carbon_intensity_internal <- +#' csv_from_url_ga("https://raw.githubusercontent.com/GreenAlgorithms/green-algorithms-tool/refs/heads/master/data/v2.2/CI_aggregated.csv") +csv_from_url_ga <- function(url, remove_first_line = TRUE) { url <- RCurl::getURL(url) - df <- read.csv(text = url, header = FALSE) - if(remove_first_line) { - df <- df[-1,] + df <- read.csv(text = url, header = FALSE) + if (remove_first_line) { + df <- df[-1, ] } - res <- as.data.frame(df[-1,]) - colnames(res) <- as.character(df[1,]) + res <- as.data.frame(df[-1, ]) + colnames(res) <- as.character(df[1, ]) return(res) } @@ -51,7 +49,6 @@ csv_from_url_ga <- function(url, remove_first_line = TRUE){ #' @examples #' session_runtime() #' session_runtime(compute_mass_storage = FALSE) - session_runtime <- function(compute_mass_storage = TRUE) { cpu_times_all <- proc.time() cpu_times_users <- cpu_times_all[1] + cpu_times_all[2] @@ -64,26 +61,44 @@ session_runtime <- function(compute_mass_storage = TRUE) { "time_elapsed" = time_elapsed, "cpu_times" = cpu_times_users_system ) - if(compute_mass_storage) { - mass_storage_used <- sum(gc()[1:2,2]) - mass_storage_max <- sum(gc()[1:2,6]) + if (compute_mass_storage) { + mass_storage_used <- sum(gc()[1:2, 2]) + mass_storage_max <- sum(gc()[1:2, 6]) - res[["mass_storage_used"]] = mass_storage_used - res[["mass_storage_max"]] = mass_storage_max + res[["mass_storage_used"]] <- mass_storage_used + res[["mass_storage_max"]] <- mass_storage_max } return(res) } -round_conditionaly(vec=c(1000.27890, 10.87988, 1.769869, 0.99796, 0.000179)) -round_conditionaly <- function(vec, cond = cbind(c(1.e-5, 5), c(0.001,3), c(0.01,3), c(1,2), c(10,1), c(100,0))){ + +#' Round numeric vector conditionaly +#' +#' @param vec a numeric vector +#' @param cond : a matrix of 2 row an n column with the first row defining the +#' condition and the second row defining the number to round. cond is order +#' in decreasing order of the 1 row internally. Thus the order in cond rows +#' is not important +#' @return a numeric vector of the same length as vec +#' @export +#' @author Adrien Taudière +#' @examples +#' round_conditionaly(vec = c(1000.27890, 10.87988, 1.769869, 0.99796, 0.000179)) +#' round_conditionaly( +#' vec = c(1000.27890, 0.000179, 10e-11), +#' cond = cbind(c(10e-5, 5), c(10, 2)) +#' ) +round_conditionaly <- function(vec, cond = cbind(c(1.e-5, 5), c(0.001, 3), c(0.01, 3), c(1, 2), c(10, 1), c(100, 0))) { + cond <- cond[, order(cond[1, ], decreasing = TRUE)] + res <- vec - for(j in 1:ncol(cond)) { - cond_local <- vec > cond[1,j] - res[cond_local] <- round(vec[cond_local], cond[2,j]) + for (j in 1:ncol(cond)) { + cond_local <- vec > cond[1, j] + res[cond_local] <- round(vec[cond_local], cond[2, j]) } -return(res) + return(res) } diff --git a/README.Rmd b/README.Rmd index 7baf935..6b45612 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,7 +1,7 @@ --- output: github_document always_allow_html: yes -bibliography: bibliography.bib +bibliography: pkgdown/assets/bibliography.bib --- @@ -9,13 +9,23 @@ bibliography: bibliography.bib ![R](https://img.shields.io/badge/r-%23276DC3.svg?style=for-the-badge&logo=r&logoColor=white) [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) -[![codecov](https://codecov.io/gh/adrientaudiere/MiscMetabar/graph/badge.svg?token=NXFRSIKYC0)](https://app.codecov.io/gh/adrientaudiere/MiscMetabar) -[![CodeFactor](https://www.codefactor.io/repository/github/adrientaudiere/miscmetabar/badge/master)](https://www.codefactor.io/repository/github/adrientaudiere/miscmetabar/overview/master) + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%", + message = FALSE +) +``` # greenAlgoR -This package computes ecological footprint in R (based on [green-algorithms](https://calculator.green-algorithms.org/, @lannelongue_green_2021). The aim is to facilitate the programmatic used of green-algorithms using the function `ga_footprint()`. Moreover, greenAlgoR made it simple to compute ecological footprint of {[targets](https://github.com/ropensci/targets)} pipelines using function `ga_targets()`. +This package computes ecological footprint in R (based on [green-algorithms](https://calculator.green-algorithms.org/), @lannelongue_green_2021). The aim is to facilitate the programmatic used of green-algorithms using the function `ga_footprint()`. Moreover, greenAlgoR made it simple to compute ecological footprint of {[targets](https://github.com/ropensci/targets)} pipelines using function `ga_targets()`. It is a beta-version. Please submit issues if you found problems or if you want to contribute. @@ -30,8 +40,42 @@ if (!require("devtools", quietly = TRUE)) { devtools::install_github("adrientaudiere/greenAlgoR") ``` +## Scientific foundation + +### The algorithm from @lannelongue_green_2021 + +You can refer to [green-algorithms](https://calculator.green-algorithms.org/) and +@lannelongue_green_2021 to understand the algorithm. Here is the short description +from [green-algorithms](https://calculator.green-algorithms.org/) + +The carbon footprint is calculated by estimating the energy draw of the +algorithm and the carbon intensity of producing this energy at a +given location: + +$$carbon footprint = energy needed * carbon intensity$$ + +Where the energy needed is: + +$$runtime * (power draw for cores * usage + power draw for memory) * PUE * PSF$$ + +The power draw for the computing cores depends on the model and number of cores, +while the memory power draw only depends on the size of memory available. +The usage factor corrects for the real core usage (default is 1, i.e. full usage). +The PUE (Power Usage Effectiveness) measures how much extra energy is needed +to operate the data centre (cooling, lighting etc.). + +The PSF (Pragmatic Scaling Factor) is used to take into account multiple +identical runs (e.g. for testing or optimisation). The Carbon Intensity depends +on the location and the technologies used to produce electricity. + ## Basic use +```{r} +devtools::load_all() +library("greenAlgoR") +``` + + ### In classical workflow #### Using explicit input @@ -40,93 +84,78 @@ devtools::install_github("adrientaudiere/greenAlgoR") ```{r} # Your model must match exactly a name in TDP_cpu_internal$model fp_12h <- ga_footprint(runtime_h = 12, cpu_model = "Core i3-10300") -fp_12h$carbon_footprint_gCO2 +fp_12h$carbon_footprint_total_gCO2 fp_12h$energy_needed_kWh ``` ```{r} +#| fig.alt: > +#| Barplot of the total footprint of the analysis, the footprint of +#| memory and the footprint of cpu. Other footprint such as flight +#| Paris-London or one-hour of netflix streaming are also plot. ggplot(fp_12h$ref_value, aes( - y = reorder(variable, as.numeric(value)), + y = variable, x = as.numeric(value), fill = log10(prop_footprint) )) + geom_col() + geom_col( - data = data.frame(variable = "Total ", value = fp_12h$carbon_footprint_gCO2), + data = data.frame( + variable = "Total", + value = fp_12h$carbon_footprint_total_gCO2 + ), fill = "grey30" ) + geom_col( data = data.frame( variable = "Cores", - value = fp_12h$carbon_intensity * fp_12h$power_draw_for_cores_kWh + value = fp_12h$carbon_footprint_cores ), fill = "darkred" ) + geom_col( data = data.frame( variable = "Memory", - value = fp_12h$carbon_intensity * fp_12h$power_draw_for_memory_kWh + value = fp_12h$carbon_footprint_memory ), fill = "orange" ) + - scale_x_continuous(trans = "log1p") + + scale_x_continuous( + trans = "log1p", + breaks = c(0, 10^c(1:max(log1p(as.numeric(fp_12h$ref_value$value))))) + ) + geom_vline( - xintercept = fp_12h$carbon_footprint_gCO2, + xintercept = fp_12h$carbon_footprint_total_gCO2, col = "grey30", lwd = 1.2 ) + - geom_label(aes(label = round(prop_footprint, 2)), fill = "grey90") + + geom_label( + aes(label = round_conditionaly(prop_footprint)), + fill = "grey90", + position = position_stack(vjust = 1.1) + ) + + labs( + title = "Carbon footprint of the analysis", + subtitle = paste0("(", fp_12h$carbon_footprint_total_gCO2, " g CO2", ")"), + caption = "Please cite Lannelongue et al. 2021 (10.1002/advs.202100707)" + ) + xlab("Carbon footprint (g CO2) in log10") + - ylab("Modality") + ylab("Modality") + + theme(legend.position = "none") ``` #### Based on the R session ```{r} -fp_session <- ga_footprint(runtime_h = "session") -fp_session$carbon_footprint_gCO2 +fp_session <- ga_footprint(runtime_h = "session", add_storage_estimation = TRUE) +fp_session$carbon_footprint_total_gCO2 fp_session$energy_needed_kWh ``` -```{r} -ggplot(fp_session$ref_value, aes( - y = reorder(variable, as.numeric(value)), - x = as.numeric(value), - fill = log10(prop_footprint) -)) + - geom_col() + - geom_col( - data = data.frame(variable = "Total ", value = fp_session$carbon_footprint_gCO2), - fill = "grey30" - ) + - geom_col( - data = data.frame( - variable = "Cores", - value = fp_session$carbon_intensity * fp_session$power_draw_for_cores_kWh - ), - fill = "darkred" - ) + - geom_col( - data = data.frame( - variable = "Memory", - value = fp_session$carbon_intensity * fp_session$power_draw_for_memory_kWh - ), - fill = "orange" - ) + - scale_x_continuous(trans = "log1p") + - geom_vline( - xintercept = fp_session$carbon_footprint_gCO2, - col = "grey30", - lwd = 1.2 - ) + - geom_label(aes(label = round(prop_footprint, 2)), fill = "grey90") + - xlab("Carbon footprint (g CO2) in log10") + - ylab("Modality") -``` ### Based on a targets pipeline -```{r} +```{r, eval=FALSE} ga_targets() ``` diff --git a/README.md b/README.md new file mode 100644 index 0000000..742f47a --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ + + + + + +![R](https://img.shields.io/badge/r-%23276DC3.svg?style=for-the-badge&logo=r&logoColor=white) +[![License: GPL +v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) + + + +# greenAlgoR + +This package computes ecological footprint in R (based on +[green-algorithms](https://calculator.green-algorithms.org/), +Lannelongue, Grealey, and Inouye (2021)). The aim is to facilitate the +programmatic used of green-algorithms using the function +`ga_footprint()`. Moreover, greenAlgoR made it simple to compute +ecological footprint of {[targets](https://github.com/ropensci/targets)} +pipelines using function `ga_targets()`. + +It is a beta-version. Please submit issues if you found problems or if +you want to contribute. + +## Installation + +greenAlgoR is not available on CRAN for the moment. You can install the +stable development version from GitHub with: + +``` r +if (!require("devtools", quietly = TRUE)) { + install.packages("devtools") +} +devtools::install_github("adrientaudiere/greenAlgoR") +``` + +## Scientific foundation + +### The algorithm from Lannelongue, Grealey, and Inouye (2021) + +You can refer to +[green-algorithms](https://calculator.green-algorithms.org/) and +Lannelongue, Grealey, and Inouye (2021) to understand the algorithm. +Here is the short description from +[green-algorithms](https://calculator.green-algorithms.org/) + +The carbon footprint is calculated by estimating the energy draw of the +algorithm and the carbon intensity of producing this energy at a given +location: + +``` math +carbon footprint = energy needed * carbon intensity +``` + +Where the energy needed is: + +``` math +runtime * (power draw for cores * usage + power draw for memory) * PUE * PSF +``` + +The power draw for the computing cores depends on the model and number +of cores, while the memory power draw only depends on the size of memory +available. The usage factor corrects for the real core usage (default is +1, i.e. full usage). The PUE (Power Usage Effectiveness) measures how +much extra energy is needed to operate the data centre (cooling, +lighting etc.). + +The PSF (Pragmatic Scaling Factor) is used to take into account multiple +identical runs (e.g. for testing or optimisation). The Carbon Intensity +depends on the location and the technologies used to produce +electricity. + +## Basic use + +``` r +devtools::load_all() +library("greenAlgoR") +``` + +### In classical workflow + +#### Using explicit input + +``` r +# Your model must match exactly a name in TDP_cpu_internal$model +fp_12h <- ga_footprint(runtime_h = 12, cpu_model = "Core i3-10300") +fp_12h$carbon_footprint_total_gCO2 +#> [1] 829.0232 +fp_12h$energy_needed_kWh +#> [1] 1.745312 +``` + +``` r +ggplot(fp_12h$ref_value, aes( + y = variable, + x = as.numeric(value), + fill = log10(prop_footprint) +)) + + geom_col() + + geom_col( + data = data.frame( + variable = "Total", + value = fp_12h$carbon_footprint_total_gCO2 + ), + fill = "grey30" + ) + + geom_col( + data = data.frame( + variable = "Cores", + value = fp_12h$carbon_footprint_cores + ), + fill = "darkred" + ) + + geom_col( + data = data.frame( + variable = "Memory", + value = fp_12h$carbon_footprint_memory + ), + fill = "orange" + ) + + scale_x_continuous( + trans = "log1p", + breaks = c(0, 10^c(1:max(log1p(as.numeric(fp_12h$ref_value$value))))) + ) + + geom_vline( + xintercept = fp_12h$carbon_footprint_total_gCO2, + col = "grey30", + lwd = 1.2 + ) + + geom_label( + aes(label = round_conditionaly(prop_footprint)), + fill = "grey90", + position = position_stack(vjust = 1.1) + ) + + labs( + title = "Carbon footprint of the analysis", + subtitle = paste0("(", fp_12h$carbon_footprint_total_gCO2, " g CO2", ")"), + caption = "Please cite Lannelongue et al. 2021 (10.1002/advs.202100707)" + ) + + xlab("Carbon footprint (g CO2) in log10") + + ylab("Modality") + + theme(legend.position = "none") +``` + + + +#### Based on the R session + +``` r +fp_session <- ga_footprint(runtime_h = "session", add_storage_estimation = TRUE) +fp_session$carbon_footprint_total_gCO2 +#> user.self +#> 0.01989738 +fp_session$energy_needed_kWh +#> user.self +#> 4.188922e-05 +``` + +### Based on a targets pipeline + +``` r +ga_targets() +``` + +## Roadmap + +- [ ] Automatically find cpu model using benchmarkme::get_cpu() +- [ ] Submit to CRAN? + +