From c79f5977342a4e13aab579387ba4c8753b21cbb2 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 18 Jan 2024 22:22:15 -0500 Subject: [PATCH 1/5] Check if offline in nixlibs.R --- r/tools/nixlibs.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index dfe379ebe20df..0410227d7a6b9 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -926,8 +926,7 @@ if (!is_release && !test_mode) { options(.arrow.cleanup = character()) on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE) -# enable full featured builds for macOS in case of CRAN source builds. -if (not_cran || on_macos) { +if (not_cran) { # Set more eager defaults if (env_is("LIBARROW_BINARY", "")) { Sys.setenv(LIBARROW_BINARY = "true") @@ -944,6 +943,15 @@ build_ok <- !env_is("LIBARROW_BUILD", "false") # Check if we're authorized to download download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") +# If not forbidden from downloading, check if we are offline and turn off downloading. +# The default libarrow source build will download its source dependencies and fail +# if they can't be retrieved. +# But, don't do this if the user has requested a binary or a non-minimal build: +# we should error rather than silently succeeding with a minimal build. +if (Sys.getenv("LIBARROW_BINARY") %in% c("false", "") && !env_is("LIBARROW_MINIMAL", "false")) { + download_ok <- download_ok && try_download("https://apache.jfrog.io/artifactory/arrow/r/") +} + download_libarrow_ok <- download_ok && !env_is("LIBARROW_DOWNLOAD", "false") # This "tools/thirdparty_dependencies" path, within the tar file, might exist if From 704dc89363332bca033e8f9c584a33d55b61b470 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Thu, 18 Jan 2024 22:23:07 -0500 Subject: [PATCH 2/5] Rename TEST_OFFLINE_BUILD to ARROW_OFFLINE_BUILD --- dev/tasks/r/github.linux.offline.build.yml | 2 +- dev/tasks/tasks.yml | 2 +- r/NEWS.md | 2 +- r/configure | 2 +- r/tools/nixlibs.R | 2 +- r/vignettes/developers/setup.Rmd | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/tasks/r/github.linux.offline.build.yml b/dev/tasks/r/github.linux.offline.build.yml index b116accda891c..7a747ac480084 100644 --- a/dev/tasks/r/github.linux.offline.build.yml +++ b/dev/tasks/r/github.linux.offline.build.yml @@ -79,7 +79,7 @@ jobs: shell: Rscript {0} - name: Install env: - TEST_OFFLINE_BUILD: true + ARROW_OFFLINE_BUILD: true LIBARROW_MINIMAL: false {{ macros.github_set_sccache_envvars()|indent(8)}} run: | diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 04faef427e281..5f294a3d4f641 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1279,7 +1279,7 @@ tasks: r_org: library r_image: r-base r_tag: latest - flags: '-e TEST_OFFLINE_BUILD=true' + flags: '-e ARROW_OFFLINE_BUILD=true' test-r-dev-duckdb: ci: github diff --git a/r/NEWS.md b/r/NEWS.md index 9badf4700a36e..22eb5b34ceb0f 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -32,7 +32,7 @@ ## Minor improvements and fixes -* Don't download cmake when TEST_OFFLINE_BUILD=true and update `SystemRequirements` (#39602). +* Don't download cmake when ARROW_OFFLINE_BUILD=true and update `SystemRequirements` (#39602). * Fallback to source build gracefully if binary download fails (#39587). * An error is now thrown instead of warning and pulling the data into R when any of `sub`, `gsub`, `stringr::str_replace`, `stringr::str_replace_all` are diff --git a/r/configure b/r/configure index 029fc004dfc4c..0882ee6719c4b 100755 --- a/r/configure +++ b/r/configure @@ -73,7 +73,7 @@ FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'` ARROW_USE_PKG_CONFIG=`echo $ARROW_USE_PKG_CONFIG | tr '[:upper:]' '[:lower:]'` # Just used in testing: whether or not it is ok to download dependencies (in the # bundled build) -TEST_OFFLINE_BUILD=`echo $TEST_OFFLINE_BUILD | tr '[:upper:]' '[:lower:]'` +ARROW_OFFLINE_BUILD=`echo $ARROW_OFFLINE_BUILD | tr '[:upper:]' '[:lower:]'` VERSION=`grep '^Version' DESCRIPTION | sed s/Version:\ //` UNAME=`uname -s` diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 0410227d7a6b9..3d21201fea035 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -942,7 +942,7 @@ if (not_cran) { build_ok <- !env_is("LIBARROW_BUILD", "false") # Check if we're authorized to download -download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") +download_ok <- !test_mode && !env_is("ARROW_OFFLINE_BUILD", "true") # If not forbidden from downloading, check if we are offline and turn off downloading. # The default libarrow source build will download its source dependencies and fail # if they can't be retrieved. diff --git a/r/vignettes/developers/setup.Rmd b/r/vignettes/developers/setup.Rmd index 119bc78419410..4c1eab1e6972f 100644 --- a/r/vignettes/developers/setup.Rmd +++ b/r/vignettes/developers/setup.Rmd @@ -280,7 +280,7 @@ withr::with_makevars(list(CPPFLAGS = "", LDFLAGS = ""), remotes::install_github( * See the user-facing [article on installation](../install.html) for a large number of environment variables that determine how the build works and what features get built. -* `TEST_OFFLINE_BUILD`: When set to `true`, the build script will not download +* `ARROW_OFFLINE_BUILD`: When set to `true`, the build script will not download prebuilt the C++ library binary or, if needed, `cmake`. It will turn off any features that require a download, unless they're available in `ARROW_THIRDPARTY_DEPENDENCY_DIR` or the `tools/thirdparty_download/` subfolder. From 2773c31e8385fcc999bdc6eaa77903c6ef7582c3 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 19 Jan 2024 08:38:57 -0500 Subject: [PATCH 3/5] Add log messages and don't set not_cran just because we're on dev version --- r/tools/nixlibs.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 3d21201fea035..a6eac5b954c11 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -912,8 +912,6 @@ if (is_release) { VERSION <- VERSION[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") } else { - # Don't override explictily set NOT_CRAN env var, as it is used in CI. - not_cran <- !env_is("NOT_CRAN", "false") arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") } @@ -943,13 +941,19 @@ build_ok <- !env_is("LIBARROW_BUILD", "false") # Check if we're authorized to download download_ok <- !test_mode && !env_is("ARROW_OFFLINE_BUILD", "true") +if (!download_ok) { + lg("Dependency downloading disabled. Unset ARROW_OFFLINE_BUILD to enable", .indent = "***") +} # If not forbidden from downloading, check if we are offline and turn off downloading. # The default libarrow source build will download its source dependencies and fail # if they can't be retrieved. # But, don't do this if the user has requested a binary or a non-minimal build: # we should error rather than silently succeeding with a minimal build. -if (Sys.getenv("LIBARROW_BINARY") %in% c("false", "") && !env_is("LIBARROW_MINIMAL", "false")) { - download_ok <- download_ok && try_download("https://apache.jfrog.io/artifactory/arrow/r/") +if (download_ok && Sys.getenv("LIBARROW_BINARY") %in% c("false", "") && !env_is("LIBARROW_MINIMAL", "false")) { + download_ok <- try_download("https://apache.jfrog.io/artifactory/arrow/r/", tempfile()) + if (!download_ok) { + lg("Network connection not available", .indent = "***") + } } download_libarrow_ok <- download_ok && !env_is("LIBARROW_DOWNLOAD", "false") From ab0598ed89dbb011ece758176d9085c5791ffba7 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 19 Jan 2024 08:40:09 -0500 Subject: [PATCH 4/5] run styler --- r/tools/nixlibs.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index a6eac5b954c11..c23701c7669d4 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -120,11 +120,11 @@ validate_checksum <- function(binary_url, libfile, hush = quietly) { # The warnings from system2 if it fails pop up later in the log and thus are # more confusing than they are helpful (so we suppress them) checksum_ok <- suppressWarnings(system2( - "shasum", - args = c("--status", "-a", "512", "-c", checksum_file), - stdout = ifelse(quietly, FALSE, ""), - stderr = ifelse(quietly, FALSE, "") - )) == 0 + "shasum", + args = c("--status", "-a", "512", "-c", checksum_file), + stdout = ifelse(quietly, FALSE, ""), + stderr = ifelse(quietly, FALSE, "") + )) == 0 if (!checksum_ok) { checksum_ok <- suppressWarnings(system2( @@ -565,8 +565,8 @@ build_libarrow <- function(src_dir, dst_dir) { env_var_list <- c(env_var_list, ARROW_DEPENDENCY_SOURCE = "BUNDLED") } - # On macOS, if not otherwise set, let's override Boost_SOURCE to be bundled - # Necessary due to #39590 for CRAN + # On macOS, if not otherwise set, let's override Boost_SOURCE to be bundled + # Necessary due to #39590 for CRAN if (on_macos) { # Using lowercase (e.g. Boost_SOURCE) to match the cmake args we use already. deps_to_bundle <- c("Boost", "lz4") From 01eb6ee10fc65c8ebdd635d12921e22392bf24d3 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 19 Jan 2024 08:54:07 -0500 Subject: [PATCH 5/5] Reorganize so we don't check for a nightly if offline --- r/tools/nixlibs.R | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index c23701c7669d4..17c6ab0a8078b 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -906,24 +906,12 @@ on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" # For local debugging, set ARROW_R_DEV=TRUE to make this script print more quietly <- !env_is("ARROW_R_DEV", "true") -not_cran <- env_is("NOT_CRAN", "true") - -if (is_release) { - VERSION <- VERSION[1, 1:3] - arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") -} else { - arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") -} - -if (!is_release && !test_mode) { - VERSION <- find_latest_nightly(VERSION) -} - # To collect dirs to rm on exit, use cleanup() to add dirs # we reset it to avoid errors on reruns in the same session. options(.arrow.cleanup = character()) on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE) +not_cran <- env_is("NOT_CRAN", "true") if (not_cran) { # Set more eager defaults if (env_is("LIBARROW_BINARY", "")) { @@ -958,6 +946,19 @@ if (download_ok && Sys.getenv("LIBARROW_BINARY") %in% c("false", "") && !env_is( download_libarrow_ok <- download_ok && !env_is("LIBARROW_DOWNLOAD", "false") +# Set binary repos +if (is_release) { + VERSION <- VERSION[1, 1:3] + arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") +} else { + arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") +} + +# If we're on a dev version, look for the most recent libarrow binary version +if (download_libarrow_ok && !is_release && !test_mode) { + VERSION <- find_latest_nightly(VERSION) +} + # This "tools/thirdparty_dependencies" path, within the tar file, might exist if # create_package_with_all_dependencies() was run, or if someone has created it # manually before running make build.