From 12c6b7ceeabdb277af1d100acd42ecdb5229ad85 Mon Sep 17 00:00:00 2001
From: david-cortes
Date: Mon, 26 Aug 2024 22:16:36 +0200
Subject: [PATCH] [R] Remove demos (#10750)

---
 R-package/demo/00Index                    |  14 ---
 R-package/demo/README.md                  |  19 ----
 R-package/demo/basic_walkthrough.R        | 113 ---------------------
 R-package/demo/boost_from_prediction.R    |  26 -----
 R-package/demo/create_sparse_matrix.R     | 117 ----------------------
 R-package/demo/cross_validation.R         |  51 ----------
 R-package/demo/custom_objective.R         |  65 ------------
 R-package/demo/early_stopping.R           |  40 --------
 R-package/demo/generalized_linear_model.R |  33 ------
 R-package/demo/gpu_accelerated.R          |  45 ---------
 R-package/demo/interaction_constraints.R  | 113 ---------------------
 R-package/demo/poisson_regression.R       |   6 --
 R-package/demo/predict_first_ntree.R      |  23 -----
 R-package/demo/predict_leaf_indices.R     |  54 ----------
 R-package/demo/runall.R                   |  13 ---
 R-package/demo/tweedie_regression.R       |  49 ---------
 tests/ci_build/test_r_package.py          |   3 -
 17 files changed, 784 deletions(-)
 delete mode 100644 R-package/demo/00Index
 delete mode 100644 R-package/demo/README.md
 delete mode 100644 R-package/demo/basic_walkthrough.R
 delete mode 100644 R-package/demo/boost_from_prediction.R
 delete mode 100644 R-package/demo/create_sparse_matrix.R
 delete mode 100644 R-package/demo/cross_validation.R
 delete mode 100644 R-package/demo/custom_objective.R
 delete mode 100644 R-package/demo/early_stopping.R
 delete mode 100644 R-package/demo/generalized_linear_model.R
 delete mode 100644 R-package/demo/gpu_accelerated.R
 delete mode 100644 R-package/demo/interaction_constraints.R
 delete mode 100644 R-package/demo/poisson_regression.R
 delete mode 100644 R-package/demo/predict_first_ntree.R
 delete mode 100644 R-package/demo/predict_leaf_indices.R
 delete mode 100644 R-package/demo/runall.R
 delete mode 100644 R-package/demo/tweedie_regression.R

diff --git a/R-package/demo/00Index b/R-package/demo/00Index
deleted file mode 100644
index fa09fa900486..000000000000
--- a/R-package/demo/00Index
+++ /dev/null
@@ -1,14 +0,0 @@
-basic_walkthrough          Basic feature walkthrough
-custom_objective           Customize loss function, and evaluation metric
-boost_from_prediction      Boosting from existing prediction
-predict_first_ntree        Predicting using first n trees
-generalized_linear_model   Generalized Linear Model
-cross_validation           Cross validation
-create_sparse_matrix       Create Sparse Matrix
-predict_leaf_indices       Predicting the corresponding leaves
-early_stopping             Early Stop in training
-poisson_regression         Poisson regression on count data
-tweedie_regression         Tweedie regression
-gpu_accelerated            GPU-accelerated tree building algorithms
-interaction_constraints    Interaction constraints among features
-
diff --git a/R-package/demo/README.md b/R-package/demo/README.md
deleted file mode 100644
index 99a492230d45..000000000000
--- a/R-package/demo/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-XGBoost R Feature Walkthrough
-====
-* [Basic walkthrough of wrappers](basic_walkthrough.R)
-* [Customize loss function, and evaluation metric](custom_objective.R)
-* [Boosting from existing prediction](boost_from_prediction.R)
-* [Predicting using first n trees](predict_first_ntree.R)
-* [Generalized Linear Model](generalized_linear_model.R)
-* [Cross validation](cross_validation.R)
-* [Create a sparse matrix from a dense one](create_sparse_matrix.R)
-* [Use GPU-accelerated tree building algorithms](gpu_accelerated.R)
-
-Benchmarks
-====
-* [Starter script for Kaggle Higgs Boson](../../demo/kaggle-higgs)
-
-Notes
-====
-*
Contribution of examples, benchmarks is more than welcomed! -* If you like to share how you use xgboost to solve your problem, send a pull request :) diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R deleted file mode 100644 index c65790109fc2..000000000000 --- a/R-package/demo/basic_walkthrough.R +++ /dev/null @@ -1,113 +0,0 @@ -require(xgboost) -require(methods) - -# we load in the agaricus dataset -# In this example, we are aiming to predict whether a mushroom is edible -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -train <- agaricus.train -test <- agaricus.test -# the loaded data is stored in sparseMatrix, and label is a numeric vector in {0,1} -class(train$label) -class(train$data) - -#-------------Basic Training using XGBoost----------------- -# this is the basic usage of xgboost you can put matrix in data field -# note: we are putting in sparse matrix here, xgboost naturally handles sparse input -# use sparse matrix when your feature is sparse(e.g. when you are using one-hot encoding vector) -print("Training xgboost with sparseMatrix") -bst <- xgboost(x = train$data, y = factor(train$label, c(0, 1)), - params = list(max_depth = 2, eta = 1), - nrounds = 2, nthread = 2) -# alternatively, you can put in dense matrix, i.e. basic R-matrix -print("Training xgboost with Matrix") -bst <- xgboost(x = as.matrix(train$data), y = factor(train$label, c(0, 1)), - params = list(max_depth = 2, eta = 1), - nrounds = 2, nthread = 2) - -# you can also put in xgb.DMatrix object, which stores label, data and other meta datas needed for advanced features -print("Training xgboost with xgb.DMatrix") -dtrain <- xgb.DMatrix(data = train$data, label = train$label) -params <- list(max_depth = 2, eta = 1, nthread = 2, objective = "binary:logistic") -bst <- xgb.train(data = dtrain, params = params, nrounds = 2) - -# Verbose = 0,1,2 -print("Train xgboost with verbose 0, no message") -bst <- xgb.train(data = dtrain, params = params, nrounds = 2, verbose = 0) -print("Train xgboost with verbose 1, print evaluation metric") -bst <- xgb.train(data = dtrain, params = params, nrounds = 2, verbose = 1) -print("Train xgboost with verbose 2, also print information about tree") -bst <- xgb.train(data = dtrain, params = params, nrounds = 2, verbose = 2) - -# you can also specify data as file path to a LIBSVM format input -# since we do not have this file with us, the following line is just for illustration -# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2,objective = "binary:logistic") - -#--------------------basic prediction using xgboost-------------- -# you can do prediction using the following line -# you can put in Matrix, sparseMatrix, or xgb.DMatrix -pred <- predict(bst, test$data) -err <- mean(as.numeric(pred > 0.5) != test$label) -print(paste("test-error=", err)) - -#-------------------save and load models------------------------- -# save model to binary local file -xgb.save(bst, "xgboost.model") -# load binary model to R -# Function doesn't take 'nthreads', but can be set like this: -RhpcBLASctl::omp_set_num_threads(1) -bst2 <- xgb.load("xgboost.model") -pred2 <- predict(bst2, test$data) -# pred2 should be identical to pred -print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred)))) - -# save model to R's raw vector -raw <- xgb.save.raw(bst) -# load binary model to R -bst3 <- xgb.load.raw(raw) -pred3 <- predict(bst3, test$data) -# pred3 should be identical to pred -print(paste("sum(abs(pred3-pred))=", 
sum(abs(pred3 - pred)))) - -#----------------Advanced features -------------- -# to use advanced features, we need to put data in xgb.DMatrix -dtrain <- xgb.DMatrix(data = train$data, label = train$label) -dtest <- xgb.DMatrix(data = test$data, label = test$label) -#---------------Using an evaluation set---------------- -# 'evals' is a list of xgb.DMatrix, each of them is tagged with name -evals <- list(train = dtrain, test = dtest) -# to train with an evaluation set, use xgb.train, which contains more advanced features -# 'evals' argument allows us to monitor the evaluation result on all data in the list -print("Train xgboost using xgb.train with evaluation data") -bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals, - nthread = 2, objective = "binary:logistic") -# we can change evaluation metrics, or use multiple evaluation metrics -print("train xgboost using xgb.train with evaluation data, watch logloss and error") -bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals, - eval_metric = "error", eval_metric = "logloss", - nthread = 2, objective = "binary:logistic") - -# xgb.DMatrix can also be saved using xgb.DMatrix.save -xgb.DMatrix.save(dtrain, "dtrain.buffer") -# to load it in, simply call xgb.DMatrix -dtrain2 <- xgb.DMatrix("dtrain.buffer") -bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals, - nthread = 2, objective = "binary:logistic") -# information can be extracted from xgb.DMatrix using getinfo -label <- getinfo(dtest, "label") -pred <- predict(bst, dtest) -err <- as.numeric(sum(as.integer(pred > 0.5) != label)) / length(label) -print(paste("test-error=", err)) - -# You can dump the tree you learned using xgb.dump into a text file -dump_path <- file.path(tempdir(), 'dump.raw.txt') -xgb.dump(bst, dump_path, with_stats = TRUE) - -# Finally, you can check which features are the most important. 
-print("Most important features (look at column Gain):") -imp_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst) -print(imp_matrix) - -# Feature importance bar plot by gain -print("Feature importance Plot : ") -print(xgb.plot.importance(importance_matrix = imp_matrix)) diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R deleted file mode 100644 index 75af70dba0d7..000000000000 --- a/R-package/demo/boost_from_prediction.R +++ /dev/null @@ -1,26 +0,0 @@ -require(xgboost) -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) - -evals <- list(eval = dtest, train = dtrain) -### -# advanced: start from a initial base prediction -# -print('start running example to start from a initial prediction') -# train xgboost for 1 round -param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic') -bst <- xgb.train(param, dtrain, 1, evals) -# Note: we need the margin value instead of transformed prediction in set_base_margin -# do predict with output_margin=TRUE, will always give you margin values before logistic transformation -ptrain <- predict(bst, dtrain, outputmargin = TRUE) -ptest <- predict(bst, dtest, outputmargin = TRUE) -# set the base_margin property of dtrain and dtest -# base margin is the base prediction we will boost from -setinfo(dtrain, "base_margin", ptrain) -setinfo(dtest, "base_margin", ptest) - -print('this is result of boost from initial prediction') -bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals) diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R deleted file mode 100644 index 08a40608cdf8..000000000000 --- a/R-package/demo/create_sparse_matrix.R +++ /dev/null @@ -1,117 +0,0 @@ -require(xgboost) -require(Matrix) -require(data.table) -if (!require(vcd)) { - install.packages('vcd') #Available in CRAN. Used for its dataset with categorical values. - require(vcd) -} -# According to its documentation, XGBoost works only on numbers. -# Sometimes the dataset we have to work on have categorical data. -# A categorical variable is one which have a fixed number of values. -# By example, if for each observation a variable called "Colour" can have only -# "red", "blue" or "green" as value, it is a categorical variable. -# -# In R, categorical variable is called Factor. -# Type ?factor in console for more information. -# -# In this demo we will see how to transform a dense dataframe with categorical variables to a sparse matrix -# before analyzing it in XGBoost. -# The method we are going to see is usually called "one hot encoding". - -#load Arthritis dataset in memory. -data(Arthritis) - -# create a copy of the dataset with data.table package -# (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent -# and its performance are really good). -df <- data.table(Arthritis, keep.rownames = FALSE) - -# Let's have a look to the data.table -cat("Print the dataset\n") -print(df) - -# 2 columns have factor type, one has ordinal type -# (ordinal variable is a categorical variable with values which can be ordered, here: None > Some > Marked). -cat("Structure of the dataset\n") -str(df) - -# Let's add some new categorical features to see if it helps. -# Of course these feature are highly correlated to the Age feature. 
-# Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features, -# even in case of highly correlated features. - -# For the first feature we create groups of age by rounding the real age. -# Note that we transform it to factor (categorical data) so the algorithm treat them as independent values. -df[, AgeDiscret := as.factor(round(Age / 10, 0))] - -# Here is an even stronger simplification of the real age with an arbitrary split at 30 years old. -# I choose this value based on nothing. -# We will see later if simplifying the information based on arbitrary values is a good strategy -# (I am sure you already have an idea of how well it will work!). -df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))] - -# We remove ID as there is nothing to learn from this feature (it will just add some noise as the dataset is small). -df[, ID := NULL] - -# List the different values for the column Treatment: Placebo, Treated. -cat("Values of the categorical feature Treatment\n") -print(levels(df[, Treatment])) - -# Next step, we will transform the categorical data to dummy variables. -# This method is also called one hot encoding. -# The purpose is to transform each value of each categorical feature in one binary feature. -# -# Let's take, the column Treatment will be replaced by two columns, Placebo, and Treated. -# Each of them will be binary. -# For example an observation which had the value Placebo in column Treatment before the transformation will have, after the transformation, -# the value 1 in the new column Placebo and the value 0 in the new column Treated. -# -# Formulae Improved~.-1 used below means transform all categorical features but column Improved to binary values. -# Column Improved is excluded because it will be our output column, the one we want to predict. -sparse_matrix <- sparse.model.matrix(Improved ~ . - 1, data = df) - -cat("Encoding of the sparse Matrix\n") -print(sparse_matrix) - -# Create the output vector (not sparse) -# 1. Set, for all rows, field in Y column to 0; -# 2. set Y to 1 when Improved == Marked; -# 3. Return Y column -output_vector <- df[, Y := 0][Improved == "Marked", Y := 1][, Y] - -# Following is the same process as other demo -cat("Learning...\n") -bst <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = output_vector), max_depth = 9, - eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") - -importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst) -print(importance) -# According to the matrix below, the most important feature in this dataset to predict if the treatment will work is the Age. -# The second most important feature is having received a placebo or not. -# The sex is third. -# Then we see our generated features (AgeDiscret). We can see that their contribution is very low (Gain column). - -# Does these result make sense? -# Let's check some Chi2 between each of these features and the outcome. - -print(chisq.test(df$Age, df$Y)) -# Pearson correlation between Age and illness disappearing is 35 - -print(chisq.test(df$AgeDiscret, df$Y)) -# Our first simplification of Age gives a Pearson correlation of 8. - -print(chisq.test(df$AgeCat, df$Y)) -# The perfectly random split I did between young and old at 30 years old have a low correlation of 2. 
-# It's a result we may expect as may be in my mind > 30 years is being old (I am 32 and starting feeling old, this may explain that), -# but for the illness we are studying, the age to be vulnerable is not the same. -# Don't let your "gut" lower the quality of your model. In "data science", there is science :-) - -# As you can see, in general destroying information by simplifying it won't improve your model. -# Chi2 just demonstrates that. -# But in more complex cases, creating a new feature based on existing one which makes link with the outcome -# more obvious may help the algorithm and improve the model. -# The case studied here is not enough complex to show that. Check Kaggle forum for some challenging datasets. -# However it's almost always worse when you add some arbitrary rules. -# Moreover, you can notice that even if we have added some not useful new features highly correlated with -# other features, the boosting tree algorithm have been able to choose the best one, which in this case is the Age. -# Linear model may not be that strong in these scenario. diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R deleted file mode 100644 index cf048c5ed600..000000000000 --- a/R-package/demo/cross_validation.R +++ /dev/null @@ -1,51 +0,0 @@ -require(xgboost) -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) - -nrounds <- 2 -param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic') - -cat('running cross validation\n') -# do cross validation, this will print result out as -# [iteration] metric_name:mean_value+std_value -# std_value is standard deviation of the metric -xgb.cv(param, dtrain, nrounds, nfold = 5, metrics = 'error') - -cat('running cross validation, disable standard deviation display\n') -# do cross validation, this will print result out as -# [iteration] metric_name:mean_value+std_value -# std_value is standard deviation of the metric -xgb.cv(param, dtrain, nrounds, nfold = 5, - metrics = 'error', showsd = FALSE) - -### -# you can also do cross validation with customized loss function -# See custom_objective.R -## -print('running cross validation, with customized loss function') - -logregobj <- function(preds, dtrain) { - labels <- getinfo(dtrain, "label") - preds <- 1 / (1 + exp(-preds)) - grad <- preds - labels - hess <- preds * (1 - preds) - return(list(grad = grad, hess = hess)) -} -evalerror <- function(preds, dtrain) { - labels <- getinfo(dtrain, "label") - err <- as.numeric(sum(labels != (preds > 0))) / length(labels) - return(list(metric = "error", value = err)) -} - -param <- list(max_depth = 2, eta = 1, - objective = logregobj, eval_metric = evalerror) -# train with customized objective -xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5) - -# do cross validation with prediction values for each fold -res <- xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5, prediction = TRUE) -res$evaluation_log -length(res$pred) diff --git a/R-package/demo/custom_objective.R b/R-package/demo/custom_objective.R deleted file mode 100644 index 03d7b346471b..000000000000 --- a/R-package/demo/custom_objective.R +++ /dev/null @@ -1,65 +0,0 @@ -require(xgboost) -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- 
xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) - -# note: for customized objective function, we leave objective as default -# note: what we are getting is margin value in prediction -# you must know what you are doing -evals <- list(eval = dtest, train = dtrain) -num_round <- 2 - -# user define objective function, given prediction, return gradient and second order gradient -# this is log likelihood loss -logregobj <- function(preds, dtrain) { - labels <- getinfo(dtrain, "label") - preds <- 1 / (1 + exp(-preds)) - grad <- preds - labels - hess <- preds * (1 - preds) - return(list(grad = grad, hess = hess)) -} - -# user defined evaluation function, return a pair metric_name, result -# NOTE: when you do customized loss function, the default prediction value is margin -# this may make builtin evaluation metric not function properly -# for example, we are doing logistic loss, the prediction is score before logistic transformation -# the builtin evaluation error assumes input is after logistic transformation -# Take this in mind when you use the customization, and maybe you need write customized evaluation function -evalerror <- function(preds, dtrain) { - labels <- getinfo(dtrain, "label") - err <- as.numeric(sum(labels != (preds > 0))) / length(labels) - return(list(metric = "error", value = err)) -} - -param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0, - objective = logregobj, eval_metric = evalerror) -print('start training with user customized objective') -# training with customized objective, we can also do step by step training -# simply look at xgboost.py's implementation of train -bst <- xgb.train(param, dtrain, num_round, evals) - -# -# there can be cases where you want additional information -# being considered besides the property of DMatrix you can get by getinfo -# you can set additional information as attributes if DMatrix - -# set label attribute of dtrain to be label, we use label as an example, it can be anything -attr(dtrain, 'label') <- getinfo(dtrain, 'label') -# this is new customized objective, where you can access things you set -# same thing applies to customized evaluation function -logregobjattr <- function(preds, dtrain) { - # now you can access the attribute in customized function - labels <- attr(dtrain, 'label') - preds <- 1 / (1 + exp(-preds)) - grad <- preds - labels - hess <- preds * (1 - preds) - return(list(grad = grad, hess = hess)) -} -param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0, - objective = logregobjattr, eval_metric = evalerror) -print('start training with user customized objective, with additional attributes in DMatrix') -# training with customized objective, we can also do step by step training -# simply look at xgboost.py's implementation of train -bst <- xgb.train(param, dtrain, num_round, evals) diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R deleted file mode 100644 index 057440882567..000000000000 --- a/R-package/demo/early_stopping.R +++ /dev/null @@ -1,40 +0,0 @@ -require(xgboost) -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) -# note: for customized objective function, we leave objective as default -# note: what we are getting is margin value in prediction -# you must know what you 
are doing -param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0) -evals <- list(eval = dtest) -num_round <- 20 -# user define objective function, given prediction, return gradient and second order gradient -# this is log likelihood loss -logregobj <- function(preds, dtrain) { - labels <- getinfo(dtrain, "label") - preds <- 1 / (1 + exp(-preds)) - grad <- preds - labels - hess <- preds * (1 - preds) - return(list(grad = grad, hess = hess)) -} -# user defined evaluation function, return a pair metric_name, result -# NOTE: when you do customized loss function, the default prediction value is margin -# this may make builtin evaluation metric not function properly -# for example, we are doing logistic loss, the prediction is score before logistic transformation -# the builtin evaluation error assumes input is after logistic transformation -# Take this in mind when you use the customization, and maybe you need write customized evaluation function -evalerror <- function(preds, dtrain) { - labels <- getinfo(dtrain, "label") - err <- as.numeric(sum(labels != (preds > 0))) / length(labels) - return(list(metric = "error", value = err)) -} -print('start training with early Stopping setting') - -bst <- xgb.train(param, dtrain, num_round, evals, - objective = logregobj, eval_metric = evalerror, maximize = FALSE, - early_stopping_round = 3) -bst <- xgb.cv(param, dtrain, num_round, nfold = 5, - objective = logregobj, eval_metric = evalerror, - maximize = FALSE, early_stopping_rounds = 3) diff --git a/R-package/demo/generalized_linear_model.R b/R-package/demo/generalized_linear_model.R deleted file mode 100644 index d29a6dc5be58..000000000000 --- a/R-package/demo/generalized_linear_model.R +++ /dev/null @@ -1,33 +0,0 @@ -require(xgboost) -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) -## -# this script demonstrate how to fit generalized linear model in xgboost -# basically, we are using linear model, instead of tree for our boosters -# you can fit a linear regression, or logistic regression model -## - -# change booster to gblinear, so that we are fitting a linear model -# alpha is the L1 regularizer -# lambda is the L2 regularizer -# you can also set lambda_bias which is L2 regularizer on the bias term -param <- list(objective = "binary:logistic", booster = "gblinear", - nthread = 2, alpha = 0.0001, lambda = 1) - -# normally, you do not need to set eta (step_size) -# XGBoost uses a parallel coordinate descent algorithm (shotgun), -# there could be affection on convergence with parallelization on certain cases -# setting eta to be smaller value, e.g 0.5 can make the optimization more stable - -## -# the rest of settings are the same -## -evals <- list(eval = dtest, train = dtrain) -num_round <- 2 -bst <- xgb.train(param, dtrain, num_round, evals) -ypred <- predict(bst, dtest) -labels <- getinfo(dtest, 'label') -cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n') diff --git a/R-package/demo/gpu_accelerated.R b/R-package/demo/gpu_accelerated.R deleted file mode 100644 index 617a63e74542..000000000000 --- a/R-package/demo/gpu_accelerated.R +++ /dev/null @@ -1,45 +0,0 @@ -# An example of using GPU-accelerated tree building algorithms -# -# NOTE: it can only run if you have a CUDA-enable GPU and the package was -# specially compiled with GPU support. 
-# -# For the current functionality, see -# https://xgboost.readthedocs.io/en/latest/gpu/index.html -# - -library('xgboost') - -# Simulate N x p random matrix with some binomial response dependent on pp columns -set.seed(111) -N <- 1000000 -p <- 50 -pp <- 25 -X <- matrix(runif(N * p), ncol = p) -betas <- 2 * runif(pp) - 1 -sel <- sort(sample(p, pp)) -m <- X[, sel] %*% betas - 1 + rnorm(N) -y <- rbinom(N, 1, plogis(m)) - -tr <- sample.int(N, N * 0.75) -dtrain <- xgb.DMatrix(X[tr, ], label = y[tr]) -dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr]) -evals <- list(train = dtrain, test = dtest) - -# An example of running 'gpu_hist' algorithm -# which is -# - similar to the 'hist' -# - the fastest option for moderately large datasets -# - current limitations: max_depth < 16, does not implement guided loss -# You can use tree_method = 'gpu_hist' for another GPU accelerated algorithm, -# which is slower, more memory-hungry, but does not use binning. -param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4, - max_bin = 64, tree_method = 'gpu_hist') -pt <- proc.time() -bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50) -proc.time() - pt - -# Compare to the 'hist' algorithm: -param$tree_method <- 'hist' -pt <- proc.time() -bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50) -proc.time() - pt diff --git a/R-package/demo/interaction_constraints.R b/R-package/demo/interaction_constraints.R deleted file mode 100644 index 72287513eeeb..000000000000 --- a/R-package/demo/interaction_constraints.R +++ /dev/null @@ -1,113 +0,0 @@ -library(xgboost) -library(data.table) - -set.seed(1024) - -# Function to obtain a list of interactions fitted in trees, requires input of maximum depth -treeInteractions <- function(input_tree, input_max_depth) { - ID_merge <- i.id <- i.feature <- NULL # Suppress warning "no visible binding for global variable" - - trees <- data.table::copy(input_tree) # copy tree input to prevent overwriting - if (input_max_depth < 2) return(list()) # no interactions if max depth < 2 - if (nrow(input_tree) == 1) return(list()) - - # Attach parent nodes - for (i in 2:input_max_depth) { - if (i == 2) trees[, ID_merge := ID] else trees[, ID_merge := get(paste0('parent_', i - 2))] - parents_left <- trees[!is.na(Split), list(i.id = ID, i.feature = Feature, ID_merge = Yes)] - parents_right <- trees[!is.na(Split), list(i.id = ID, i.feature = Feature, ID_merge = No)] - - data.table::setorderv(trees, 'ID_merge') - data.table::setorderv(parents_left, 'ID_merge') - data.table::setorderv(parents_right, 'ID_merge') - - trees <- merge(trees, parents_left, by = 'ID_merge', all.x = TRUE) - trees[!is.na(i.id), c(paste0('parent_', i - 1), paste0('parent_feat_', i - 1)) - := list(i.id, i.feature)] - trees[, c('i.id', 'i.feature') := NULL] - - trees <- merge(trees, parents_right, by = 'ID_merge', all.x = TRUE) - trees[!is.na(i.id), c(paste0('parent_', i - 1), paste0('parent_feat_', i - 1)) - := list(i.id, i.feature)] - trees[, c('i.id', 'i.feature') := NULL] - } - - # Extract nodes with interactions - interaction_trees <- trees[!is.na(Split) & !is.na(parent_1), # nolint: object_usage_linter - c('Feature', paste0('parent_feat_', 1:(input_max_depth - 1))), - with = FALSE] - interaction_trees_split <- split(interaction_trees, seq_len(nrow(interaction_trees))) - interaction_list <- lapply(interaction_trees_split, as.character) - - # Remove NAs (no parent interaction) - interaction_list <- lapply(interaction_list, function(x) x[!is.na(x)]) - - # Remove 
non-interactions (same variable) - interaction_list <- lapply(interaction_list, unique) # remove same variables - interaction_length <- lengths(interaction_list) - interaction_list <- interaction_list[interaction_length > 1] - interaction_list <- unique(lapply(interaction_list, sort)) - return(interaction_list) -} - -# Generate sample data -x <- list() -for (i in 1:10) { - x[[i]] <- i * rnorm(1000, 10) -} -x <- as.data.table(x) - -y <- -1 * x[, rowSums(.SD)] + x[['V1']] * x[['V2']] + x[['V3']] * x[['V4']] * x[['V5']] - + rnorm(1000, 0.001) + 3 * sin(x[['V7']]) - -train <- as.matrix(x) - -# Interaction constraint list (column names form) -interaction_list <- list(c('V1', 'V2'), c('V3', 'V4', 'V5')) - -# Convert interaction constraint list into feature index form -cols2ids <- function(object, col_names) { - LUT <- seq_along(col_names) - 1 - names(LUT) <- col_names - rapply(object, function(x) LUT[x], classes = "character", how = "replace") -} -interaction_list_fid <- cols2ids(interaction_list, colnames(train)) - -# Fit model with interaction constraints -bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000, - interaction_constraints = interaction_list_fid) - -bst_tree <- xgb.model.dt.tree(colnames(train), bst) -bst_interactions <- treeInteractions(bst_tree, 4) - # interactions constrained to combinations of V1*V2 and V3*V4*V5 - -# Fit model without interaction constraints -bst2 <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000) - -bst2_tree <- xgb.model.dt.tree(colnames(train), bst2) -bst2_interactions <- treeInteractions(bst2_tree, 4) # much more interactions - -# Fit model with both interaction and monotonicity constraints -bst3 <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 4, - eta = 0.1, nthread = 2, nrounds = 1000, - interaction_constraints = interaction_list_fid, - monotone_constraints = c(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0)) - -bst3_tree <- xgb.model.dt.tree(colnames(train), bst3) -bst3_interactions <- treeInteractions(bst3_tree, 4) - # interactions still constrained to combinations of V1*V2 and V3*V4*V5 - -# Show monotonic constraints still apply by checking scores after incrementing V1 -x1 <- sort(unique(x[['V1']])) -for (i in seq_along(x1)){ - testdata <- copy(x[, - ('V1')]) - testdata[['V1']] <- x1[i] - testdata <- testdata[, paste0('V', 1:10), with = FALSE] - pred <- predict(bst3, as.matrix(testdata)) - - # Should not print out anything due to monotonic constraints - if (i > 1) if (any(pred > prev_pred)) print(i) - prev_pred <- pred -} diff --git a/R-package/demo/poisson_regression.R b/R-package/demo/poisson_regression.R deleted file mode 100644 index 685314b30e96..000000000000 --- a/R-package/demo/poisson_regression.R +++ /dev/null @@ -1,6 +0,0 @@ -data(mtcars) -head(mtcars) -bst <- xgb.train(data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]), - objective = 'count:poisson', nrounds = 5) -pred <- predict(bst, as.matrix(mtcars[, -11])) -sqrt(mean((pred - mtcars[, 11]) ^ 2)) diff --git a/R-package/demo/predict_first_ntree.R b/R-package/demo/predict_first_ntree.R deleted file mode 100644 index ba15ab39a74f..000000000000 --- a/R-package/demo/predict_first_ntree.R +++ /dev/null @@ -1,23 +0,0 @@ -require(xgboost) -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(agaricus.test$data, 
label = agaricus.test$label) - -param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic') -evals <- list(eval = dtest, train = dtrain) -nrounds <- 2 - -# training the model for two rounds -bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals) -cat('start testing prediction from first n trees\n') -labels <- getinfo(dtest, 'label') - -### predict using first 1 tree -ypred1 <- predict(bst, dtest, iterationrange = c(1, 1)) -# by default, we predict using all the trees -ypred2 <- predict(bst, dtest) - -cat('error of ypred1=', mean(as.numeric(ypred1 > 0.5) != labels), '\n') -cat('error of ypred2=', mean(as.numeric(ypred2 > 0.5) != labels), '\n') diff --git a/R-package/demo/predict_leaf_indices.R b/R-package/demo/predict_leaf_indices.R deleted file mode 100644 index a57baf668896..000000000000 --- a/R-package/demo/predict_leaf_indices.R +++ /dev/null @@ -1,54 +0,0 @@ -require(xgboost) -require(data.table) -require(Matrix) - -set.seed(1982) - -# load in the agaricus dataset -data(agaricus.train, package = 'xgboost') -data(agaricus.test, package = 'xgboost') -dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label) -dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label) - -param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic') -nrounds <- 4 - -# training the model for two rounds -bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2) - -# Model accuracy without new features -accuracy.before <- (sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) - / length(agaricus.test$label)) - -# by default, we predict using all the trees -pred_with_leaf <- predict(bst, dtest, predleaf = TRUE) -head(pred_with_leaf) - -create.new.tree.features <- function(model, original.features) { - pred_with_leaf <- predict(model, original.features, predleaf = TRUE) - cols <- list() - for (i in 1:xgb.get.num.boosted.rounds(model)) { - # max is not the real max but it s not important for the purpose of adding features - leaf.id <- sort(unique(pred_with_leaf[, i])) - cols[[i]] <- factor(x = pred_with_leaf[, i], level = leaf.id) - } - cbind(original.features, sparse.model.matrix(~ . - 1, as.data.frame(cols))) -} - -# Convert previous features to one hot encoding -new.features.train <- create.new.tree.features(bst, agaricus.train$data) -new.features.test <- create.new.tree.features(bst, agaricus.test$data) -colnames(new.features.test) <- colnames(new.features.train) - -# learning with new features -new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label) -new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label) -bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) - -# Model accuracy with new features -accuracy.after <- (sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) - / length(agaricus.test$label)) - -# Here the accuracy was already good and is now perfect. -cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now", - accuracy.after, "!\n")) diff --git a/R-package/demo/runall.R b/R-package/demo/runall.R deleted file mode 100644 index ab1822a5b8ad..000000000000 --- a/R-package/demo/runall.R +++ /dev/null @@ -1,13 +0,0 @@ -# running all scripts in demo folder, removed during packaging. 
-demo(basic_walkthrough, package = 'xgboost')
-demo(custom_objective, package = 'xgboost')
-demo(boost_from_prediction, package = 'xgboost')
-demo(predict_first_ntree, package = 'xgboost')
-demo(generalized_linear_model, package = 'xgboost')
-demo(cross_validation, package = 'xgboost')
-demo(create_sparse_matrix, package = 'xgboost')
-demo(predict_leaf_indices, package = 'xgboost')
-demo(early_stopping, package = 'xgboost')
-demo(poisson_regression, package = 'xgboost')
-demo(tweedie_regression, package = 'xgboost')
-#demo(gpu_accelerated, package = 'xgboost') # can only run when built with GPU support
diff --git a/R-package/demo/tweedie_regression.R b/R-package/demo/tweedie_regression.R
deleted file mode 100644
index b07858e761fa..000000000000
--- a/R-package/demo/tweedie_regression.R
+++ /dev/null
@@ -1,49 +0,0 @@
-library(xgboost)
-library(data.table)
-library(cplm)
-
-data(AutoClaim)
-
-# auto insurance dataset analyzed by Yip and Yau (2005)
-dt <- data.table(AutoClaim)
-
-# exclude these columns from the model matrix
-exclude <- c('POLICYNO', 'PLCYDATE', 'CLM_FREQ5', 'CLM_AMT5', 'CLM_FLAG', 'IN_YY')
-
-# retains the missing values
-# NOTE: this dataset is comes ready out of the box
-options(na.action = 'na.pass')
-x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = FALSE])
-options(na.action = 'na.omit')
-
-# response
-y <- dt[, CLM_AMT5]
-
-d_train <- xgb.DMatrix(data = x, label = y, missing = NA)
-
-# the tweedie_variance_power parameter determines the shape of
-# distribution
-# - closer to 1 is more poisson like and the mass
-# is more concentrated near zero
-# - closer to 2 is more gamma like and the mass spreads to the
-# the right with less concentration near zero
-
-params <- list(
-  objective = 'reg:tweedie',
-  eval_metric = 'rmse',
-  tweedie_variance_power = 1.4,
-  max_depth = 6,
-  eta = 1)
-
-bst <- xgb.train(
-  data = d_train,
-  params = params,
-  maximize = FALSE,
-  evals = list(train = d_train),
-  nrounds = 20)
-
-var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
-
-preds <- predict(bst, d_train)
-
-rmse <- sqrt(sum(mean((y - preds) ^ 2)))
diff --git a/tests/ci_build/test_r_package.py b/tests/ci_build/test_r_package.py
index add31b97313c..735140a8099b 100644
--- a/tests/ci_build/test_r_package.py
+++ b/tests/ci_build/test_r_package.py
@@ -45,7 +45,6 @@ def pkgroot(path: str) -> None:
     )
     shutil.copytree("R-package", dest)
-    os.remove(dest / "demo" / "runall.R")

     # core
     shutil.copytree("src", dest / "src" / "src")
     shutil.copytree("include", dest / "src" / "include")
@@ -221,7 +220,6 @@ def test_with_autotools() -> None:
     subprocess.check_call(
         ["R.exe", "-q", "-e", "library(testthat); setwd('tests'); source('testthat.R')"]
     )
-    subprocess.check_call(["R.exe", "-q", "-e", "demo(runall, package = 'xgboost')"])


 @record_time
@@ -296,7 +294,6 @@ def test_with_cmake(args: argparse.Namespace) -> None:
             "library(testthat); setwd('tests'); source('testthat.R')",
         ]
     )
-    subprocess.check_call([R, "-q", "-e", "demo(runall, package = 'xgboost')"])


 @record_time
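
For readers who used these demos as a starting point, the snippet below is a minimal sketch (not part of the patch) that condenses the basic training-and-prediction flow the removed basic_walkthrough.R covered; every function call and parameter value is taken from the deleted demo code above, and it assumes only an installed xgboost R package.

library(xgboost)

# Mushroom classification data shipped with the package, as used by the removed demos
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)

# Same parameters the deleted basic_walkthrough.R used
params <- list(max_depth = 2, eta = 1, nthread = 2, objective = "binary:logistic")
bst <- xgb.train(params = params, data = dtrain, nrounds = 2,
                 evals = list(train = dtrain, eval = dtest))

# Held-out error rate, computed as in the removed demo
pred <- predict(bst, dtest)
err <- mean(as.numeric(pred > 0.5) != getinfo(dtest, "label"))
print(paste("test-error=", err))

# Per-feature gain, as in the removed importance example
imp <- xgb.importance(feature_names = colnames(agaricus.train$data), model = bst)
print(imp)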