diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 92b53a660e88..7c01d50c6811 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -63,7 +63,8 @@ Imports: Matrix (>= 1.1-0), methods, data.table (>= 1.9.6), - jsonlite (>= 1.0), + jsonlite (>= 1.0) +Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Encoding: UTF-8 SystemRequirements: GNU make, C++17 diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index aa5e65e40ee0..4e980641a17d 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -79,36 +79,45 @@ xgb.get.handle <- function(object) { handle } -#' Restore missing parts of an incomplete xgb.Booster object. +#' Restore missing parts of an incomplete xgb.Booster object #' -#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing -#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid) -#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid +#' It attempts to complete an `xgb.Booster` object by restoring either its missing +#' raw model memory dump (when it has no `raw` data but its `xgb.Booster.handle` is valid) +#' or its missing internal handle (when its `xgb.Booster.handle` is not valid #' but it has a raw Booster memory dump). #' -#' @param object object of class \code{xgb.Booster} -#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data +#' @param object Object of class `xgb.Booster`. +#' @param saveraw A flag indicating whether to append `raw` Booster memory dump data #' when it doesn't already exist. #' #' @details #' #' While this method is primarily for internal use, it might be useful in some practical situations. #' -#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object, +#' E.g., when an `xgb.Booster` model is saved as an R object and then is loaded as an R object, #' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods #' should still work for such a model object since those methods would be using -#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the -#' \code{xgb.Booster.complete} function explicitly once after loading a model as an R-object. +#' `xgb.Booster.complete()` internally. However, one might find it to be more efficient to call the +#' `xgb.Booster.complete()` function explicitly once after loading a model as an R-object. #' That would prevent further repeated implicit reconstruction of an internal booster model. #' #' @return -#' An object of \code{xgb.Booster} class. +#' An object of `xgb.Booster` class. #' #' @examples #' -#' data(agaricus.train, package='xgboost') -#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +#' data(agaricus.train, package = "xgboost") +#' +#' bst <- xgboost( +#' data = agaricus.train$data, +#' label = agaricus.train$label, +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' nrounds = 2, +#' objective = "binary:logistic" +#' ) +#' #' saveRDS(bst, "xgb.model.rds") #' #' # Warning: The resulting RDS file is only compatible with the current XGBoost version. 
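A minimal sketch of the workflow described in the details above (not part of the patch itself): it assumes the `bst` object and the `"xgb.model.rds"` file from the example in this hunk, and shows why one might call `xgb.Booster.complete()` explicitly after `readRDS()`.

```r
# Load the model back as a plain R object; its internal handle (external
# pointer) is invalid after deserialization.
bst2 <- readRDS("xgb.model.rds")

# Restore the handle once, instead of relying on implicit reconstruction
# inside every subsequent call such as predict():
bst2 <- xgb.Booster.complete(bst2)

pred <- predict(bst2, agaricus.train$data)
file.remove("xgb.model.rds")
```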
@@ -161,112 +170,100 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { return(object) } -#' Predict method for eXtreme Gradient Boosting model +#' Predict method for XGBoost model #' #' Predicted values based on either xgboost model or model handle object. #' -#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle} -#' @param newdata takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector}, -#' local data file or \code{xgb.DMatrix}. -#' -#' For single-row predictions on sparse data, it's recommended to use CSR format. If passing -#' a sparse vector, it will take it as a row vector. -#' @param missing Missing is only used when input is dense matrix. Pick a float value that represents -#' missing values in data (e.g., sometimes 0 or some other extreme value is used). -#' @param outputmargin whether the prediction should be returned in the for of original untransformed -#' sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for -#' logistic regression would result in predictions for log-odds instead of probabilities. -#' @param ntreelimit Deprecated, use \code{iterationrange} instead. -#' @param predleaf whether predict leaf index. -#' @param predcontrib whether to return feature contributions to individual predictions (see Details). -#' @param approxcontrib whether to use a fast approximation for feature contributions (see Details). -#' @param predinteraction whether to return contributions of feature interactions to individual predictions (see Details). -#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several -#' prediction outputs per case. This option has no effect when either of predleaf, predcontrib, -#' or predinteraction flags is TRUE. -#' @param training whether is the prediction result used for training. For dart booster, +#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`. +#' @param newdata Takes `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`, +#' local data file, or `xgb.DMatrix`. +#' For single-row predictions on sparse data, it is recommended to use the CSR format. +#' If passing a sparse vector, it will take it as a row vector. +#' @param missing Only used when input is a dense matrix. Pick a float value that represents +#' missing values in data (e.g., 0 or some other extreme value). +#' @param outputmargin Whether the prediction should be returned in the form of original untransformed +#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for +#' logistic regression would return log-odds instead of probabilities. +#' @param ntreelimit Deprecated, use `iterationrange` instead. +#' @param predleaf Whether to predict per-tree leaf indices. +#' @param predcontrib Whether to return feature contributions to individual predictions (see Details). +#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details). +#' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details). +#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several +#' prediction outputs per case. No effect if `predleaf`, `predcontrib`, +#' or `predinteraction` is `TRUE`. +#' @param training Whether the predictions are used for training. For dart booster, #' training predicting will perform dropout.
-#' @param iterationrange Specifies which layer of trees are used in prediction. For -#' example, if a random forest is trained with 100 rounds. Specifying -#' `iterationrange=(1, 21)`, then only the forests built during [1, 21) (half open set) -#' rounds are used in this prediction. It's 1-based index just like R vector. When set -#' to \code{c(1, 1)} XGBoost will use all trees. -#' @param strict_shape Default is \code{FALSE}. When it's set to \code{TRUE}, output -#' type and shape of prediction are invariant to model type. -#' +#' @param iterationrange Specifies which trees are used in prediction. For +#' example, take a random forest with 100 rounds. +#' With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set) +#' rounds are used in this prediction. The index is 1-based just like an R vector. When set +#' to `c(1, 1)`, XGBoost will use all trees. +#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output +#' type and shape of predictions are invariant to the model type. #' @param ... Not used. #' #' @details #' -#' Note that \code{iterationrange} would currently do nothing for predictions from gblinear, -#' since gblinear doesn't keep its boosting history. +#' Note that `iterationrange` would currently do nothing for predictions from "gblinear", +#' since "gblinear" doesn't keep its boosting history. #' -#' One possible practical applications of the \code{predleaf} option is to use the model +#' One possible practical applications of the `predleaf` option is to use the model #' as a generator of new features which capture non-linearity and interactions, -#' e.g., as implemented in \code{\link{xgb.create.features}}. +#' e.g., as implemented in [xgb.create.features()]. #' -#' Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to +#' Setting `predcontrib = TRUE` allows to calculate contributions of each feature to #' individual predictions. For "gblinear" booster, feature contributions are simply linear terms #' (feature_beta * feature_value). For "gbtree" booster, feature contributions are SHAP #' values (Lundberg 2017) that sum to the difference between the expected output #' of the model and the current prediction (where the hessian weights are used to compute the expectations). -#' Setting \code{approxcontrib = TRUE} approximates these values following the idea explained +#' Setting `approxcontrib = TRUE` approximates these values following the idea explained #' in \url{http://blog.datadive.net/interpreting-random-forests/}. #' -#' With \code{predinteraction = TRUE}, SHAP values of contributions of interaction of each pair of features +#' With `predinteraction = TRUE`, SHAP values of contributions of interaction of each pair of features #' are computed. Note that this operation might be rather expensive in terms of compute and memory. #' Since it quadratically depends on the number of features, it is recommended to perform selection #' of the most important features first. See below about the format of the returned results. #' -#' The \code{predict()} method uses as many threads as defined in \code{xgb.Booster} object (all by default). -#' If you want to change their number, then assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}. -#' Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads too. +#' The `predict()` method uses as many threads as defined in `xgb.Booster` object (all by default). 
+#' If you want to change their number, assign a new number to `nthread` using [xgb.parameters<-()]. +#' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too. #' #' @return -#' The return type is different depending whether \code{strict_shape} is set to \code{TRUE}. By default, -#' for regression or binary classification, it returns a vector of length \code{nrows(newdata)}. -#' For multiclass classification, either a \code{num_class * nrows(newdata)} vector or -#' a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on -#' the \code{reshape} value. -#' -#' When \code{predleaf = TRUE}, the output is a matrix object with the -#' number of columns corresponding to the number of trees. -#' -#' When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with -#' \code{num_features + 1} columns. The last "+ 1" column in a matrix corresponds to bias. -#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is -#' such a matrix. The contribution values are on the scale of untransformed margin -#' (e.g., for binary classification would mean that the contributions are log-odds deviations from bias). -#' -#' When \code{predinteraction = TRUE} and it is not a multiclass setting, the output is a 3d array with -#' dimensions \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions) -#' elements represent different features interaction contributions. The array is symmetric WRT the last -#' two dimensions. The "+ 1" columns corresponds to bias. Summing this array along the last dimension should -#' produce practically the same result as predict with \code{predcontrib = TRUE}. -#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is -#' such an array. -#' -#' When \code{strict_shape} is set to \code{TRUE}, the output is always an array. For -#' normal prediction, the output is a 2-dimension array \code{(num_class, nrow(newdata))}. -#' -#' For \code{predcontrib = TRUE}, output is \code{(ncol(newdata) + 1, num_class, nrow(newdata))} -#' For \code{predinteraction = TRUE}, output is \code{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))} -#' For \code{predleaf = TRUE}, output is \code{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))} -#' -#' @seealso -#' \code{\link{xgb.train}}. -#' +#' The return type depends on `strict_shape`. If `FALSE` (default): +#' - For regression or binary classification: A vector of length `nrows(newdata)`. +#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or +#' a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value. +#' - When `predleaf = TRUE`: A matrix with one column per tree. +#' - When `predcontrib = TRUE`: When not multiclass, a matrix with +#' ` num_features + 1` columns. The last "+ 1" column corresponds to the baseline value. +#' In the multiclass case, a list of `num_class` such matrices. +#' The contribution values are on the scale of untransformed margin +#' (e.g., for binary classification, the values are log-odds deviations from the baseline). +#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of +#' dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions) +#' elements represent different feature interaction contributions. The array is symmetric WRT the last +#' two dimensions. 
The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should +#' produce practically the same result as `predcontrib = TRUE`. +#' In the multiclass case, a list of `num_class` such arrays. +#' +#' When `strict_shape = TRUE`, the output is always an array: +#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`. +#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`. +#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`. +#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`. +#' @seealso [xgb.train()] #' @references -#' -#' Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874} -#' -#' Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060} +#' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", +#' NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874} +#' 2. Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", +#' \url{https://arxiv.org/abs/1706.06060} #' #' @examples #' ## binary classification: #' -#' data(agaricus.train, package='xgboost') -#' data(agaricus.test, package='xgboost') +#' data(agaricus.train, package = "xgboost") +#' data(agaricus.test, package = "xgboost") #' #' ## Keep the number of threads to 2 for examples #' nthread <- 2 @@ -275,8 +272,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' train <- agaricus.train #' test <- agaricus.test #' -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic") +#' bst <- xgboost( +#' data = train$data, +#' label = train$label, +#' max_depth = 2, +#' eta = 0.5, +#' nthread = nthread, +#' nrounds = 5, +#' objective = "binary:logistic" +#' ) +#' #' # use all trees by default #' pred <- predict(bst, test$data) #' # use only the 1st tree @@ -308,32 +313,53 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' #' lb <- as.numeric(iris$Species) - 1 #' num_class <- 3 +#' #' set.seed(11) -#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, -#' max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5, -#' objective = "multi:softprob", num_class = num_class) +#' +#' bst <- xgboost( +#' data = as.matrix(iris[, -5]), +#' label = lb, +#' max_depth = 4, +#' eta = 0.5, +#' nthread = 2, +#' nrounds = 10, +#' subsample = 0.5, +#' objective = "multi:softprob", +#' num_class = num_class +#' ) +#' #' # predict for softmax returns num_class probability numbers per case: #' pred <- predict(bst, as.matrix(iris[, -5])) #' str(pred) #' # reshape it to a num_class-columns matrix -#' pred <- matrix(pred, ncol=num_class, byrow=TRUE) +#' pred <- matrix(pred, ncol = num_class, byrow = TRUE) #' # convert the probabilities to softmax labels #' pred_labels <- max.col(pred) - 1 #' # the following should result in the same error as seen in the last iteration -#' sum(pred_labels != lb)/length(lb) +#' sum(pred_labels != lb) / length(lb) #' -#' # compare that to the predictions from softmax: +#' # compare with predictions from softmax: #' set.seed(11) -#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, -#' max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 
0.5, -#' objective = "multi:softmax", num_class = num_class) +#' +#' bst <- xgboost( +#' data = as.matrix(iris[, -5]), +#' label = lb, +#' max_depth = 4, +#' eta = 0.5, +#' nthread = 2, +#' nrounds = 10, +#' subsample = 0.5, +#' objective = "multi:softmax", +#' num_class = num_class +#' ) +#' #' pred <- predict(bst, as.matrix(iris[, -5])) #' str(pred) #' all.equal(pred, pred_labels) #' # prediction from using only 5 iterations should result #' # in the same error as seen in iteration 5: -#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange=c(1, 6)) -#' sum(pred5 != lb)/length(lb) +#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6)) +#' sum(pred5 != lb) / length(lb) #' #' @rdname predict.xgb.Booster #' @export @@ -497,63 +523,69 @@ predict.xgb.Booster.handle <- function(object, ...) { } -#' Accessors for serializable attributes of a model. +#' Accessors for serializable attributes of a model #' #' These methods allow to manipulate the key-value attribute strings of an xgboost model. #' -#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}. -#' @param name a non-empty character string specifying which attribute is to be accessed. -#' @param value a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-} -#' it's a list (or an object coercible to a list) with the names of attributes to set +#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`. +#' @param name A non-empty character string specifying which attribute is to be accessed. +#' @param value For `xgb.attr<-`, a value of an attribute; for `xgb.attributes<-`, +#' it is a list (or an object coercible to a list) with the names of attributes to set #' and the elements corresponding to attribute values. #' Non-character values are converted to character. -#' When attribute value is not a scalar, only the first index is used. -#' Use \code{NULL} to remove an attribute. +#' When an attribute value is not a scalar, only the first index is used. +#' Use `NULL` to remove an attribute. #' #' @details -#' The primary purpose of xgboost model attributes is to store some meta-data about the model. +#' The primary purpose of xgboost model attributes is to store some meta data about the model. #' Note that they are a separate concept from the object attributes in R. #' Specifically, they refer to key-value strings that can be attached to an xgboost model, #' stored together with the model's binary representation, and accessed later #' (from R or any other interface). -#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class -#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object +#' In contrast, any R attribute assigned to an R object of `xgb.Booster` class +#' would not be saved by [xgb.save()] because an xgboost model is an external memory object #' and its serialization is handled externally. #' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't #' change the value of that parameter for a model. -#' Use \code{\link{xgb.parameters<-}} to set or change model parameters. +#' Use [xgb.parameters<-()] to set or change model parameters. #' -#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} -#' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. 
+#' The attribute setters would usually work more efficiently for `xgb.Booster.handle` +#' than for `xgb.Booster`, since only just a handle (pointer) would need to be copied. #' That would only matter if attributes need to be set many times. -#' Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters, -#' the raw model cache of an \code{xgb.Booster} object would not be automatically updated, -#' and it would be user's responsibility to call \code{xgb.serialize} to update it. +#' Note, however, that when feeding a handle of an `xgb.Booster` object to the attribute setters, +#' the raw model cache of an `xgb.Booster` object would not be automatically updated, +#' and it would be the user's responsibility to call [xgb.serialize()] to update it. #' -#' The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, +#' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes, #' but it doesn't delete the other existing attributes. #' #' @return -#' \code{xgb.attr} returns either a string value of an attribute -#' or \code{NULL} if an attribute wasn't stored in a model. -#' -#' \code{xgb.attributes} returns a list of all attribute stored in a model -#' or \code{NULL} if a model has no stored attributes. +#' - `xgb.attr()` returns either a string value of an attribute +#' or `NULL` if an attribute wasn't stored in a model. +#' - `xgb.attributes()` returns a list of all attributes stored in a model +#' or `NULL` if a model has no stored attributes. #' #' @examples -#' data(agaricus.train, package='xgboost') +#' data(agaricus.train, package = "xgboost") #' train <- agaricus.train #' -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +#' bst <- xgboost( +#' data = train$data, +#' label = train$label, +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' nrounds = 2, +#' objective = "binary:logistic" +#' ) #' #' xgb.attr(bst, "my_attribute") <- "my attribute value" #' print(xgb.attr(bst, "my_attribute")) #' xgb.attributes(bst) <- list(a = 123, b = "abc") #' -#' xgb.save(bst, 'xgb.model') -#' bst1 <- xgb.load('xgb.model') -#' if (file.exists('xgb.model')) file.remove('xgb.model') +#' xgb.save(bst, "xgb.model") +#' bst1 <- xgb.load("xgb.model") +#' if (file.exists("xgb.model")) file.remove("xgb.model") #' print(xgb.attr(bst1, "my_attribute")) #' print(xgb.attributes(bst1)) #' @@ -632,22 +664,29 @@ xgb.attributes <- function(object) { object } -#' Accessors for model parameters as JSON string. +#' Accessors for model parameters as JSON string #' -#' @param object Object of class \code{xgb.Booster} +#' @param object Object of class `xgb.Booster`. #' @param value A JSON string. #' #' @examples -#' data(agaricus.train, package='xgboost') +#' data(agaricus.train, package = "xgboost") +#' #' ## Keep the number of threads to 1 for examples #' nthread <- 1 #' data.table::setDTthreads(nthread) #' train <- agaricus.train #' #' bst <- xgboost( -#' data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" +#' data = train$data, +#' label = train$label, +#' max_depth = 2, +#' eta = 1, +#' nthread = nthread, +#' nrounds = 2, +#' objective = "binary:logistic" #' ) +#' #' config <- xgb.config(bst) #' #' @rdname xgb.config @@ -667,24 +706,31 @@ xgb.config <- function(object) { object } -#' Accessors for model parameters. 
+#' Accessors for model parameters #' #' Only the setter for xgboost parameters is currently implemented. #' -#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}. -#' @param value a list (or an object coercible to a list) with the names of parameters to set +#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`. +#' @param value A list (or an object coercible to a list) with the names of parameters to set #' and the elements corresponding to parameter values. #' #' @details -#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle} -#' than for \code{xgb.Booster}, since only just a handle would need to be copied. +#' Note that the setter would usually work more efficiently for `xgb.Booster.handle` +#' than for `xgb.Booster`, since only just a handle would need to be copied. #' #' @examples -#' data(agaricus.train, package='xgboost') +#' data(agaricus.train, package = "xgboost") #' train <- agaricus.train #' -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +#' bst <- xgboost( +#' data = train$data, +#' label = train$label, +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' nrounds = 2, +#' objective = "binary:logistic" +#' ) #' #' xgb.parameters(bst) <- list(eta = 0.1) #' @@ -724,23 +770,31 @@ xgb.ntree <- function(bst) { #' Print xgb.Booster #' -#' Print information about xgb.Booster. +#' Print information about `xgb.Booster`. #' -#' @param x an xgb.Booster object -#' @param verbose whether to print detailed data (e.g., attribute values) -#' @param ... not currently used +#' @param x An `xgb.Booster` object. +#' @param verbose Whether to print detailed data (e.g., attribute values). +#' @param ... Not currently used. #' #' @examples -#' data(agaricus.train, package='xgboost') +#' data(agaricus.train, package = "xgboost") #' train <- agaricus.train -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, -#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") -#' attr(bst, 'myattr') <- 'memo' +#' +#' bst <- xgboost( +#' data = train$data, +#' label = train$label, +#' max_depth = 2, +#' eta = 1, +#' nthread = 2, +#' nrounds = 2, +#' objective = "binary:logistic" +#' ) +#' +#' attr(bst, "myattr") <- "memo" #' #' print(bst) -#' print(bst, verbose=TRUE) +#' print(bst, verbose = TRUE) #' -#' @method print xgb.Booster #' @export print.xgb.Booster <- function(x, verbose = FALSE, ...) { cat('##### xgb.Booster\n') diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R index 98b03ea8a984..baef3bb03e28 100644 --- a/R-package/R/xgb.create.features.R +++ b/R-package/R/xgb.create.features.R @@ -51,7 +51,7 @@ #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) #' -#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') +#' param <- list(max_depth=2, eta=1, objective='binary:logistic') #' nrounds = 4 #' #' bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2) diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index d9afd55461dd..d61bd23d4684 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -7,7 +7,7 @@ #' \code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}. #' @param features a vector of either column indices or of feature names to plot. 
When it is NULL, #' feature importance is calculated, and \code{top_n} high ranked features are taken. -#' @param top_n when \code{features} is NULL, top_n [1, 100] most important features in a model are taken. +#' @param top_n when \code{features} is NULL, top_n `[1, 100]` most important features in a model are taken. #' @param model an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib} #' or \code{features} is missing. #' @param trees passed to \code{\link{xgb.importance}} when \code{features = NULL}. @@ -197,7 +197,7 @@ xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, #' hence allows us to see which features have a negative / positive contribution #' on the model prediction, and whether the contribution is different for larger #' or smaller values of the feature. We effectively try to replicate the -#' \code{summary_plot} function from https://github.com/shap/shap. +#' \code{summary_plot} function from <https://github.com/shap/shap>. #' #' @inheritParams xgb.plot.shap #' diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index e60ea2de8aa7..f61c535e228f 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -40,10 +40,10 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL, #' } #' #' @references -#' https://archive.ics.uci.edu/ml/datasets/Mushroom +#' <https://archive.ics.uci.edu/ml/datasets/Mushroom> #' #' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository -#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +#' <http://archive.ics.uci.edu/ml>. Irvine, CA: University of California, #' School of Information and Computer Science. #' #' @docType data @@ -67,10 +67,10 @@ NULL #' } #' #' @references -#' https://archive.ics.uci.edu/ml/datasets/Mushroom +#' <https://archive.ics.uci.edu/ml/datasets/Mushroom> #' #' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository -#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +#' <http://archive.ics.uci.edu/ml>. Irvine, CA: University of California, #' School of Information and Computer Science. #' #' @docType data diff --git a/R-package/man/agaricus.test.Rd b/R-package/man/agaricus.test.Rd index e3694ae0dfe8..f90a5a0d23e0 100644 --- a/R-package/man/agaricus.test.Rd +++ b/R-package/man/agaricus.test.Rd @@ -19,15 +19,15 @@ UCI Machine Learning Repository. This data set includes the following fields: \itemize{ - \item \code{label} the label for each record - \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. +\item \code{label} the label for each record +\item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. } } \references{ -https://archive.ics.uci.edu/ml/datasets/Mushroom +\url{https://archive.ics.uci.edu/ml/datasets/Mushroom} Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository -[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +\url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California, School of Information and Computer Science. } \keyword{datasets} diff --git a/R-package/man/agaricus.train.Rd b/R-package/man/agaricus.train.Rd index 92692c965438..dd05410ba832 100644 --- a/R-package/man/agaricus.train.Rd +++ b/R-package/man/agaricus.train.Rd @@ -19,15 +19,15 @@ UCI Machine Learning Repository. This data set includes the following fields: \itemize{ - \item \code{label} the label for each record - \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. +\item \code{label} the label for each record +\item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
} } \references{ -https://archive.ics.uci.edu/ml/datasets/Mushroom +\url{https://archive.ics.uci.edu/ml/datasets/Mushroom} Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository -[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +\url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California, School of Information and Computer Science. } \keyword{datasets} diff --git a/R-package/man/cb.save.model.Rd b/R-package/man/cb.save.model.Rd index fd564b3e8a01..584fd69b7360 100644 --- a/R-package/man/cb.save.model.Rd +++ b/R-package/man/cb.save.model.Rd @@ -13,7 +13,7 @@ cb.save.model(save_period = 0, save_name = "xgboost.model") \item{save_name}{the name or path for the saved model file. It can contain a \code{\link[base]{sprintf}} formatting specifier to include the integer iteration number in the file name. -E.g., with \code{save_name} = 'xgboost_%04d.model', +E.g., with \code{save_name} = 'xgboost_\%04d.model', the file saved at iteration 50 would be named "xgboost_0050.model".} } \description{ diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 9503c2154632..71f855d8a756 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -23,15 +23,15 @@ Get information of an xgb.DMatrix object The \code{name} field can be one of the following: \itemize{ - \item \code{label} - \item \code{weight} - \item \code{base_margin} - \item \code{label_lower_bound} - \item \code{label_upper_bound} - \item \code{group} - \item \code{feature_type} - \item \code{feature_name} - \item \code{nrow} +\item \code{label} +\item \code{weight} +\item \code{base_margin} +\item \code{label_lower_bound} +\item \code{label_upper_bound} +\item \code{group} +\item \code{feature_type} +\item \code{feature_name} +\item \code{nrow} } See the documentation for \link{xgb.DMatrix} for more information about these fields. diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index c1e58f63baef..135177dda99b 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -3,7 +3,7 @@ \name{predict.xgb.Booster} \alias{predict.xgb.Booster} \alias{predict.xgb.Booster.handle} -\title{Predict method for eXtreme Gradient Boosting model} +\title{Predict method for XGBoost model} \usage{ \method{predict}{xgb.Booster}( object, @@ -25,90 +25,86 @@ \method{predict}{xgb.Booster.handle}(object, ...) } \arguments{ -\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}} +\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} -\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector}, - local data file or \code{xgb.DMatrix}. +\item{newdata}{Takes \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector}, +local data file, or \code{xgb.DMatrix}. +For single-row predictions on sparse data, it is recommended to use the CSR format. +If passing a sparse vector, it will take it as a row vector.} - For single-row predictions on sparse data, it's recommended to use CSR format. If passing - a sparse vector, it will take it as a row vector.} +\item{missing}{Only used when input is a dense matrix. Pick a float value that represents +missing values in data (e.g., 0 or some other extreme value).} -\item{missing}{Missing is only used when input is dense matrix. 
Pick a float value that represents -missing values in data (e.g., sometimes 0 or some other extreme value is used).} - -\item{outputmargin}{whether the prediction should be returned in the for of original untransformed +\item{outputmargin}{Whether the prediction should be returned in the form of original untransformed sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for -logistic regression would result in predictions for log-odds instead of probabilities.} +logistic regression would return log-odds instead of probabilities.} \item{ntreelimit}{Deprecated, use \code{iterationrange} instead.} -\item{predleaf}{whether predict leaf index.} +\item{predleaf}{Whether to predict per-tree leaf indices.} -\item{predcontrib}{whether to return feature contributions to individual predictions (see Details).} +\item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).} -\item{approxcontrib}{whether to use a fast approximation for feature contributions (see Details).} +\item{approxcontrib}{Whether to use a fast approximation for feature contributions (see Details).} -\item{predinteraction}{whether to return contributions of feature interactions to individual predictions (see Details).} +\item{predinteraction}{Whether to return contributions of feature interactions to individual predictions (see Details).} -\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several -prediction outputs per case. This option has no effect when either of predleaf, predcontrib, -or predinteraction flags is TRUE.} +\item{reshape}{Whether to reshape the vector of predictions to matrix form when there are several +prediction outputs per case. No effect if \code{predleaf}, \code{predcontrib}, +or \code{predinteraction} is \code{TRUE}.} -\item{training}{whether is the prediction result used for training. For dart booster, +\item{training}{Whether the predictions are used for training. For dart booster, training predicting will perform dropout.} -\item{iterationrange}{Specifies which layer of trees are used in prediction. For -example, if a random forest is trained with 100 rounds. Specifying -`iterationrange=(1, 21)`, then only the forests built during [1, 21) (half open set) -rounds are used in this prediction. It's 1-based index just like R vector. When set -to \code{c(1, 1)} XGBoost will use all trees.} +\item{iterationrange}{Specifies which trees are used in prediction. For +example, take a random forest with 100 rounds. +With \code{iterationrange=c(1, 21)}, only the trees built during \verb{[1, 21)} (half open set) +rounds are used in this prediction. The index is 1-based just like an R vector. When set +to \code{c(1, 1)}, XGBoost will use all trees.} -\item{strict_shape}{Default is \code{FALSE}. When it's set to \code{TRUE}, output -type and shape of prediction are invariant to model type.} +\item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output +type and shape of predictions are invariant to the model type.} \item{...}{Not used.} } \value{ -The return type is different depending whether \code{strict_shape} is set to \code{TRUE}. By default, -for regression or binary classification, it returns a vector of length \code{nrows(newdata)}. -For multiclass classification, either a \code{num_class * nrows(newdata)} vector or -a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on -the \code{reshape} value.
- -When \code{predleaf = TRUE}, the output is a matrix object with the -number of columns corresponding to the number of trees. - -When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with -\code{num_features + 1} columns. The last "+ 1" column in a matrix corresponds to bias. -For a multiclass case, a list of \code{num_class} elements is returned, where each element is -such a matrix. The contribution values are on the scale of untransformed margin -(e.g., for binary classification would mean that the contributions are log-odds deviations from bias). - -When \code{predinteraction = TRUE} and it is not a multiclass setting, the output is a 3d array with -dimensions \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions) -elements represent different features interaction contributions. The array is symmetric WRT the last -two dimensions. The "+ 1" columns corresponds to bias. Summing this array along the last dimension should -produce practically the same result as predict with \code{predcontrib = TRUE}. -For a multiclass case, a list of \code{num_class} elements is returned, where each element is -such an array. - -When \code{strict_shape} is set to \code{TRUE}, the output is always an array. For -normal prediction, the output is a 2-dimension array \code{(num_class, nrow(newdata))}. - -For \code{predcontrib = TRUE}, output is \code{(ncol(newdata) + 1, num_class, nrow(newdata))} -For \code{predinteraction = TRUE}, output is \code{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))} -For \code{predleaf = TRUE}, output is \code{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))} +The return type depends on \code{strict_shape}. If \code{FALSE} (default): +\itemize{ +\item For regression or binary classification: A vector of length \code{nrows(newdata)}. +\item For multiclass classification: A vector of length \code{num_class * nrows(newdata)} or +a \verb{(nrows(newdata), num_class)} matrix, depending on the \code{reshape} value. +\item When \code{predleaf = TRUE}: A matrix with one column per tree. +\item When \code{predcontrib = TRUE}: When not multiclass, a matrix with +\code{ num_features + 1} columns. The last "+ 1" column corresponds to the baseline value. +In the multiclass case, a list of \code{num_class} such matrices. +The contribution values are on the scale of untransformed margin +(e.g., for binary classification, the values are log-odds deviations from the baseline). +\item When \code{predinteraction = TRUE}: When not multiclass, the output is a 3d array of +dimension \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions) +elements represent different feature interaction contributions. The array is symmetric WRT the last +two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should +produce practically the same result as \code{predcontrib = TRUE}. +In the multiclass case, a list of \code{num_class} such arrays. +} + +When \code{strict_shape = TRUE}, the output is always an array: +\itemize{ +\item For normal predictions, the output has dimension \verb{(num_class, nrow(newdata))}. +\item For \code{predcontrib = TRUE}, the dimension is \verb{(ncol(newdata) + 1, num_class, nrow(newdata))}. +\item For \code{predinteraction = TRUE}, the dimension is \verb{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))}. 
+\item For \code{predleaf = TRUE}, the dimension is \verb{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))}. +} } \description{ Predicted values based on either xgboost model or model handle object. } \details{ -Note that \code{iterationrange} would currently do nothing for predictions from gblinear, -since gblinear doesn't keep its boosting history. +Note that \code{iterationrange} would currently do nothing for predictions from "gblinear", +since "gblinear" doesn't keep its boosting history. One possible practical applications of the \code{predleaf} option is to use the model as a generator of new features which capture non-linearity and interactions, -e.g., as implemented in \code{\link{xgb.create.features}}. +e.g., as implemented in \code{\link[=xgb.create.features]{xgb.create.features()}}. Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to individual predictions. For "gblinear" booster, feature contributions are simply linear terms @@ -124,14 +120,14 @@ Since it quadratically depends on the number of features, it is recommended to p of the most important features first. See below about the format of the returned results. The \code{predict()} method uses as many threads as defined in \code{xgb.Booster} object (all by default). -If you want to change their number, then assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}. -Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads too. +If you want to change their number, assign a new number to \code{nthread} using \code{\link[=xgb.parameters<-]{xgb.parameters<-()}}. +Note that converting a matrix to \code{\link[=xgb.DMatrix]{xgb.DMatrix()}} uses multiple threads too. } \examples{ ## binary classification: -data(agaricus.train, package='xgboost') -data(agaricus.test, package='xgboost') +data(agaricus.train, package = "xgboost") +data(agaricus.test, package = "xgboost") ## Keep the number of threads to 2 for examples nthread <- 2 @@ -140,8 +136,16 @@ data.table::setDTthreads(nthread) train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic") +bst <- xgboost( + data = train$data, + label = train$label, + max_depth = 2, + eta = 0.5, + nthread = nthread, + nrounds = 5, + objective = "binary:logistic" +) + # use all trees by default pred <- predict(bst, test$data) # use only the 1st tree @@ -173,39 +177,63 @@ par(mar = old_mar) lb <- as.numeric(iris$Species) - 1 num_class <- 3 + set.seed(11) -bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, - max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5, - objective = "multi:softprob", num_class = num_class) + +bst <- xgboost( + data = as.matrix(iris[, -5]), + label = lb, + max_depth = 4, + eta = 0.5, + nthread = 2, + nrounds = 10, + subsample = 0.5, + objective = "multi:softprob", + num_class = num_class +) + # predict for softmax returns num_class probability numbers per case: pred <- predict(bst, as.matrix(iris[, -5])) str(pred) # reshape it to a num_class-columns matrix -pred <- matrix(pred, ncol=num_class, byrow=TRUE) +pred <- matrix(pred, ncol = num_class, byrow = TRUE) # convert the probabilities to softmax labels pred_labels <- max.col(pred) - 1 # the following should result in the same error as seen in the last iteration -sum(pred_labels != lb)/length(lb) +sum(pred_labels != lb) / length(lb) -# compare that to the predictions from softmax: +# 
compare with predictions from softmax: set.seed(11) -bst <- xgboost(data = as.matrix(iris[, -5]), label = lb, - max_depth = 4, eta = 0.5, nthread = 2, nrounds = 10, subsample = 0.5, - objective = "multi:softmax", num_class = num_class) + +bst <- xgboost( + data = as.matrix(iris[, -5]), + label = lb, + max_depth = 4, + eta = 0.5, + nthread = 2, + nrounds = 10, + subsample = 0.5, + objective = "multi:softmax", + num_class = num_class +) + pred <- predict(bst, as.matrix(iris[, -5])) str(pred) all.equal(pred, pred_labels) # prediction from using only 5 iterations should result # in the same error as seen in iteration 5: -pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange=c(1, 6)) -sum(pred5 != lb)/length(lb) +pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6)) +sum(pred5 != lb) / length(lb) } \references{ -Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874} - -Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060} +\enumerate{ +\item Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", +NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874} +\item Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", +\url{https://arxiv.org/abs/1706.06060} +} } \seealso{ -\code{\link{xgb.train}}. +\code{\link[=xgb.train]{xgb.train()}} } diff --git a/R-package/man/print.xgb.Booster.Rd b/R-package/man/print.xgb.Booster.Rd index d684882f575b..4d09bb5ec00c 100644 --- a/R-package/man/print.xgb.Booster.Rd +++ b/R-package/man/print.xgb.Booster.Rd @@ -7,23 +7,32 @@ \method{print}{xgb.Booster}(x, verbose = FALSE, ...) } \arguments{ -\item{x}{an xgb.Booster object} +\item{x}{An \code{xgb.Booster} object.} -\item{verbose}{whether to print detailed data (e.g., attribute values)} +\item{verbose}{Whether to print detailed data (e.g., attribute values).} -\item{...}{not currently used} +\item{...}{Not currently used.} } \description{ -Print information about xgb.Booster. +Print information about \code{xgb.Booster}. 
} \examples{ -data(agaricus.train, package='xgboost') +data(agaricus.train, package = "xgboost") train <- agaricus.train -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") -attr(bst, 'myattr') <- 'memo' + +bst <- xgboost( + data = train$data, + label = train$label, + max_depth = 2, + eta = 1, + nthread = 2, + nrounds = 2, + objective = "binary:logistic" +) + +attr(bst, "myattr") <- "memo" print(bst) -print(bst, verbose=TRUE) +print(bst, verbose = TRUE) } diff --git a/R-package/man/xgb.Booster.complete.Rd b/R-package/man/xgb.Booster.complete.Rd index 214694565c29..0adb1a69f6c7 100644 --- a/R-package/man/xgb.Booster.complete.Rd +++ b/R-package/man/xgb.Booster.complete.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/xgb.Booster.R \name{xgb.Booster.complete} \alias{xgb.Booster.complete} -\title{Restore missing parts of an incomplete xgb.Booster object.} +\title{Restore missing parts of an incomplete xgb.Booster object} \usage{ xgb.Booster.complete(object, saveraw = TRUE) } \arguments{ -\item{object}{object of class \code{xgb.Booster}} +\item{object}{Object of class \code{xgb.Booster}.} -\item{saveraw}{a flag indicating whether to append \code{raw} Booster memory dump data +\item{saveraw}{A flag indicating whether to append \code{raw} Booster memory dump data when it doesn't already exist.} } \value{ @@ -27,15 +27,24 @@ While this method is primarily for internal use, it might be useful in some prac E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object, its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods should still work for such a model object since those methods would be using -\code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the -\code{xgb.Booster.complete} function explicitly once after loading a model as an R-object. +\code{xgb.Booster.complete()} internally. However, one might find it to be more efficient to call the +\code{xgb.Booster.complete()} function explicitly once after loading a model as an R-object. That would prevent further repeated implicit reconstruction of an internal booster model. } \examples{ -data(agaricus.train, package='xgboost') -bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +data(agaricus.train, package = "xgboost") + +bst <- xgboost( + data = agaricus.train$data, + label = agaricus.train$label, + max_depth = 2, + eta = 1, + nthread = 2, + nrounds = 2, + objective = "binary:logistic" +) + saveRDS(bst, "xgb.model.rds") # Warning: The resulting RDS file is only compatible with the current XGBoost version. diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index a1ef39f0b21f..01436ba14122 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -38,7 +38,8 @@ so it doesn't make sense to assign weights to individual data points.} \item{base_margin}{Base margin used for boosting from existing model. - In the case of multi-output models, one can also pass multi-dimensional base_margin.} +\if{html}{\out{
<div class="sourceCode">}}\preformatted{ In the case of multi-output models, one can also pass multi-dimensional base_margin. +}\if{html}{\out{</div>
}}} \item{missing}{a float value to represents missing values in data (used only when input is a dense matrix). It is useful when a 0 or some other extreme value represents missing values in data.} @@ -62,16 +63,17 @@ frame and matrix.} \item{enable_categorical}{Experimental support of specializing for categorical features. - If passing 'TRUE' and 'data' is a data frame, - columns of categorical types will automatically - be set to be of categorical type (feature_type='c') in the resulting DMatrix. +\if{html}{\out{
}}\preformatted{ If passing 'TRUE' and 'data' is a data frame, + columns of categorical types will automatically + be set to be of categorical type (feature_type='c') in the resulting DMatrix. - If passing 'FALSE' and 'data' is a data frame with categorical columns, - it will result in an error being thrown. + If passing 'FALSE' and 'data' is a data frame with categorical columns, + it will result in an error being thrown. - If 'data' is not a data frame, this argument is ignored. + If 'data' is not a data frame, this argument is ignored. - JSON/UBJSON serialization format is required for this.} + JSON/UBJSON serialization format is required for this. +}\if{html}{\out{
}}} } \description{ Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file. diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index 03779e4202d9..9203e6281561 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -5,7 +5,7 @@ \alias{xgb.attr<-} \alias{xgb.attributes} \alias{xgb.attributes<-} -\title{Accessors for serializable attributes of a model.} +\title{Accessors for serializable attributes of a model} \usage{ xgb.attr(object, name) @@ -18,62 +18,70 @@ xgb.attributes(object) <- value \arguments{ \item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} -\item{name}{a non-empty character string specifying which attribute is to be accessed.} +\item{name}{A non-empty character string specifying which attribute is to be accessed.} -\item{value}{a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-} -it's a list (or an object coercible to a list) with the names of attributes to set +\item{value}{For \verb{xgb.attr<-}, a value of an attribute; for \verb{xgb.attributes<-}, +it is a list (or an object coercible to a list) with the names of attributes to set and the elements corresponding to attribute values. Non-character values are converted to character. -When attribute value is not a scalar, only the first index is used. +When an attribute value is not a scalar, only the first index is used. Use \code{NULL} to remove an attribute.} } \value{ -\code{xgb.attr} returns either a string value of an attribute +\itemize{ +\item \code{xgb.attr()} returns either a string value of an attribute or \code{NULL} if an attribute wasn't stored in a model. - -\code{xgb.attributes} returns a list of all attribute stored in a model +\item \code{xgb.attributes()} returns a list of all attributes stored in a model or \code{NULL} if a model has no stored attributes. } +} \description{ These methods allow to manipulate the key-value attribute strings of an xgboost model. } \details{ -The primary purpose of xgboost model attributes is to store some meta-data about the model. +The primary purpose of xgboost model attributes is to store some meta data about the model. Note that they are a separate concept from the object attributes in R. Specifically, they refer to key-value strings that can be attached to an xgboost model, stored together with the model's binary representation, and accessed later (from R or any other interface). -In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class -would not be saved by \code{xgb.save} because an xgboost model is an external memory object +In contrast, any R attribute assigned to an R object of \code{xgb.Booster} class +would not be saved by \code{\link[=xgb.save]{xgb.save()}} because an xgboost model is an external memory object and its serialization is handled externally. Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't change the value of that parameter for a model. -Use \code{\link{xgb.parameters<-}} to set or change model parameters. +Use \code{\link[=xgb.parameters<-]{xgb.parameters<-()}} to set or change model parameters. The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. That would only matter if attributes need to be set many times. 
Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters, the raw model cache of an \code{xgb.Booster} object would not be automatically updated, -and it would be user's responsibility to call \code{xgb.serialize} to update it. +and it would be the user's responsibility to call \code{\link[=xgb.serialize]{xgb.serialize()}} to update it. -The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, +The \verb{xgb.attributes<-} setter either updates the existing or adds one or several attributes, but it doesn't delete the other existing attributes. } \examples{ -data(agaricus.train, package='xgboost') +data(agaricus.train, package = "xgboost") train <- agaricus.train -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +bst <- xgboost( + data = train$data, + label = train$label, + max_depth = 2, + eta = 1, + nthread = 2, + nrounds = 2, + objective = "binary:logistic" +) xgb.attr(bst, "my_attribute") <- "my attribute value" print(xgb.attr(bst, "my_attribute")) xgb.attributes(bst) <- list(a = 123, b = "abc") -xgb.save(bst, 'xgb.model') -bst1 <- xgb.load('xgb.model') -if (file.exists('xgb.model')) file.remove('xgb.model') +xgb.save(bst, "xgb.model") +bst1 <- xgb.load("xgb.model") +if (file.exists("xgb.model")) file.remove("xgb.model") print(xgb.attr(bst1, "my_attribute")) print(xgb.attributes(bst1)) diff --git a/R-package/man/xgb.config.Rd b/R-package/man/xgb.config.Rd index 35545cc77a50..83040b877396 100644 --- a/R-package/man/xgb.config.Rd +++ b/R-package/man/xgb.config.Rd @@ -3,31 +3,38 @@ \name{xgb.config} \alias{xgb.config} \alias{xgb.config<-} -\title{Accessors for model parameters as JSON string.} +\title{Accessors for model parameters as JSON string} \usage{ xgb.config(object) xgb.config(object) <- value } \arguments{ -\item{object}{Object of class \code{xgb.Booster}} +\item{object}{Object of class \code{xgb.Booster}.} \item{value}{A JSON string.} } \description{ -Accessors for model parameters as JSON string. +Accessors for model parameters as JSON string } \examples{ -data(agaricus.train, package='xgboost') +data(agaricus.train, package = "xgboost") + ## Keep the number of threads to 1 for examples nthread <- 1 data.table::setDTthreads(nthread) train <- agaricus.train bst <- xgboost( - data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic" + data = train$data, + label = train$label, + max_depth = 2, + eta = 1, + nthread = nthread, + nrounds = 2, + objective = "binary:logistic" ) + config <- xgb.config(bst) } diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd index 64d4af158ef3..68b5619970f9 100644 --- a/R-package/man/xgb.create.features.Rd +++ b/R-package/man/xgb.create.features.Rd @@ -48,7 +48,7 @@ be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries correspond to the leaves of the first subtree and last 2 to those of the second subtree. -[...] 
+\link{...} We can understand boosted decision tree based transformation as a supervised feature encoding that @@ -62,7 +62,7 @@ data(agaricus.test, package='xgboost') dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) -param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') +param <- list(max_depth=2, eta=1, objective='binary:logistic') nrounds = 4 bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2) diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 9c69eb97ff3f..2d8508c4d1d5 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -29,22 +29,22 @@ xgb.cv( ) } \arguments{ \item{params}{the list of parameters. The complete list of parameters is - available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below - is a shorter summary: +available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below +is a shorter summary: \itemize{ - \item \code{objective} objective function, common ones are - \itemize{ - \item \code{reg:squarederror} Regression with squared loss. - \item \code{binary:logistic} logistic regression for classification. - \item See \code{\link[=xgb.train]{xgb.train}()} for complete list of objectives. - } - \item \code{eta} step size of each boosting step - \item \code{max_depth} maximum depth of the tree - \item \code{nthread} number of thread used in training, if not set, all threads are used +\item \code{objective} objective function, common ones are +\itemize{ +\item \code{reg:squarederror} Regression with squared loss. +\item \code{binary:logistic} logistic regression for classification. +\item See \code{\link[=xgb.train]{xgb.train}()} for a complete list of objectives. +} +\item \code{eta} step size of each boosting step +\item \code{max_depth} maximum depth of the tree +\item \code{nthread} number of threads used in training; if not set, all threads are used } - See \code{\link{xgb.train}} for further details. - See also demo/ for walkthrough example in R.} +See \code{\link{xgb.train}} for further details. +See also demo/ for a walkthrough example in R.} \item{data}{takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.} @@ -64,17 +64,17 @@ from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callb \item{showsd}{\code{boolean}, whether to show standard deviation of cross validation} \item{metrics, }{list of evaluation metrics to be used in cross validation, - when it is not specified, the evaluation metric is chosen according to objective function.
+Possible options are: \itemize{ - \item \code{error} binary classification error rate - \item \code{rmse} Rooted mean square error - \item \code{logloss} negative log-likelihood function - \item \code{mae} Mean absolute error - \item \code{mape} Mean absolute percentage error - \item \code{auc} Area under curve - \item \code{aucpr} Area under PR curve - \item \code{merror} Exact matching error, used to evaluate multi-class classification +\item \code{error} binary classification error rate +\item \code{rmse} Root mean square error +\item \code{logloss} negative log-likelihood function +\item \code{mae} Mean absolute error +\item \code{mape} Mean absolute percentage error +\item \code{auc} Area under the curve +\item \code{aucpr} Area under the PR curve +\item \code{merror} Exact matching error, used to evaluate multi-class classification }} \item{obj}{customized objective function. Returns gradient and second order @@ -120,26 +120,26 @@ to customize the training process.} \value{ An object of class \code{xgb.cv.synchronous} with the following elements: \itemize{ - \item \code{call} a function call. - \item \code{params} parameters that were passed to the xgboost library. Note that it does not - capture parameters changed by the \code{\link{cb.reset.parameters}} callback. - \item \code{callbacks} callback functions that were either automatically assigned or - explicitly passed. - \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the - first column corresponding to iteration number and the rest corresponding to the - CV-based evaluation means and standard deviations for the training and test CV-sets. - It is created by the \code{\link{cb.evaluation.log}} callback. - \item \code{niter} number of boosting iterations. - \item \code{nfeatures} number of features in training data. - \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds} - parameter or randomly generated. - \item \code{best_iteration} iteration number with the best evaluation metric value - (only available with early stopping). - \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead. - \item \code{pred} CV prediction values available when \code{prediction} is set. - It is either vector or matrix (see \code{\link{cb.cv.predict}}). - \item \code{models} a list of the CV folds' models. It is only available with the explicit - setting of the \code{cb.cv.predict(save_models = TRUE)} callback. +\item \code{call} a function call. +\item \code{params} parameters that were passed to the xgboost library. Note that it does not +capture parameters changed by the \code{\link{cb.reset.parameters}} callback. +\item \code{callbacks} callback functions that were either automatically assigned or +explicitly passed. +\item \code{evaluation_log} evaluation history stored as a \code{data.table} with the +first column corresponding to iteration number and the rest corresponding to the +CV-based evaluation means and standard deviations for the training and test CV-sets. +It is created by the \code{\link{cb.evaluation.log}} callback. +\item \code{niter} number of boosting iterations. +\item \code{nfeatures} number of features in training data. +\item \code{folds} the list of CV folds' indices - either those passed through the \code{folds} +parameter or randomly generated. +\item \code{best_iteration} iteration number with the best evaluation metric value +(only available with early stopping).
+\item \code{best_ntreelimit} and \code{ntreelimit}: deprecated attributes; use \code{best_iteration} instead. +\item \code{pred} CV prediction values available when \code{prediction} is set. +It is either a vector or a matrix (see \code{\link{cb.cv.predict}}). +\item \code{models} a list of the CV folds' models. It is only available with the explicit +setting of the \code{cb.cv.predict(save_models = TRUE)} callback. } } \description{ diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index d9367b2116c6..12daca3653b8 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -35,20 +35,20 @@ is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).} \value{ For a tree model, a \code{data.table} with the following columns: \itemize{ - \item \code{Features} names of the features used in the model; - \item \code{Gain} represents fractional contribution of each feature to the model based on - the total gain of this feature's splits. Higher percentage means a more important - predictive feature. - \item \code{Cover} metric of the number of observation related to this feature; - \item \code{Frequency} percentage representing the relative number of times - a feature have been used in trees. +\item \code{Features} names of the features used in the model; +\item \code{Gain} represents fractional contribution of each feature to the model based on +the total gain of this feature's splits. Higher percentage means a more important +predictive feature. +\item \code{Cover} metric of the number of observations related to this feature; +\item \code{Frequency} percentage representing the relative number of times +a feature has been used in trees. } A linear model's importance \code{data.table} has the following columns: \itemize{ - \item \code{Features} names of the features used in the model; - \item \code{Weight} the linear coefficient of this feature; - \item \code{Class} (only for multiclass models) class label. +\item \code{Features} names of the features used in the model; +\item \code{Weight} the linear coefficient of this feature; +\item \code{Class} (only for multiclass models) class label. } If \code{feature_names} is not provided and \code{model} doesn't have \code{feature_names}, diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 5a17f9d90f62..131830bde686 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -41,18 +41,18 @@ A \code{data.table} with detailed information about model trees' nodes. The columns of the \code{data.table} are: \itemize{ - \item \code{Tree}: integer ID of a tree in a model (zero-based index) - \item \code{Node}: integer ID of a node in a tree (zero-based index) - \item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE}) - \item \code{Feature}: for a branch node, it's a feature id or name (when available); - for a leaf note, it simply labels it as \code{'Leaf'} - \item \code{Split}: location of the split for a branch node (split condition is always "less than") - \item \code{Yes}: ID of the next node when the split condition is met - \item \code{No}: ID of the next node when the split condition is not met - \item \code{Missing}: ID of the next node when branch value is missing - \item \code{Quality}: either the split gain (change in loss) or the leaf value - \item \code{Cover}: metric related to the number of observation either seen by a split - or collected by a leaf during training.
+\item \code{Tree}: integer ID of a tree in a model (zero-based index) +\item \code{Node}: integer ID of a node in a tree (zero-based index) +\item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE}) +\item \code{Feature}: for a branch node, it's a feature id or name (when available); +for a leaf node, it simply labels it as \code{'Leaf'} +\item \code{Split}: location of the split for a branch node (split condition is always "less than") +\item \code{Yes}: ID of the next node when the split condition is met +\item \code{No}: ID of the next node when the split condition is not met +\item \code{Missing}: ID of the next node when branch value is missing +\item \code{Quality}: either the split gain (change in loss) or the leaf value +\item \code{Cover}: metric related to the number of observations either seen by a split +or collected by a leaf during training. } When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers diff --git a/R-package/man/xgb.parameters.Rd b/R-package/man/xgb.parameters.Rd index ab2695650e39..5305afa51248 100644 --- a/R-package/man/xgb.parameters.Rd +++ b/R-package/man/xgb.parameters.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/xgb.Booster.R \name{xgb.parameters<-} \alias{xgb.parameters<-} -\title{Accessors for model parameters.} +\title{Accessors for model parameters} \usage{ xgb.parameters(object) <- value } \arguments{ \item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} -\item{value}{a list (or an object coercible to a list) with the names of parameters to set +\item{value}{A list (or an object coercible to a list) with the names of parameters to set and the elements corresponding to parameter values.} } \description{ @@ -20,11 +20,18 @@ Note that the setter would usually work more efficiently for \code{xgb.Booster.h than for \code{xgb.Booster}, since only just a handle would need to be copied. } \examples{ -data(agaricus.train, package='xgboost') +data(agaricus.train, package = "xgboost") train <- agaricus.train -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") +bst <- xgboost( + data = train$data, + label = train$label, + max_depth = 2, + eta = 1, + nthread = 2, + nrounds = 2, + objective = "binary:logistic" +) xgb.parameters(bst) <- list(eta = 0.1) diff --git a/R-package/man/xgb.plot.deepness.Rd b/R-package/man/xgb.plot.deepness.Rd index 9e23ac130ea0..12c5c68e247f 100644 --- a/R-package/man/xgb.plot.deepness.Rd +++ b/R-package/man/xgb.plot.deepness.Rd @@ -44,9 +44,9 @@ Visualizes distributions related to depth of tree leafs. When \code{which="2x1"}, two distributions with respect to the leaf depth are plotted on top of each other: \itemize{ - \item the distribution of the number of leafs in a tree model at a certain depth; - \item the distribution of average weighted number of observations ("cover") - ending up in leafs at certain depth. +\item the distribution of the number of leafs in a tree model at a certain depth; +\item the distribution of average weighted number of observations ("cover") +ending up in leafs at certain depth. } Those could be helpful in determining sensible ranges of the \code{max_depth} and \code{min_child_weight} parameters.
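For orientation, a minimal sketch of how the pages touched above fit together; it assumes a booster `bst` fitted on `agaricus.train` exactly as in the examples shown in these hunks, and uses only exported functions from the documented API:

# Assumes `bst` was fitted as in the agaricus examples above.
dt <- xgb.model.dt.tree(model = bst)   # per-node table: Tree, Node, Feature, Split, Quality, Cover, ...
head(dt)

xgb.parameters(bst) <- list(eta = 0.1) # update a parameter on the existing booster in place

xgb.plot.deepness(bst, which = "2x1")  # leaf-depth and cover distributions discussed above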
diff --git a/R-package/man/xgb.plot.shap.Rd b/R-package/man/xgb.plot.shap.Rd index 6f2d0dfa6dcc..75f8d2d0f142 100644 --- a/R-package/man/xgb.plot.shap.Rd +++ b/R-package/man/xgb.plot.shap.Rd @@ -41,7 +41,7 @@ xgb.plot.shap( \item{features}{a vector of either column indices or of feature names to plot. When it is NULL, feature importance is calculated, and \code{top_n} high ranked features are taken.} -\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} +\item{top_n}{when \code{features} is NULL, top_n \verb{[1, 100]} most important features in a model are taken.} \item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib} or \code{features} is missing.} @@ -94,8 +94,8 @@ more than 5 distinct values.} \value{ In addition to producing plots (when \code{plot=TRUE}), it silently returns a list of two matrices: \itemize{ - \item \code{data} the values of selected features; - \item \code{shap_contrib} the contributions of selected features. +\item \code{data} the values of selected features; +\item \code{shap_contrib} the contributions of selected features. } } \description{ diff --git a/R-package/man/xgb.plot.shap.summary.Rd b/R-package/man/xgb.plot.shap.summary.Rd index 3ff8af21cf30..910119e6ffe6 100644 --- a/R-package/man/xgb.plot.shap.summary.Rd +++ b/R-package/man/xgb.plot.shap.summary.Rd @@ -38,7 +38,7 @@ xgb.plot.shap.summary( \item{features}{a vector of either column indices or of feature names to plot. When it is NULL, feature importance is calculated, and \code{top_n} high ranked features are taken.} -\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} +\item{top_n}{when \code{features} is NULL, top_n \verb{[1, 100]} most important features in a model are taken.} \item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib} or \code{features} is missing.} @@ -67,12 +67,12 @@ Each point (observation) is coloured based on its feature value. The plot hence allows us to see which features have a negative / positive contribution on the model prediction, and whether the contribution is different for larger or smaller values of the feature. We effectively try to replicate the -\code{summary_plot} function from https://github.com/shap/shap. +\code{summary_plot} function from \url{https://github.com/shap/shap}. } \examples{ # See \code{\link{xgb.plot.shap}}. } \seealso{ \code{\link{xgb.plot.shap}}, \code{\link{xgb.ggplot.shap.summary}}, - \url{https://github.com/shap/shap} +\url{https://github.com/shap/shap} } diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index d419eb76a02f..224e393ce730 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -52,14 +52,14 @@ Read a tree model text dump and plot the model. The content of each node is organised that way: \itemize{ - \item Feature name. - \item \code{Cover}: The sum of second order gradient of training data classified to the leaf. - If it is square loss, this simply corresponds to the number of instances seen by a split - or collected by a leaf during training. - The deeper in the tree a node is, the lower this metric will be. - \item \code{Gain} (for split nodes): the information gain metric of a split - (corresponds to the importance of the node in the model). - \item \code{Value} (for leafs): the margin value that the leaf may contribute to prediction. +\item Feature name. 
+\item \code{Cover}: The sum of second order gradient of training data classified to the leaf. +If it is square loss, this simply corresponds to the number of instances seen by a split +or collected by a leaf during training. +The deeper in the tree a node is, the lower this metric will be. +\item \code{Gain} (for split nodes): the information gain metric of a split +(corresponds to the importance of the node in the model). +\item \code{Value} (for leafs): the margin value that the leaf may contribute to prediction. } The tree root nodes also indicate the Tree index (0-based). diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index c7c93a734d22..0835519336a0 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -12,9 +12,9 @@ xgb.save.raw(model, raw_format = "deprecated") \item{raw_format}{The format for encoding the booster. Available options are \itemize{ - \item \code{json}: Encode the booster into JSON text document. - \item \code{ubj}: Encode the booster into Universal Binary JSON. - \item \code{deprecated}: Encode the booster into old customized binary format. +\item \code{json}: Encode the booster into JSON text document. +\item \code{ubj}: Encode the booster into Universal Binary JSON. +\item \code{deprecated}: Encode the booster into old customized binary format. } Right now the default is \code{deprecated} but will be changed to \code{ubj} in upcoming release.} diff --git a/R-package/man/xgb.shap.data.Rd b/R-package/man/xgb.shap.data.Rd index 2f0e4adeaa87..6c4336cdedc4 100644 --- a/R-package/man/xgb.shap.data.Rd +++ b/R-package/man/xgb.shap.data.Rd @@ -27,7 +27,7 @@ xgb.shap.data( \item{features}{a vector of either column indices or of feature names to plot. When it is NULL, feature importance is calculated, and \code{top_n} high ranked features are taken.} -\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} +\item{top_n}{when \code{features} is NULL, top_n \verb{[1, 100]} most important features in a model are taken.} \item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib} or \code{features} is missing.} @@ -45,8 +45,8 @@ it is set so that up to 100K data points are used.} } \value{ A list containing: 'data', a matrix containing sample observations - and their feature values; 'shap_contrib', a matrix containing the SHAP contribution - values for these observations. +and their feature values; 'shap_contrib', a matrix containing the SHAP contribution +values for these observations. } \description{ Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc. diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 105009cf8094..0ef2e2216d66 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -43,111 +43,114 @@ xgboost( } \arguments{ \item{params}{the list of parameters. The complete list of parameters is - available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below - is a shorter summary: - -1. General Parameters +available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below +is a shorter summary: +\enumerate{ +\item General Parameters +} \itemize{ - \item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}. +\item \code{booster} which booster to use, can be \code{gbtree} or \code{gblinear}. Default: \code{gbtree}. 
+} +\enumerate{ +\item Booster Parameters } - -2. Booster Parameters 2.1. Parameters for Tree Booster \itemize{ - \item{ \code{eta} control the learning rate: scale the contribution of each tree by a factor of \code{0 < eta < 1} - when it is added to the current approximation. - Used to prevent overfitting by making the boosting process more conservative. - Lower value for \code{eta} implies larger value for \code{nrounds}: low \code{eta} value means model - more robust to overfitting but slower to compute. Default: 0.3} - \item{ \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. - the larger, the more conservative the algorithm will be.} - \item \code{max_depth} maximum depth of a tree. Default: 6 - \item{\code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. - If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, - then the building process will give up further partitioning. - In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. - The larger, the more conservative the algorithm will be. Default: 1} - \item{ \code{subsample} subsample ratio of the training instance. - Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees - and this will prevent overfitting. It makes computation shorter (because less data to analyse). - It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1} - \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1 - \item \code{lambda} L2 regularization term on weights. Default: 1 - \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0 - \item{ \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. - Useful to test Random Forest through XGBoost - (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. - Default: 1} - \item{ \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length - equals to the number of features in the training data. - \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.} - \item{ \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. - Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. - Feature index values should start from \code{0} (\code{0} references the first column). - Leave argument unspecified for no interaction constraints.} +\item{ \code{eta} control the learning rate: scale the contribution of each tree by a factor of \code{0 < eta < 1} +when it is added to the current approximation. +Used to prevent overfitting by making the boosting process more conservative. +Lower value for \code{eta} implies larger value for \code{nrounds}: low \code{eta} value means model +more robust to overfitting but slower to compute. Default: 0.3} +\item{ \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. +the larger, the more conservative the algorithm will be.} +\item \code{max_depth} maximum depth of a tree. Default: 6 +\item{\code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. 
+If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, +then the building process will give up further partitioning. +In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. +The larger, the more conservative the algorithm will be. Default: 1} +\item{ \code{subsample} subsample ratio of the training instance. +Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees +and this will prevent overfitting. It makes computation shorter (because less data to analyse). +It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1} +\item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1 +\item \code{lambda} L2 regularization term on weights. Default: 1 +\item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0 +\item{ \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. +Useful to test Random Forest through XGBoost +(set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. +Default: 1} +\item{ \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length +equals to the number of features in the training data. +\code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.} +\item{ \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. +Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. +Feature index values should start from \code{0} (\code{0} references the first column). +Leave argument unspecified for no interaction constraints.} } 2.2. Parameters for Linear Booster \itemize{ - \item \code{lambda} L2 regularization term on weights. Default: 0 - \item \code{lambda_bias} L2 regularization term on bias. Default: 0 - \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0 +\item \code{lambda} L2 regularization term on weights. Default: 0 +\item \code{lambda_bias} L2 regularization term on bias. Default: 0 +\item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0 +} +\enumerate{ +\item Task Parameters } - -3. Task Parameters \itemize{ \item{ \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. - The default objective options are below: - \itemize{ - \item \code{reg:squarederror} Regression with squared loss (Default). - \item{ \code{reg:squaredlogerror}: regression with squared log loss \eqn{1/2 * (log(pred + 1) - log(label + 1))^2}. - All inputs are required to be greater than -1. - Also, see metric rmsle for possible issue with this objective.} - \item \code{reg:logistic} logistic regression. - \item \code{reg:pseudohubererror}: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. - \item \code{binary:logistic} logistic regression for binary classification. Output probability. - \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation. - \item \code{binary:hinge}: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. 
- \item{ \code{count:poisson}: Poisson regression for count data, output mean of Poisson distribution. - \code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization).} - \item{ \code{survival:cox}: Cox regression for right censored survival time data (negative values are considered right censored). - Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional - hazard function \code{h(t) = h0(t) * HR)}.} - \item{ \code{survival:aft}: Accelerated failure time model for censored survival time data. See - \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} - for details.} - \item \code{aft_loss_distribution}: Probability Density Function used by \code{survival:aft} and \code{aft-nloglik} metric. - \item{ \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. - Class is represented by a number and should be from 0 to \code{num_class - 1}.} - \item{ \code{multi:softprob} same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be - further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging - to each class.} - \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss. - \item{ \code{rank:ndcg}: Use LambdaMART to perform list-wise ranking where - \href{https://en.wikipedia.org/wiki/Discounted_cumulative_gain}{Normalized Discounted Cumulative Gain (NDCG)} is maximized.} - \item{ \code{rank:map}: Use LambdaMART to perform list-wise ranking where - \href{https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Mean_average_precision}{Mean Average Precision (MAP)} - is maximized.} - \item{ \code{reg:gamma}: gamma regression with log-link. - Output is a mean of gamma distribution. - It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be - \href{https://en.wikipedia.org/wiki/Gamma_distribution#Applications}{gamma-distributed}.} - \item{ \code{reg:tweedie}: Tweedie regression with log-link. - It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be - \href{https://en.wikipedia.org/wiki/Tweedie_distribution#Applications}{Tweedie-distributed}.} - } - } - \item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5 - \item{ \code{eval_metric} evaluation metrics for validation data. - Users can pass a self-defined function to it. - Default: metric will be assigned according to objective - (rmse for regression, and error for classification, mean average precision for ranking). - List is provided in detail section.} +The default objective options are below: +\itemize{ +\item \code{reg:squarederror} Regression with squared loss (Default). +\item{ \code{reg:squaredlogerror}: regression with squared log loss \eqn{1/2 * (log(pred + 1) - log(label + 1))^2}. +All inputs are required to be greater than -1. +Also, see metric rmsle for possible issue with this objective.} +\item \code{reg:logistic} logistic regression. +\item \code{reg:pseudohubererror}: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. +\item \code{binary:logistic} logistic regression for binary classification. Output probability. 
+\item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation. +\item \code{binary:hinge}: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. +\item{ \code{count:poisson}: Poisson regression for count data, output mean of Poisson distribution. +\code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization).} +\item{ \code{survival:cox}: Cox regression for right censored survival time data (negative values are considered right censored). +Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional +hazard function \code{h(t) = h0(t) * HR)}.} +\item{ \code{survival:aft}: Accelerated failure time model for censored survival time data. See +\href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} +for details.} +\item \code{aft_loss_distribution}: Probability Density Function used by \code{survival:aft} and \code{aft-nloglik} metric. +\item{ \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. +Class is represented by a number and should be from 0 to \code{num_class - 1}.} +\item{ \code{multi:softprob} same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be +further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging +to each class.} +\item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss. +\item{ \code{rank:ndcg}: Use LambdaMART to perform list-wise ranking where +\href{https://en.wikipedia.org/wiki/Discounted_cumulative_gain}{Normalized Discounted Cumulative Gain (NDCG)} is maximized.} +\item{ \code{rank:map}: Use LambdaMART to perform list-wise ranking where +\href{https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Mean_average_precision}{Mean Average Precision (MAP)} +is maximized.} +\item{ \code{reg:gamma}: gamma regression with log-link. +Output is a mean of gamma distribution. +It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be +\href{https://en.wikipedia.org/wiki/Gamma_distribution#Applications}{gamma-distributed}.} +\item{ \code{reg:tweedie}: Tweedie regression with log-link. +It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be +\href{https://en.wikipedia.org/wiki/Tweedie_distribution#Applications}{Tweedie-distributed}.} +} +} +\item \code{base_score} the initial prediction score of all instances, global bias. Default: 0.5 +\item{ \code{eval_metric} evaluation metrics for validation data. +Users can pass a self-defined function to it. +Default: metric will be assigned according to objective +(rmse for regression, and error for classification, mean average precision for ranking). +List is provided in detail section.} }} \item{data}{training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input. @@ -218,24 +221,24 @@ This parameter is only used when input is a dense matrix.} \value{ An object of class \code{xgb.Booster} with the following elements: \itemize{ - \item \code{handle} a handle (pointer) to the xgboost model in memory. - \item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type. - \item \code{niter} number of boosting iterations. 
- \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the - first column corresponding to iteration number and the rest corresponding to evaluation - metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback. - \item \code{call} a function call. - \item \code{params} parameters that were passed to the xgboost library. Note that it does not - capture parameters changed by the \code{\link{cb.reset.parameters}} callback. - \item \code{callbacks} callback functions that were either automatically assigned or - explicitly passed. - \item \code{best_iteration} iteration number with the best evaluation metric value - (only available with early stopping). - \item \code{best_score} the best evaluation metric value during early stopping. - (only available with early stopping). - \item \code{feature_names} names of the training dataset features - (only when column names were defined in training data). - \item \code{nfeatures} number of features in training data. +\item \code{handle} a handle (pointer) to the xgboost model in memory. +\item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type. +\item \code{niter} number of boosting iterations. +\item \code{evaluation_log} evaluation history stored as a \code{data.table} with the +first column corresponding to iteration number and the rest corresponding to evaluation +metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback. +\item \code{call} a function call. +\item \code{params} parameters that were passed to the xgboost library. Note that it does not +capture parameters changed by the \code{\link{cb.reset.parameters}} callback. +\item \code{callbacks} callback functions that were either automatically assigned or +explicitly passed. +\item \code{best_iteration} iteration number with the best evaluation metric value +(only available with early stopping). +\item \code{best_score} the best evaluation metric value during early stopping. +(only available with early stopping). +\item \code{feature_names} names of the training dataset features +(only when column names were defined in training data). +\item \code{nfeatures} number of features in training data. } } \description{ @@ -258,29 +261,29 @@ when the \code{eval_metric} parameter is not provided. User may set one or several \code{eval_metric} parameters. Note that when using a customized metric, only this single metric can be used. The following is the list of built-in metrics for which XGBoost provides optimized implementation: - \itemize{ - \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} - \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood} - \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html} - \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. - By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. - Different threshold (e.g., 0.) could be specified as "error@0." - \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. - \item \code{mae} Mean absolute error - \item \code{mape} Mean absolute percentage error - \item{ \code{auc} Area under the curve. 
- \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.} - \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. - \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} - } +\itemize{ +\item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} +\item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood} +\item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html} +\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. +By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. +Different threshold (e.g., 0.) could be specified as "error@0." +\item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. +\item \code{mae} Mean absolute error +\item \code{mape} Mean absolute percentage error +\item{ \code{auc} Area under the curve. +\url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.} +\item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. +\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} +} The following callbacks are automatically created when certain parameters are set: \itemize{ - \item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; - and the \code{print_every_n} parameter is passed to it. - \item \code{cb.evaluation.log} is on when \code{watchlist} is present. - \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. - \item \code{cb.save.model}: when \code{save_period > 0} is set. +\item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; +and the \code{print_every_n} parameter is passed to it. +\item \code{cb.evaluation.log} is on when \code{watchlist} is present. +\item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. +\item \code{cb.save.model}: when \code{save_period > 0} is set. } } \examples{ diff --git a/R-package/man/xgb.unserialize.Rd b/R-package/man/xgb.unserialize.Rd index d191d77d4ac3..f83ee635dfb5 100644 --- a/R-package/man/xgb.unserialize.Rd +++ b/R-package/man/xgb.unserialize.Rd @@ -11,7 +11,7 @@ xgb.unserialize(buffer, handle = NULL) \item{handle}{An \code{xgb.Booster.handle} object which will be overwritten with the new deserialized object. Must be a null handle (e.g. when loading the model through -`readRDS`). If not provided, a new handle will be created.} +\code{readRDS}). If not provided, a new handle will be created.} } \value{ An \code{xgb.Booster.handle} object.
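As context for the handle-based workflow documented in the xgb.unserialize page above, a minimal sketch of a serialize/unserialize round trip; it assumes a fitted `xgb.Booster` named `bst` as in the earlier examples:

# Assumes `bst` is a fitted xgb.Booster (see the agaricus examples above).
buf <- xgb.serialize(bst)      # raw vector holding the model together with its parameters and attributes
handle <- xgb.unserialize(buf) # returns an xgb.Booster.handle pointing at the restored model

# The handle can be used directly with accessors documented above, e.g.:
xgb.attributes(handle)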