From b60e94b2545b8b817919783d7099b58840512559 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2020 21:25:42 -0500 Subject: [PATCH] added cbind() to undesirable_function_linter --- .ci/lint_r_code.R | 47 ++++++++++---------- R-package/demo/multiclass_custom_objective.R | 26 +++++++++-- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R index 939e6eb91a41..938230e25183 100755 --- a/.ci/lint_r_code.R +++ b/.ci/lint_r_code.R @@ -29,31 +29,32 @@ interactive_text <- paste0( ) LINTERS_TO_USE <- list( - "absolute_path" = lintr::absolute_path_linter - , "assignment" = lintr::assignment_linter - , "closed_curly" = lintr::closed_curly_linter - , "commas" = lintr::commas_linter - , "equals_na" = lintr::equals_na_linter - , "function_left" = lintr::function_left_parentheses_linter - , "implicit_integers" = lintr::implicit_integer_linter - , "infix_spaces" = lintr::infix_spaces_linter - , "long_lines" = lintr::line_length_linter(length = 120L) - , "tabs" = lintr::no_tab_linter - , "non_portable_path" = lintr::nonportable_path_linter - , "open_curly" = lintr::open_curly_linter - , "paren_brace_linter" = lintr::paren_brace_linter - , "semicolon" = lintr::semicolon_terminator_linter - , "seq" = lintr::seq_linter - , "single_quotes" = lintr::single_quotes_linter - , "spaces_inside" = lintr::spaces_inside_linter - , "spaces_left_parens" = lintr::spaces_left_parentheses_linter - , "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do")) - , "trailing_blank" = lintr::trailing_blank_lines_linter - , "trailing_white" = lintr::trailing_whitespace_linter - , "true_false" = lintr::T_and_F_symbol_linter + "absolute_path" = lintr::absolute_path_linter + , "assignment" = lintr::assignment_linter + , "closed_curly" = lintr::closed_curly_linter + , "commas" = lintr::commas_linter + , "equals_na" = lintr::equals_na_linter + , "function_left" = lintr::function_left_parentheses_linter + , "implicit_integers" = lintr::implicit_integer_linter + , "infix_spaces" = lintr::infix_spaces_linter + , "long_lines" = lintr::line_length_linter(length = 120L) + , "no_tabs" = lintr::no_tab_linter + , "non_portable_path" = lintr::nonportable_path_linter + , "open_curly" = lintr::open_curly_linter + , "paren_brace_linter" = lintr::paren_brace_linter + , "semicolon" = lintr::semicolon_terminator_linter + , "seq" = lintr::seq_linter + , "single_quotes" = lintr::single_quotes_linter + , "spaces_inside" = lintr::spaces_inside_linter + , "spaces_left_parens" = lintr::spaces_left_parentheses_linter + , "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do")) + , "trailing_blank" = lintr::trailing_blank_lines_linter + , "trailing_white" = lintr::trailing_whitespace_linter + , "true_false" = lintr::T_and_F_symbol_linter , "undesirable_function" = lintr::undesirable_function_linter( fun = c( - "dyn.load" = "Directly loading/unloading .dll/.so files in package code should not be necessary." + "cbind" = "cbind() is an unsafe way to build up a data frame. merge() or direct column assignment is preferred" + , "dyn.load" = "Directly loading/unloading .dll/.so files in package code should not be necessary." , "dyn.unload" = "Directly loading/unloading .dll/.so files in package code should not be necessary." , "help" = interactive_text , "ifelse" = "The use of ifelse() is dangerous because it will silently allow mixing types." diff --git a/R-package/demo/multiclass_custom_objective.R b/R-package/demo/multiclass_custom_objective.R index f5ba9276c3b5..ec2ed90cdf64 100644 --- a/R-package/demo/multiclass_custom_objective.R +++ b/R-package/demo/multiclass_custom_objective.R @@ -43,16 +43,25 @@ probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin)) custom_multiclass_obj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") - # preds is a matrix with rows corresponding to samples and colums corresponding to choices + # preds is a matrix with rows corresponding to samples and columns corresponding to choices preds <- matrix(preds, nrow = length(labels)) # to prevent overflow, normalize preds by row - preds <- preds - apply(preds, 1L, max) + preds <- preds - apply(preds, MARGIN = 1L, max) prob <- exp(preds) / rowSums(exp(preds)) # compute gradient grad <- prob - grad[cbind(seq_len(length(labels)), labels + 1L)] <- grad[cbind(seq_len(length(labels)), labels + 1L)] - 1L + subset_index <- as.matrix( + data.frame( + seq_len(length(labels)) + , labels + 1L + , fix.empty.names = FALSE + ) + , nrow = length(labels) + , dimnames = NULL + ) + grad[subset_index] <- grad[subset_index] - 1L # compute hessian (approximation) hess <- 2.0 * prob * (1.0 - prob) @@ -67,9 +76,18 @@ custom_multiclass_metric <- function(preds, dtrain) { preds <- preds - apply(preds, 1L, max) prob <- exp(preds) / rowSums(exp(preds)) + subset_index <- as.matrix( + data.frame( + seq_len(length(labels)) + , labels + 1L + , fix.empty.names = FALSE + ) + , nrow = length(labels) + , dimnames = NULL + ) return(list( name = "error" - , value = -mean(log(prob[cbind(seq_len(length(labels)), labels + 1L)])) + , value = -mean(log(prob[subset_index])) , higher_better = FALSE )) }