microsoft · jameslamb · May 17, 2022 · May 13, 2022
@@ -6,13 +6,13 @@
 library(lightgbm)
 
 # We will train a model with the following scenarios:
-# - Run 1: sum of weights equal to 0.06513 without adjusted regularization (not learning)
-# - Run 2: sum of weights equal to 0.06513 with adjusted regularization (learning)
-# - Run 3: sum of weights equal to 6513 (x 1e5) with adjusted regularization (learning)
+# - Run 1: sum of weights equal to 6513 (x 1e-5) without adjusted regularization (not learning)
+# - Run 2: sum of weights equal to 6513 (x 1e-5) with adjusted regularization (learning)
+# - Run 3: sum of weights equal to 6513 with adjusted regularization (learning)
 
 # Setup small weights
-weights1 <- rep(1.0 / 100000.0, 6513L)
-weights2 <- rep(1.0 / 100000.0, 1611L)
+weights1 <- rep(1e-5, 6513L)
+weights2 <- rep(1e-5, 1611L)
 
 # Load data and create datasets
 data(agaricus.train, package = "lightgbm")
@@ -23,7 +23,7 @@ test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label, weight = weights2)
 valids <- list(test = dtest)
 
-# Run 1: sum of weights equal to 0.06513 without adjusted regularization (not learning)
+# Run 1: sum of weights equal to 6513 (x 1e-5) without adjusted regularization (not learning)
 # It cannot learn because regularization is too large!
 # min_sum_hessian alone is bigger than the sum of weights, thus you will never learn anything
 params <- list(
@@ -47,7 +47,7 @@ model <- lgb.train(
 weight_loss <- as.numeric(model$record_evals$test$l2$eval)
 plot(weight_loss) # Shows how poor the learning was: a straight line!
 
-# Run 2: sum of weights equal to 0.06513 with adjusted regularization (learning)
+# Run 2: sum of weights equal to 6513 (x 1e-5) with adjusted regularization (learning)
 # Adjusted regularization just consists of multiplying results by 1e4 (x10000)
 # Notice how it learns, there is no issue as we adjusted regularization ourselves
 params <- list(
@@ -71,15 +71,8 @@ model <- lgb.train(
 small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
 plot(small_weight_loss) # It learns!
 
-# Run 3: sum of weights equal to 6513 (x 1e5) with adjusted regularization (learning)
-
-# And now, we are doing as usual
-library(lightgbm)
-data(agaricus.train, package = "lightgbm")
-train <- agaricus.train
+# Run 3: sum of weights equal to 6513 with adjusted regularization (learning)
 dtrain <- lgb.Dataset(train$data, label = train$label)
-data(agaricus.test, package = "lightgbm")
-test <- agaricus.test
 dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
 valids <- list(test = dtest)