Update 0.4.0.9004
- Added optimal lambdas to the final output when lambdas are a range of values or `NULL` and the optimal lambda must be selected using `cv.glmnet`.
- The console output specifying the optimal lambda now mentions the specific rule used (see the sketch below).
- Fixed an error when lambda is only a single value.
- Better alignment in command-line output when there are NaN values.
donishadsmith committed Dec 30, 2024
1 parent 9cf1007 commit 4cfc28b
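
As context for the lambda-selection changes above, here is a minimal sketch (not vswift's internal code) of how `glmnet::cv.glmnet` can pick the optimal lambda under the `"min"` or `"1se"` rule when `lambda` is `NULL` or a range of values; the two-class `iris` subset and variable names below are illustrative assumptions only.

```R
# Sketch only: selecting an optimal lambda with glmnet::cv.glmnet
# when lambda is NULL or a range of values (illustrative data, not vswift internals).
library(glmnet)

set.seed(123)
# Illustrative two-class subset of iris (assumption)
binary_iris <- iris[iris$Species != "setosa", ]
x <- as.matrix(binary_iris[, 1:4])
y <- droplevels(binary_iris$Species)

# Nested 3-fold cross-validation over glmnet's default lambda path
cv_fit <- cv.glmnet(x, y, family = "binomial", nfolds = 3)

# The rule decides which lambda is reported: "min" or "1se"
rule <- "1se"
optimal_lambda <- if (rule == "min") cv_fit$lambda.min else cv_fit$lambda.1se
cat(sprintf("Optimal lambda: %.5f (nested 3-fold cross-validation using '%s' rule)\n",
            optimal_lambda, rule))
```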
Showing 12 changed files with 147 additions and 66 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing.yaml
@@ -51,7 +51,7 @@ jobs:
if: ${{ matrix.os == 'windows-latest' }}
run: |
R CMD build --no-build-vignettes .
R CMD check --no-manual --as-cran vswift_0.4.0.9003.tar.gz
R CMD check --no-manual --as-cran vswift_0.4.0.9004.tar.gz
shell: cmd
working-directory: pkg/vswift

13 changes: 12 additions & 1 deletion CHANGELOG.md
@@ -14,7 +14,18 @@ All notable future changes to vswift will be documented in this file.
noted in the changelog (i.e new functions or parameters, changes in parameter defaults or function names, etc).
- *.patch* : Contains no new features, simply fixes any identified bugs.

## [0.4.0.9002] - 2024-12-28

## [0.4.0.9004] - 2024-12-30
### 🚀 New/Added
- Added optimal lambdas to the final output when lambdas are a range of values or `NULL` and the optimal lambda must
  be selected using `cv.glmnet`.
### ♻ Changed
- The console output specifying the optimal lambda now mentions the specific rule used.
### 🐛 Fixes
- Fixed an error when lambda is only a single value.
- Better alignment in command-line output when there are NaN values.

## [0.4.0.9003] - 2024-12-28
### 📖 Documentation
- Name changes in documentation
### ♻ Changed
24 changes: 17 additions & 7 deletions README.md
@@ -70,7 +70,7 @@ help(package = "vswift")
install.packages("remotes")

# Install 'vswift' package
remotes::install_url("/~https://github.com/donishadsmith/vswift/releases/download/0.4.0.9003/vswift_0.4.0.9003.tar.gz")
remotes::install_url("/~https://github.com/donishadsmith/vswift/releases/download/0.4.0.9004/vswift_0.4.0.9004.tar.gz")

# Display documentation for the 'vswift' package
help(package = "vswift")
@@ -175,12 +175,22 @@ results <- classCV(

**Output Message**
```
Model: regularized_logistic | Partition: Train-Test Split | Optimal lambda: 0.06556 (nested 3-fold cross-validation)
Model: regularized_logistic | Partition: Fold 1 | Optimal lambda: 0.01357 (nested 3-fold cross-validation)
Model: regularized_logistic | Partition: Fold 2 | Optimal lambda: 0.04880 (nested 3-fold cross-validation)
Model: regularized_logistic | Partition: Fold 3 | Optimal lambda: 0.01226 (nested 3-fold cross-validation)
Model: regularized_logistic | Partition: Fold 4 | Optimal lambda: 0.06464 (nested 3-fold cross-validation)
Model: regularized_logistic | Partition: Fold 5 | Optimal lambda: 0.00847 (nested 3-fold cross-validation)
Model: regularized_logistic | Partition: Train-Test Split | Optimal lambda: 0.06556 (nested 3-fold cross-validation using '1se' rule)
Model: regularized_logistic | Partition: Fold 1 | Optimal lambda: 0.01357 (nested 3-fold cross-validation using '1se' rule)
Model: regularized_logistic | Partition: Fold 2 | Optimal lambda: 0.04880 (nested 3-fold cross-validation using '1se' rule)
Model: regularized_logistic | Partition: Fold 3 | Optimal lambda: 0.01226 (nested 3-fold cross-validation using '1se' rule)
Model: regularized_logistic | Partition: Fold 4 | Optimal lambda: 0.06464 (nested 3-fold cross-validation using '1se' rule)
Model: regularized_logistic | Partition: Fold 5 | Optimal lambda: 0.00847 (nested 3-fold cross-validation using '1se' rule)
```

Print optimal lambda values.
```R
print(results$metrics$regularized_logistic$optimal_lambdas)
```
**Output Message**
```
split fold1 fold2 fold3 fold4 fold5
0.065562223 0.013572555 0.048797016 0.012261040 0.064639632 0.008467439
```
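
The returned vector is named, so it can be summarized directly. This is an optional sketch that assumes the `results` object from the `classCV` call above, not additional package output.

```R
# Optional sketch: summarize the per-partition optimal lambdas
lambdas <- results$metrics$regularized_logistic$optimal_lambdas
round(c(mean = mean(lambdas), sd = sd(lambdas)), 5)
```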

`classCV` produces a vswift object which can be used for custom printing and plotting of performance metrics by using
4 changes: 2 additions & 2 deletions pkg/vswift/DESCRIPTION
@@ -1,7 +1,7 @@
Package: vswift
Title: Classification Model Evaluation
Version: 0.4.0.9003
Date: 2024-12-28
Version: 0.4.0.9004
Date: 2024-12-30
Authors@R: person(given = "Donisha",
family = "Smith",
role = c("aut", "cre"),
Expand Down
1 change: 1 addition & 0 deletions pkg/vswift/NAMESPACE
@@ -20,6 +20,7 @@ importFrom(kknn,contr.dummy)
importFrom(stats,as.formula)
importFrom(stats,complete.cases)
importFrom(stats,glm)
importFrom(stats,model.matrix)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(utils,capture.output)
5 changes: 4 additions & 1 deletion pkg/vswift/R/append_keys.R
@@ -70,7 +70,10 @@
new_struct <- c(new_struct[!names(new_struct) == "logistic_threshold"], list(logistic_threshold = NULL))
}

if (!any(c("regularized_logistic", "regularized_multinomial") %in% models)) {
has_lambda <- length(new_struct$map_args$regularized_logistic$lambda == 1)
has_lambda <- length(new_struct$map_args$regularized_multinomial$lambda == 1)

if (!any(c("regularized_logistic", "regularized_multinomial") %in% models) || has_lambda) {
new_struct <- c(new_struct[!names(new_struct) %in% c("rule", "verbose")], list(rule = NULL, verbose = NULL))
}
}
23 changes: 16 additions & 7 deletions pkg/vswift/R/classCV.R
@@ -62,9 +62,9 @@
#' \code{models} with one of these objective functions: \code{"reg:logistic"}, \code{"binary:logistic"}, or
#' \code{"binary:logitraw"}. Default is \code{0.5}.
#' \item \code{"rule"}: A character that dictates the rule used to select the optimal lambda when using
#' \code{"regularized_logistic"} or \code{"regularized_multinomial"}. Available options are: \code{"min"} or
#' \code{regularized_logistic} or \code{"regularized_multinomial"}. Available options are: \code{"min"} or
#' \code{"1se"}. Default is \code{"min"}.
#' \item \code{"verbose"}: A logical value indicating whether to state the optimal lambda based on the nested
#' \item \code{verbose}: A logical value indicating whether to state the optimal lambda based on the nested
#' cross-validation. \item \code{"final_model"}: A logical value indicating whether to use all complete observations
#' in the input data for model training. Default is \code{FALSE}.
#' }
@@ -226,13 +226,13 @@
#'
#'
#' # Perform 5-fold cross-validation a train-test split w/multiple models
#' args <- list("knn" = list(ks = 5), "nnet" = list(size = 20))
#' map_args <- list("knn" = list(ks = 5), "nnet" = list(size = 20))
#' result <- classCV(
#' data = iris,
#' target = 5,
#' predictors = c(1:3),
#' models = c("decisiontree", "knn", "nnet", "svm"),
#' model_params = list(map_args = args),
#' model_params = list(map_args = map_args),
#' train_params = list(
#' n_folds = 5,
#' stratified = TRUE,
@@ -245,7 +245,7 @@
#'
#' @author Donisha Smith
#'
#' @importFrom stats as.formula complete.cases glm predict sd
#' @importFrom stats as.formula complete.cases glm model.matrix predict sd
#' @importFrom data.table := data.table .SD
#'
#' @export
@@ -373,7 +373,6 @@ classCV <- function(data,
train_out <- .parallel(kwargs, parallel_configs, iters[!iters == "final"])
}


# Add metrics information and model information
if ("split" %in% iters) {
final_output$metrics[[model]]$split <- train_out$metrics$split
@@ -391,6 +390,8 @@
}

if ("models" %in% names(train_out)) final_output$models[[model]] <- train_out$models

if ("optimal_lambdas" %in% names(train_out)) final_output$metrics[[model]]$optimal_lambdas <- train_out$optimal_lambdas
}

# Generate final model
@@ -408,7 +409,7 @@

# Generate model depending on chosen models
if (startsWith(model, "regularized")) {
final_output$models[[model]]$final <- .regularized(
final_out <- .regularized(
id = "Final Model",
model = model,
vars = vars,
@@ -419,6 +420,14 @@
rule = if (is.null(model_params$rule)) "min" else model_params$rule,
verbose = if (is.null(model_params$verbose)) TRUE else model_params$verbose
)

if ("optimal_lambda" %in% names(final_out)) {
vec <- c("final" = final_out$optimal_lambda)
final_output$metrics[[model]]$optimal_lambdas <- c(final_output$metrics[[model]]$optimal_lambdas, vec)
final_out$optimal_lambda <- NULL
}

final_output$models[[model]]$final <- final_out
} else {
final_output$models[[model]]$final <- .generate_model(
model = model,
34 changes: 21 additions & 13 deletions pkg/vswift/R/print_internals.R
@@ -26,8 +26,8 @@
info <- info[!names(info) == "logistic_threshold"]
}

if (!startsWith(model, "regularized")) {
info <- info[!names(info) == "rule"]
if (!startsWith(model, "regularized") || (startsWith(model, "regularized") && is.null(info$rule))) {
info <- info[!names(info) %in% c("rule", "verbose")]
}

info$map_args <- info$map_args[!names(info$map_args) != model]
@@ -93,13 +93,19 @@
# Add spacing
padding <- nchar(paste("Class:", "", "Pre"))

if (class_met[1] == "NaN") {
class_met <- c(class_met[1], rep("", 5), class_met[2], rep("", 5), class_met[3])
} else {
class_met <- c(class_met[1], rep("", 4), class_met[2], rep("", 5), class_met[3])
# Pad output with strings
formatted_class_met <- c()

for (i in seq_along(class_met)) {
formatted_class_met <- c(formatted_class_met, class_met[i])
if (i != length(class_met)) {
if (i == 1) space <- if (class_met[i] != "NaN") rep("", 4) else rep("", 5)
if (i == 2) space <- if (class_met[i] != "NaN") rep("", 5) else rep("", 6)
formatted_class_met <- c(formatted_class_met, space)
}
}

cat(class, rep("", (padding + str_diff[class_pos])), paste(class_met, collapse = " "), "\n")
cat(class, rep("", (padding + str_diff[class_pos])), paste(formatted_class_met, collapse = " "), "\n")
class_pos <- class_pos + 1
}
}
@@ -146,19 +152,21 @@
class_met <- c()

for (metric in mean_met) {
class_met <- c(class_met, sprintf("%s \U00B1 %s (SD)", metric, sd_met[sd_met_pos]))
class_met <- c(class_met, sprintf("%s \u00B1 %s (SD)", metric, sd_met[sd_met_pos]))
sd_met_pos <- sd_met_pos + 1
}

if (class_met[1] == "NaN (NA)") {
class_met <- c(rep("", 3), class_met[1], rep("", 6), class_met[2], rep("", 6), class_met[3])
} else {
class_met <- c(class_met[1], rep("", 6), class_met[2], rep("", 6), class_met[3])
# Pad output with strings
formatted_class_met <- c()
for (i in seq_along(class_met)) {
formatted_class_met <- c(formatted_class_met, class_met[i])
space <- if (class_met[i] == "NaN \u00B1 NA (SD)") rep("", 9) else rep("", 6)
if (i != length(class_met)) formatted_class_met <- c(formatted_class_met, space)
}

# Add spacing
padding <- nchar(paste("Class:", "", "Ave"))
cat(class, rep("", (padding + str_diff[class_pos])), paste(class_met), "\n")
cat(class, rep("", (padding + str_diff[class_pos])), paste(formatted_class_met), "\n")
# Update variable
class_pos <- class_pos + 1
}
24 changes: 16 additions & 8 deletions pkg/vswift/R/utils.R
@@ -52,15 +52,21 @@
}

# Unnest parallel list
.unnest <- function(par_list, iters, saved_mods = NULL) {
.unnest <- function(par_list, iters, model, saved_mods) {
targets <- c("metrics")
metrics <- list()
lambdas <- c()

if (saved_mods == TRUE) {
targets <- c("metrics", "models")
models <- list()
}

# Append the optimal lambdas; use c() to retain names
if (startsWith(model, "regularized")) {
for (i in seq_along(iters)) lambdas <- c(lambdas, par_list[[i]]$optimal_lambda)
}

for (target in targets) {
for (i in seq_along(iters)) {
if (target == "metrics") {
@@ -79,11 +85,13 @@
}
}

if (saved_mods == TRUE) {
return(list("metrics" = metrics, "models" = models))
} else {
return(list("metrics" = metrics))
}
out <- list("metrics" = metrics)

if (isTRUE(saved_mods)) out$models <- models

if (length(lambdas) > 0) out$optimal_lambdas <- lambdas

return(out)
}


@@ -101,7 +109,7 @@
}

# Standardize
if (is_standardized == FALSE && kwargs$train_params$standardize == TRUE) {
if (is_standardized == FALSE && isTRUE(kwargs$train_params$standardize)) {
df_list <- .standardize_train(train, test, kwargs$train_params$standardize, target = kwargs$vars$target)
train <- df_list$train
test <- df_list$test
@@ -120,7 +128,7 @@
}

# Standardize
if (is_standardized == FALSE && preproc_kwargs$standardize == TRUE) {
if (is_standardized == FALSE && isTRUE(preproc_kwargs$standardize)) {
df_list <- .standardize(preprocessed_data, standardize = TRUE, preproc_kwargs$vars$target)
}

38 changes: 28 additions & 10 deletions pkg/vswift/R/validation.R
@@ -20,6 +20,12 @@
stratified = if (is.null(stratified)) FALSE else stratified,
rule = if (is.null(model_params$rule)) "min" else model_params$rule
)

# Get optimal lambda
if ("optimal_lambda" %in% names(train_mod)) {
optimal_lambda <- train_mod$optimal_lambda
train_mod$optimal_lambda <- NULL
}
} else {
train_mod <- .generate_model(
model = model, data = train, formula = formula, vars = vars,
@@ -50,13 +56,16 @@
}
}

met_df <- .populate_metrics_df(id, classes, vec, met_df)
metrics <- .populate_metrics_df(id, classes, vec, met_df)

if (save) {
return(list("met_df" = met_df, "train_mod" = train_mod))
} else {
return(list("met_df" = met_df))
}
# Output
out <- list("metrics" = metrics)

if (save) out$train_mod <- train_mod

if (exists("optimal_lambda")) out$optimal_lambda <- optimal_lambda

return(out)
}

# Helper function for classCV to create model
Expand Down Expand Up @@ -126,6 +135,7 @@

mod_args <- list()
cv_args <- list()
cv_flag <- FALSE

base_kwargs <- list()
# Create x and y matrices
@@ -152,7 +162,7 @@
}

# If lambda is NULL or greater then one, use CV to identify optimal lambda
if (length(mod_args) == 0 || length(mod_args) > 1) {
if (length(mod_args$lambda) == 0 || length(mod_args$lambda) > 1) {
# Attempt to retain a similar stratification that is in the training sample if train_params$stratified is TRUE
if (stratified) {
class_info <- .get_class_info(data[, vars$target])
@@ -171,15 +181,17 @@
# Select optimal lambda based on rule
mod_args$lambda <- ifelse(rule == "min", cv.fit$lambda.min, cv.fit$lambda.1se)

cv_flag <- TRUE

# State optimal lambda
if (verbose) {
if (id != "Final Model") {
id <- ifelse(id == "split", "Train-Test Split", paste("Fold", unlist(strsplit(id, split = "fold"))[2]))
}
num <- ifelse(!is.null(mod_args$nfolds), mod_args$nfolds, 10)
msg <- sprintf(
"Model: %s | Partition: %s | Optimal lambda: %.5f (nested %s-fold cross-validation)",
model, id, mod_args$lambda, num
"Model: %s | Partition: %s | Optimal lambda: %.5f (nested %s-fold cross-validation using '%s' rule)",
model, id, mod_args$lambda, num, rule
)
cat(msg, "\n")
}
@@ -190,7 +202,13 @@
# Get model
model <- do.call(glmnet::glmnet, mod_args)

return(list("model" = model, "cv.fit" = cv.fit))
out <- list("model" = model)

if (cv_flag) {
out <- c(out, list("cv.fit" = cv.fit, "optimal_lambda" = mod_args$lambda))
}

return(out)
}

# Helper function for classCV to predict