fix: merging issues

Merge branch 'main' of /~https://github.com/laresbernardo/lares # Conflicts: # DESCRIPTION
laresbernardo · Oct 22, 2024 · 4e51ca8 · 4e51ca8
2 parents b7b9848 + de4b76c
commit 4e51ca8
Show file tree

Hide file tree

Showing 13 changed files with 100 additions and 64 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,16 +1,17 @@
 Package: lares
 Type: Package
 Title: Analytics & Machine Learning Sidekick
-Version: 5.2.8.9003
+Version: 5.2.8.9004
 Authors@R: c(
     person("Bernardo", "Lares", , "laresbernardo@gmail.com", c("aut", "cre")))
 Maintainer: Bernardo Lares <laresbernardo@gmail.com>
 Description: Auxiliary package for better/faster analytics, visualization, data mining, and machine
     learning tasks. With a wide variety of family functions, like Machine Learning, Data Wrangling,
-    Exploratory, API, and Scrapper, it helps the analyst or data scientist to get quick and robust
-    results, without the need of repetitive coding or extensive R programming skills.
+    Marketing Mix Modeling (Robyn), Exploratory, API, and Scrapper, it helps the analyst or 
+    data scientist to get quick and robust results, without the need of repetitive coding or 
+    advanced R programming skills.
 Depends:
-    R (>= 3.5)
+    R (>= 3.5.0)
 Imports:
     dplyr (>= 1.0.0),
     ggplot2,
@@ -38,7 +39,7 @@ Suggests:
     rmarkdown
 URL: /~https://github.com/laresbernardo/lares, https://laresbernardo.github.io/lares/
 BugReports: /~https://github.com/laresbernardo/lares/issues
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 License: AGPL-3
 Encoding: UTF-8
 LazyData: true

diff --git a/R/audio.R b/R/audio.R
@@ -129,7 +129,7 @@ get_mp3 <- function(id,
   if (open) {
     if (file.exists(mp3_file)) {
       message("Opening file: ", mp3_file)
-      browseURL(mp3_file) 
+      browseURL(mp3_file)
     } else {
       warning("Can't open file; possibly due to strange characters in title: ", mp3_file)
     }
@@ -139,7 +139,7 @@ get_mp3 <- function(id,
     if (file.exists(mp3_file)) {
       message("Deleting file: ", mp3_file)
       if (open) Sys.sleep(5)
-      file.remove(mp3_file) 
+      file.remove(mp3_file)
     } else {
       warning("Can't delete file; possibly due to strange characters in title: ", mp3_file)
     }

diff --git a/R/chatgpt.R b/R/chatgpt.R
@@ -129,8 +129,8 @@ gpt_ask <- function(ask,
   }
 
   # Save historical answers
-  cache <- bind_rows(
-    data.frame(ts = ts, reply = ret),
+  cache <- rbind(
+    data.frame(ts = ts, reply = toJSON(ret)),
     cache_read("GPT_HIST_REPLY", quiet = TRUE, ...)
   ) %>%
     as_tibble()

diff --git a/R/facebook.R b/R/facebook.R
@@ -1,7 +1,7 @@
 # Auxiliary constant values
 # META_GRAPH_URL <- "https://graph.intern.facebook.com"
 META_GRAPH_URL <- "https://graph.facebook.com"
-META_API_VER <- "v17.0"
+META_API_VER <- "v20.0"
 
 ####################################################################
 #' Paginate and Process Facebook's API Results

diff --git a/R/lasso.R b/R/lasso.R
@@ -41,6 +41,7 @@ lasso_vars <- function(df, variable,
                        quiet = FALSE,
                        seed = 123, ...) {
   tic("lasso_vars")
+  try_require("h2o")
   quiet(h2o.init(nthreads = -1, port = 54321))
   h2o.no_progress()
   on.exit(set.seed(seed))

diff --git a/R/model_functions.R b/R/model_functions.R
@@ -726,6 +726,7 @@ export_results <- function(results,
                            save = TRUE,
                            seed = 0) {
   if (save) {
+    try_require("h2o")
     quiet(h2o.init(nthreads = -1, port = 54321))
 
     pass <- !is.null(attr(results, "type"))

diff --git a/R/model_predict.R b/R/model_predict.R
@@ -17,6 +17,7 @@
 #' @export
 #' @rdname h2o_predict
 h2o_predict_MOJO <- function(df, model_path, method = "mojo", batch = 300) {
+  try_require("h2o")
   quiet(h2o.init(nthreads = -1, port = 54321))
 
   files <- list.files(model_path)
@@ -82,6 +83,7 @@ h2o_predict_MOJO <- function(df, model_path, method = "mojo", batch = 300) {
 #' @rdname h2o_predict
 h2o_predict_binary <- function(df, model_path, sample = NA) {
   message("Use of h2o_predict_MOJO instead highly recommended!")
+  try_require("h2o")
   quiet(h2o.init(nthreads = -1, port = 54321))
 
   if (!right(model_path, 4) == ".zip") {

diff --git a/R/robyn.R b/R/robyn.R
@@ -441,9 +441,9 @@ plot.robyn_modelselector <- function(x, ...) {
 #' @param marginals Boolean. Include mROAS or mCPA marginal performance metric
 #' as an additional column called "marginal". Calculations are based on
 #' mean spend and mean response with mean carryover results,
-#' between \code{start_date} and code{end_date}.
+#' between \code{start_date} and \code{end_date}.
 #' @param carryovers Boolean. Add mean percentage of carryover response for
-#' date range between \code{start_date} and code{end_date} on paid channels.
+#' date range between \code{start_date} and \code{end_date} on paid channels.
 #' Keep in mind organic variables also have carryover but currently not showing.
 #' @return data.frame with results on ROAS/CPA, spend, response, contribution
 #' per channel, with or without total rows.
@@ -592,8 +592,10 @@ robyn_performance <- function(
   if (carryovers) {
     try_require("Robyn")
     carrov <- robyn_immcarr(
-      InputCollect, OutputCollect, solID = solID, 
-      start_date = start_date, end_date = end_date, ...) %>%
+      InputCollect, OutputCollect,
+      solID = solID,
+      start_date = start_date, end_date = end_date, ...
+    ) %>%
       filter(.data$type == "Carryover")
     mean_carryovers <- data.frame(
       channel = carrov$rn,

diff --git a/R/scrabble.R b/R/scrabble.R
@@ -106,9 +106,10 @@ scrabble_score <- function(words, scores.df) {
 #' Dataframe for every letter and points given a language.
 #'
 #' @family Scrabble
-#' @param lang Character. Any of "en","es". Set to NULL
+#' @param lang Character. Any of "en","es" or "chars". Set to NULL
 #' if you wish to skip this step (and use \code{words} parameter in
-#' \code{scrabble_words()} instead).
+#' \code{scrabble_words()} instead). The "chars" parameter will
+#' score the number of characters a word has.
 #' @return data.frame with tiles and scores for each alphabet letter.
 #' @examples
 #' scrabble_points("es")
@@ -122,8 +123,8 @@ scrabble_points <- function(lang) {
     message(">>> Skipping points schema...")
     return(invisible(NULL))
   }
-  if (!lang %in% c("en", "es")) {
-    message("We do not have the points for this language yet!")
+  if (!lang %in% c("en", "es", "chars", "unique")) {
+    message("There are no points structure for this language/system yet")
     return(invisible(NULL))
   }
   if (lang == "es") {
@@ -132,17 +133,27 @@ scrabble_points <- function(lang) {
         tolower(LETTERS)[1:14], intToUtf8(241),
         tolower(LETTERS)[15:length(LETTERS)]
       ),
-      scores = c(1, 3, 2, 2, 1, 4, 3, 4, 1, 8, 10, 1, 3, 1, 8, 1, 3, 5, 1, 1, 1, 2, 4, 10, 10, 5, 10)
+      scores = c(
+        1, 3, 2, 2, 1, 4, 3, 4, 1, 8, 10, 1, 3, 1,
+        8, 1, 3, 5, 1, 1, 1, 2, 4, 10, 10, 5, 10
+      )
     )
   }
   if (lang == "en") {
     scores <- data.frame(
       tiles = tolower(LETTERS),
-      scores = c(1, 4, 4, 2, 1, 4, 3, 3, 1, 10, 5, 2, 4, 2, 1, 4, 10, 1, 1, 1, 2, 5, 4, 8, 3, 10)
+      scores = c(
+        1, 4, 4, 2, 1, 4, 3, 3, 1, 10, 5, 2, 4,
+        2, 1, 4, 10, 1, 1, 1, 2, 5, 4, 8, 3, 10
+      )
     )
   }
-
-  message(sprintf(">>> Loaded points for '%s'", lang))
+  if (lang %in% c("chars", "unique")) {
+    scores <- data.frame(
+      tiles = tolower(LETTERS)
+    ) %>% mutate(scores = 1)
+  }
+  message(sprintf(">>> Points system: '%s'", lang))
   return(scores)
 }
 
@@ -226,6 +237,9 @@ grepl_letters <- function(x, pattern, blank = "_") {
 #' these tiles (and positions). Not very relevant on Scrabble but for Wordle.
 #' @param force_n,force_max Integer. Force words to be n or max n characters
 #' long. Leave 0 to ignore parameter.
+#' @param pattern Character string. Custom regex patterns you'd like to match.
+#' @param repeated Boolean. By default, no replacement allowed. When activated,
+#' a single tile can be repeated and won't be "used and discarded".
 #' @param scores,language Character. Any of "en","es","de","fr".
 #' If scores is not any of those languages, must be a data.frame that
 #' contains two columns: "tiles" with every letter of the alphabet and
@@ -273,6 +287,8 @@ scrabble_words <- function(tiles = "",
                            exclude_here = "",
                            force_n = 0,
                            force_max = 0,
+                           pattern = "",
+                           repeated = FALSE,
                            language = Sys.getenv("LARES_LANG"),
                            scores = language,
                            words = NULL,
@@ -286,7 +302,7 @@ scrabble_words <- function(tiles = "",
       stop("Please, provide a valid scores data.frame with 'tiles' and 'scores' columns")
     }
   } else {
-    scores <- scrabble_points(scores)
+    scores.df <- scrabble_points(scores)
   }
 
   ### TILES
@@ -323,19 +339,19 @@ scrabble_words <- function(tiles = "",
     )))
   }
   words <- tolower(dictionary)
-  # Words can't have more letters than inputs
-  words <- words[nchar(words) <= ntiles]
+  # Words can't have more letters than inputs by default
+  if (!repeated) words <- words[nchar(words) <= ntiles]
   .temp_print(length(words))
   # Exclude specific tiles (Wordle)
   if (length(force_not) > 0) words <- words[!grepl(paste(force_not, collapse = "|"), words)]
   .temp_print(length(words))
   # You may want to force their lengths
   if (force_n > 0) words <- words[nchar(words) == force_n]
   .temp_print(length(words))
-  if (force_max > 0) words <- words[nchar(words) <= force_max]
+  if (force_max > 0 && !repeated) words <- words[nchar(words) <= force_max]
   .temp_print(length(words))
   # Words can't have different letters than inputs
-  words <- words[.all_tiles_present(words, tiles, free = 0)]
+  words <- words[.all_tiles_present(words, tiles, free = 0, repeated = repeated)]
   .temp_print(length(words))
   # Force start/end strings
   words <- .force_words(words, force_start)
@@ -349,20 +365,38 @@ scrabble_words <- function(tiles = "",
       .temp_print(length(words))
     }
   }
+  # Force custom patterns that must be contained
+  if (pattern[1] != "") {
+    for (str in pattern) {
+      words <- words[grep(tolower(str), words, perl = TRUE)]
+      .temp_print(length(words))
+    }
+  }
   # Exclude letters from positions (Wordle)
-  pos_tiles <- str_split_merge(tolower(exclude_here))
-  for (i in seq_along(pos_tiles)) {
-    these <- str_split(pos_tiles, "\\|")[i][[1]]
-    if (!any(these %in% letters)) next
-    located <- stringr::str_locate_all(words, pos_tiles[i])
-    these <- !unlist(lapply(located, function(x) sum(x[, 1] == i) > 0))
-    words <- words[these]
-    .temp_print(length(words))
+  if (exclude_here[1] != "") {
+    for (eh in exclude_here) {
+      pos_tiles <- str_split_merge(tolower(eh))
+      for (i in seq_along(pos_tiles)) {
+        these <- str_split(pos_tiles, "\\|")[i][[1]]
+        if (!any(these %in% letters)) next
+        located <- stringr::str_locate_all(words, pos_tiles[i])
+        these <- !unlist(lapply(located, function(x) sum(x[, 1] == i) > 0))
+        words <- words[these]
+        .temp_print(length(words))
+      }
+    }
   }
 
   .temp_print(length(words), last = TRUE)
   if (length(words) > 0) {
-    done <- scrabble_score(words, scores)
+    if ("unique" %in% scores) {
+      this <- lapply(words, function(x) sum(!!str_count(x, letters)))
+      done <- data.frame(word = words, scores = unlist(this)) %>%
+        mutate(length = str_length(.data$word)) %>%
+        arrange(desc(.data$scores), desc(.data$length))
+    } else {
+      done <- scrabble_score(words, scores.df)
+    }
     if (sum(done$scores) == 0) done$scores <- NULL
     return(as_tibble(done))
   } else {
@@ -374,10 +408,11 @@ scrabble_words <- function(tiles = "",
   if (print) if (!last) formatColoured(paste(x, "> ")) else formatColoured(paste(x, "\n"))
 }
 
-# Tile used, tile that must be skipped on next iterations
-.all_tiles_present <- function(words, tiles, free = 0) {
+# Tile used, tile that must be skipped on next iterations (when repeated is TRUE)
+.all_tiles_present <- function(words, tiles, free = 0, repeated = FALSE) {
   free <- free + sum(tiles == "_")
-  for (x in tiles) words <- sub(x, "", words)
+  fx <- ifelse(repeated, gsub, sub)
+  for (x in tiles) words <- fx(x, "", words)
   nchar(words) <= free
 }
 
@@ -404,15 +439,17 @@ scrabble_words <- function(tiles = "",
 }
 
 .add_letters <- function(str, tiles) {
-  if (str != "") {
+  if (str[1] != "") {
     str_tiles <- tolower(unlist(strsplit(str, "")))
+    # Get rid of non alpha-numeric values
+    str_tiles <- str_tiles[grepl("[[:alnum:]]", str_tiles)]
     which <- !str_tiles %in% c(tiles, "_")
     if (any(which)) {
       new <- str_tiles[which]
       tiles <- c(tiles, new)
       message(sprintf(
         "%s %s not in your tiles: now included",
-        v2t(new, and = "and"),
+        v2t(toupper(new), and = "and"),
         ifelse(length(new) > 1, "were", "was")
       ))
     }

diff --git a/R/trees.R b/R/trees.R
@@ -7,9 +7,9 @@
 #' @family Exploratory
 #' @family Visualization
 #' @inheritParams h2o_automl
-#' @inherit rpart::rpart
-#' @inherit rpart::rpart.control
-#' @inherit rpart.plot::rpart.plot
+#' @inheritParams rpart::rpart
+#' @inheritParams rpart::rpart.control
+#' @inheritParams rpart.plot::rpart.plot
 #' @param df Data frame
 #' @param max Integer. Maximal depth of the tree.
 #' @param min Integer. The minimum number of observations that must

diff --git a/man/robyn_performance.Rd b/man/robyn_performance.Rd
diff --git a/man/scrabble.Rd b/man/scrabble.Rd