diff --git a/R/forecastmodel.R b/R/forecastmodel.R index c1bfdca4e61a9a29c38544fc81cb5eed21d44d2e..fea0c84d9f2082657517fd7500b216b48a15e61f 100644 --- a/R/forecastmodel.R +++ b/R/forecastmodel.R @@ -23,7 +23,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( # # The horizons to fit for kseq = NA, - # The (transformation stage) parameters used for the fit + # The (transformation stage) parameters (only the ones set in last call of insert_prm()) prm = NA, # Stores the maximum lag for AR terms maxlagAR = NA, @@ -83,7 +83,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- - # Get the transformation parameters + # Get the transformation parameters (set for optimization) get_prmbounds = function(nm){ if(nm == "init"){ if(is.null(dim(self$prmbounds))){ @@ -130,8 +130,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( } # MUST INCLUDE SOME checks here and print useful messages if something is not right if(any(is.na(prm))){ stop(pst("None of the parameters (in prm) must be NA: prm=",prm)) } - - # Keep the prm + # Keep the prm given self$prm <- prm # Find if any opt parameters, first the one with "__" hence for the inputs pinputs <- prm[grep("__",nams(prm))] @@ -152,7 +151,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( # Find if the input i have prefix match with the opt. parameter ii if(pnms[ii]==nams(self$inputs)[i]){ # if the opt. parameter is in the expr, then replace - self$inputs[[i]]$expr <- private$replace_value(name = pprm[ii], + self$inputs[[i]]$expr <- private$replace_prmvalue(name = pprm[ii], value = pinputs[ii], expr = self$inputs[[i]]$expr) } @@ -160,12 +159,12 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( } } # ################ - # For the fit parameters, insert from prm if any found + # For the regression parameters, insert from prm if any found if (length(preg) & any(!is.na(self$regprmexpr))) { nams(preg) for(i in 1:length(preg)){ # if the opt. parameter is in the expr, then replace - self$regprmexpr <- private$replace_value(name = nams(preg)[i], + self$regprmexpr <- private$replace_prmvalue(name = nams(preg)[i], value = preg[i], expr = self$regprmexpr) } @@ -175,6 +174,32 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- + #---------------------------------------------------------------- + # Return the values of the parameter names given + get_prmvalues = function(prmnames){ + # + regprm <- eval(parse(text = self$regprmexpr)) + # From the input parameters + val <- sapply(prmnames, function(nm){ + if(length(grep("__",nm))){ + tmp <- strsplit(nm, "__")[[1]] + if(tmp[1] %in% names(self$inputs)){ + return(as.numeric(private$get_exprprmvalue(tmp[2], self$inputs[[tmp[1]]]$expr))) + }else{ + return(NA) + } + }else{ + if(nm %in% names(regprm)){ + return(as.numeric(regprm[nm])) + }else{ + return(NA) + } + } + }) + return(val) + }, + #---------------------------------------------------------------- + #---------------------------------------------------------------- # Function for transforming the input data to the regression data transform_data = function(data){ @@ -289,7 +314,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- # Replace the value in "name=value" in expr - replace_value = function(name, value, expr){ + replace_prmvalue = function(name, value, expr){ # First make regex pattern <- gsub("\\.", ".*", name) # Try to find it in the input @@ -298,7 +323,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( if(pos>0){ pos <- c(pos+attr(pos,"match.length")) # Find the substr to replace with the prm value - (tmp <- substr(expr, pos, nchar(expr))) + tmp <- substr(expr, pos, nchar(expr)) pos2 <- regexpr(",|)", tmp) # Insert the prm value and return expr <- pst(substr(expr,1,pos-1), "=", value, substr(expr,pos+pos2-1,nchar(expr))) @@ -309,6 +334,30 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( }, #---------------------------------------------------------------- + #---------------------------------------------------------------- + get_exprprmvalue = function(name, expr){ + #name <- "degree" + #expr <- "bspline(tday, Boundary.knots = c(start=6,18), degree = 5, intercept=TRUE) %**% ones() + 2 + ones()" + #expr <- "one()" + expr <- gsub(" ", "", expr) + + # First make regex + pattern <- gsub("\\.", ".*", name) + # Try to find it in the input + pos <- regexpr(pattern, expr) + # Only replace if prm was found + if(pos>0){ + pos <- c(pos+attr(pos,"match.length")) + # Find the substr to replace with the prm value + (tmp <- substr(expr, pos, nchar(expr))) + pos2 <- regexpr(",|)", tmp) + return(substr(tmp, 2, pos2-1)) + }else{ + return(NA) + } + }, + #---------------------------------------------------------------- + #---------------------------------------------------------------- # For deep cloning, in order to get the inputs list of R6 objects copied deep_clone = function(name, value) { @@ -344,9 +393,9 @@ print.forecastmodel <- function(x, ...){ model <- x # cat("\nObject of class forecastmodel (R6::class)\n\n") cat("\nOutput:",model$output) - cat("Inputs: ") + cat("\nInputs: ") if(length(model$inputs) == 0 ){ - cat("No inputs\n") + cat("\nNo inputs") }else{ cat(names(model$inputs)[1],"=",model$inputs[[1]]$expr,"\n") for(i in 2:length(model$inputs)){ diff --git a/R/forecastmodel.R-documentation.R b/R/forecastmodel.R-documentation.R index 01a1bf7d89452c299ba0a71f0aa3282dc0e97562..9b7fdcf0abf2ed15db92a33c9c9005c881114e8f 100644 --- a/R/forecastmodel.R-documentation.R +++ b/R/forecastmodel.R-documentation.R @@ -135,7 +135,7 @@ #---------------------------------------------------------------- #' @section \code{$insert_prm(prm)}: -#' Insert the transformation parameters prm in the input expressions and regression expressions, and keep them (simply string manipulation). +#' Insert the transformation parameters prm in the input expressions and regression expressions, and keep them in $prm (simply string manipulation). #' #' @examples #' diff --git a/R/lm_optim.R b/R/lm_optim.R index e5dc48d53f7b710b21bf9c2d2f2036594d3e0967..567823e3ba578a04468db63a94f76907621d4ae4 100644 --- a/R/lm_optim.R +++ b/R/lm_optim.R @@ -12,6 +12,7 @@ #' @title Optimize parameters for onlineforecast model fitted with LM #' @param model The onlineforecast model, including inputs, output, kseq, p #' @param data The data.list including the variables used in the model. +#' @param kseq The horizons to fit for (if not set, then model$kseq is used) #' @param scorefun The function to be score used for calculating the score to be optimized. #' @param cachedir A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples. #' @param printout A logical determining if the score function is printed out in each iteration of the optimization. @@ -55,7 +56,7 @@ #' #' @importFrom stats optim #' @export -lm_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, method="L-BFGS-B", ...){ +lm_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cachererun=FALSE, printout=TRUE, method="L-BFGS-B", ...){ ## Take the parameters bounds from the parameter bounds set in the model init <- model$get_prmbounds("init") lower <- model$get_prmbounds("lower") @@ -64,21 +65,33 @@ lm_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, m if(any(is.na(lower))){ lower[is.na(lower)] <- -Inf} if(any(is.na(upper))){ lower[is.na(upper)] <- Inf} + # Clone the model no matter what (at least model$kseq should not be changed no matter if optimization is stopped) + m <- model$clone_deep() + if(!is.na(kseq[1])){ + m$kseq <- kseq + } ## Caching the results based on some of the function arguments if(cachedir != ""){ - ## Have to insert the parameters in the expressions - model$insert_prm(init) + # Have to insert the parameters in the expressions to get the right state of the model for unique checksum + m$insert_prm(init) + # Have to reset the state first to remove dependency of previous calls + m$reset_state() ## Give all the elements to calculate the unique cache name - cnm <- cache_name(lm_fit, lm_optim, model$outputrange, model$regprm, model$transform_data(data), - data[[model$output]], scorefun, init, lower, upper, cachedir = cachedir) - ## Maybe load the cached result - if(file.exists(cnm)){ return(readRDS(cnm)) } + cnm <- cache_name(lm_fit, lm_optim, m$outputrange, m$regprm, m$transform_data(data), + data[[m$output]], scorefun, init, lower, upper, cachedir = cachedir) + # Load the cached result if it exists + if(file.exists(cnm) & !cachererun){ + res <- readRDS(cnm) + # Set the optimized parameters into the model + model$insert_prm(res$par) + return(res) + } } - ## Run the optimization + # Run the optimization res <- optim(par = init, fn = lm_fit, - model = model, + model = m, data = data, scorefun = scorefun, printout = printout, @@ -87,8 +100,9 @@ lm_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, m upper = upper, method = method, ...) - ## Save the result in the cachedir + # Save the result in the cachedir if(cachedir != ""){ cache_save(res, cnm) } - ## Return the result + # Set the optimized parameters into the model + model$insert_prm(res$par) return(res) } diff --git a/R/rls_fit.R b/R/rls_fit.R index 23cfb8642ec871fab7e26dfb53404ee16f6b77da..e1b129905d619f53aa805dae55ddb3bf6f46130a 100644 --- a/R/rls_fit.R +++ b/R/rls_fit.R @@ -174,7 +174,7 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, Yhat <- lapply_cbind_df(Lresult, function(x){ x$yhat }) - nams(Yhat) <- pst("k",model$kseq) + nams(Yhat) <- pst("k", model$kseq) # Maybe crop the output if(!is.na(model$outputrange[1])){ Yhat[Yhat < model$outputrange[1]] <- model$outputrange[1] } diff --git a/R/rls_optim.R b/R/rls_optim.R index a5458d335be10ffe63ed858e5b87b527d0a3d55b..4a5098091f2606763a4beae42d8e8aa6b06c586b 100644 --- a/R/rls_optim.R +++ b/R/rls_optim.R @@ -18,6 +18,7 @@ #' @title Optimize parameters for onlineforecast model fitted with RLS #' @param model The onlineforecast model, including inputs, output, kseq, p #' @param data The data.list including the variables used in the model. +#' @param kseq The horizons to fit for (if not set, then model$kseq is used) #' @param scorefun The function to be score used for calculating the score to be optimized. #' @param cachedir A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples. #' @param printout A logical determining if the score function is printed out in each iteration of the optimization. @@ -59,7 +60,7 @@ #' #' #' @export -rls_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, method="L-BFGS-B", ...){ +rls_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cachererun=FALSE, printout=TRUE, method="L-BFGS-B", ...){ # Take the parameters bounds from the parameter bounds set in the model init <- model$get_prmbounds("init") lower <- model$get_prmbounds("lower") @@ -68,24 +69,35 @@ rls_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, if(any(is.na(lower))){ lower[is.na(lower)] <- -Inf} if(any(is.na(upper))){ lower[is.na(upper)] <- Inf} + # Clone the model no matter what (at least model$kseq should not be changed no matter if optimization is stopped) + m <- model$clone_deep() + if(!is.na(kseq[1])){ + m$kseq <- kseq + } + # Caching the results based on some of the function arguments if(cachedir != ""){ # Have to insert the parameters in the expressions to get the right state of the model for unique checksum - model$insert_prm(init) - # Give all the elements needed to calculate the unique cache name - # This is maybe smarter, don't have to calculate the transformation of the data: cnm <- cache_name(model$regprm, getse(model$inputs, nms="expr"), model$output, model$prmbounds, model$kseq, data, objfun, init, lower, upper, cachedir = cachedir) + m$insert_prm(init) # Have to reset the state first to remove dependency of previous calls - model$reset_state() - cnm <- cache_name(rls_fit, rls_optim, model$outputrange, model$regprm, model$transform_data(data), data[[model$output]], scorefun, init, lower, upper, cachedir = cachedir) - # Maybe load the cached result - if(file.exists(cnm)){ return(readRDS(cnm)) } + m$reset_state() + # Give all the elements needed to calculate the unique cache name + # This is maybe smarter, don't have to calculate the transformation of the data: cnm <- cache_name(m$regprm, getse(m$inputs, nms="expr"), m$output, m$prmbounds, m$kseq, data, objfun, init, lower, upper, cachedir = cachedir) + cnm <- cache_name(rls_fit, rls_optim, m$outputrange, m$regprm, m$transform_data(data), data[[m$output]], scorefun, init, lower, upper, kseq, cachedir = cachedir) + # Load the cached result if it exists + if(file.exists(cnm) & !cachererun){ + res <- readRDS(cnm) + # Set the optimized parameters into the model + model$insert_prm(res$par) + return(res) + } } # Run the optimization res <- optim(par = init, fn = rls_fit, # Parameters to pass to rls_fit - model = model, + model = m, data = data, scorefun = scorefun, printout = printout, @@ -94,10 +106,10 @@ rls_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, lower = lower, upper = upper, method = method, - ...) - + ...) # Save the result in the cachedir - if(cachedir != ""){ cache_save(res, cnm) } - # Return the result + if(cachedir != ""){ cache_save(res, cnm)} + # Set the optimized parameters into the model + model$insert_prm(res$par) return(res) } diff --git a/R/rls_reduce.R b/R/rls_reduce.R index ff981382a20d775d90a0fe0eb87e0e8197f9d944..a2caac77d8205d6a895b7728a164a05051ef47c4 100644 --- a/R/rls_reduce.R +++ b/R/rls_reduce.R @@ -1,74 +1,74 @@ -#' @importFrom parallel mclapply -rls_reduce <- function(model, data, preduce=list(NA), scorefun = rmse){ - ## prm test - ##preduce <- list(I__degree = c(min=1, init=7), mu_tday__nharmonics = c(min=1, init=7)) - prmin <- unlist(getse(preduce, 1)) - pr <- unlist(getse(preduce, 2)) - ##!! deep=TRUE didn't work, gave: "Error: C stack usage 9524532 is too close to the limit" - m <- model$clone_deep() - ## Insert the starting p reduction values - if(!is.na(preduce[1])){ - m$insert_prm(pr) - } - ## - valref <- rls_optim(m, data, printout=FALSE)$value - ## - while(TRUE){ - ## - message("------------------------------------") - message("Reference score value",valref) - ## -------- - ## Remove inputs one by one - message("\nRemoving inputs one by one") - valsrm <- mclapply(1:length(model$inputs), function(i){ - mr <- m$clone_deep() - mr$inputs[[i]] <- NULL - rls_optim(mr, data, printout=FALSE)$value - }) - valsrm <- unlist(valsrm) - names(valsrm) <- names(m$inputs) - message("Scores") - print(valsrm) - ## -------- - ## Reduce parameter values if specified - if(!is.na(pr[1])){ - message("\nReducing prm with -1 one by one") - valspr <- mclapply(1:length(pr), function(i){ - mr <- m$clone_deep() - p <- pr - ## Only count down if above minimum - if( p[i] >= prmin[i] ){ - p[i] <- p[i] - 1 - } - mr$insert_prm(p) - val <- rls_optim(mr, data, printout=FALSE)$value - ## - return(val) - }) - valspr <- unlist(valspr) - names(valspr) <- names(pr) - message("Scores") - print(valspr) - } - ## Is one the reduced smaller than the current ref? - if( min(c(valsrm,valspr)) < valref ){ - if(which.min(c(min(valsrm),min(valspr))) == 1){ - ## One of the models with one of the inputs removed is best - imin <- which.min(valsrm) - message("Removing input",names(m$inputs)[imin]) - m$inputs[[imin]] <- NULL - }else{ - ## One of the models with reduced parameter values is best - imin <- which.min(valspr) - pr[imin] <- pr[imin] - 1 - m$insert_prm(pr) - message("Reduced parameter",names(pr)[imin],"to:",pr[imin]) - } - valref <- min(c(valsrm,valspr)) - }else{ - ## No improvement obtained from reduction, so return the current model - message("------------------------------------\n\nDone") - return(m) - } - } -} +## #' @importFrom parallel mclapply +## rls_reduce <- function(model, data, prmreduce=list(NA), kseq = NA, scorefun = rmse){ +## ## prm test +## ##prmreduce <- list(I__degree = c(min=1, init=7), mu_tday__nharmonics = c(min=1, init=7)) +## prmin <- unlist(getse(prmreduce, 1)) +## pr <- unlist(getse(prmreduce, 2)) +## # +## m <- model$clone_deep() +## # Insert the starting p reduction values +## if(!is.na(prmreduce[1])){ +## m$insert_prm(pr) +## } +## # +## valref <- rls_optim(m, data, kseq, printout=FALSE)$value +## # +## while(TRUE){ +## # +## message("------------------------------------") +## message("Reference score value",valref) +## # -------- +## # Remove inputs one by one +## message("\nRemoving inputs one by one") +## valsrm <- mclapply(1:length(model$inputs), function(i){ +## mr <- m$clone_deep() +## mr$inputs[[i]] <- NULL +## rls_optim(mr, data, kseq, printout=FALSE)$value +## }) +## valsrm <- unlist(valsrm) +## names(valsrm) <- names(m$inputs) +## message("Scores") +## print(valsrm) +## # -------- +## # Reduce parameter values if specified +## if(!is.na(pr[1])){ +## message("\nReducing prm with -1 one by one") +## valspr <- mclapply(1:length(pr), function(i){ +## mr <- m$clone_deep() +## p <- pr +## # Only count down if above minimum +## if( p[i] >= prmin[i] ){ +## p[i] <- p[i] - 1 +## } +## mr$insert_prm(p) +## val <- rls_optim(mr, data, kseq, printout=FALSE)$value +## # +## return(val) +## }) +## valspr <- unlist(valspr) +## names(valspr) <- names(pr) +## message("Scores") +## print(valspr) +## } +## # Is one the reduced smaller than the current ref? +## if( min(c(valsrm,valspr)) < valref ){ +## if(which.min(c(min(valsrm),min(valspr))) == 1){ +## # One of the models with one of the inputs removed is best +## imin <- which.min(valsrm) +## message("Removing input",names(m$inputs)[imin]) +## m$inputs[[imin]] <- NULL +## }else{ +## # One of the models with reduced parameter values is best +## imin <- which.min(valspr) +## pr[imin] <- pr[imin] - 1 +## m$insert_prm(pr) +## message("Reduced parameter",names(pr)[imin],"to:",pr[imin]) +## } +## valref <- min(c(valsrm,valspr)) +## }else{ +## # No improvement obtained from reduction, so return the current model +## message("------------------------------------\n\nDone") +## return(m) +## } +## } +## } diff --git a/R/rls_summary.R b/R/rls_summary.R index c3659a1ede4f59b448a582bc3b4f5abba01d3c79..da057351343ccc936fef8d328db870e6ba36f3db 100644 --- a/R/rls_summary.R +++ b/R/rls_summary.R @@ -80,7 +80,7 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete = if(!printit){ return(retval) } - # Insert the optimized parameters + # Insert the optimized parameters (or actually $prm are just the last parameters given to insert_prm()) m <- fit$model$clone_deep() m$prm[names(m$prm)] <- signif(m$prm, digits=3) m$insert_prm(m$prm) diff --git a/R/step_backward.R b/R/step_backward.R new file mode 100644 index 0000000000000000000000000000000000000000..9bf5d438ee7231993e6b272c11bb09e2df8b5de0 --- /dev/null +++ b/R/step_backward.R @@ -0,0 +1,92 @@ +#' @importFrom parallel mclapply +#' + +step_backward <- function(object, data, kseq = NA, prm=list(NA), optimfun = rls_optim, scorefun = rmse, ...){ + # Do: + # - Maybe have "cloneit" argument in optimfun, then don't clone inside optim. + # - Add argument controlling how much is kept in each iteration (e.g all fitted models) + # + # - Help: prm <- list(I__degree = c(min=1, max=7), mu_tday__nharmonics = c(min=1, max=7)) + # - help: It's not checked that it's the score is calculated on the same values! WARNING should be printed if some models don't forecast same points + # + model <- object + # + m <- model$clone_deep() + # Insert the starting prm reduction values + if(!is.na(prm[1])){ + prmMin <- unlist(getse(prm, "min")) + # ??insert_prm should keep only the ones that can be changed + m$insert_prm(unlist(getse(prm, "max"))) + } + # For keeping all the results + L <- list() + istep <- 1 + # Optimize the reference model + res <- optimfun(m, data, kseq, printout=TRUE, ...) + valRef <- res$value + L[[istep]] <- list(model = m$clone_deep(), result = res) + # + done <- FALSE + while(!done){ + + # + istep <- istep + 1 + # Insert the optimized parameters from last step + m$prmbounds[names(res$par),"init"] <- res$par + # + message("------------------------------------") + message("Reference score value: ",valRef) + # -------- + # Generate the reduced models + mReduced <- mclapply(1:length(m$inputs), function(i){ + mr <- m$clone_deep() + # Insert the optimized parameters from the reference model + mr$inputs[[i]] <- NULL + return(mr) + }) + names(mReduced) <- names(m$inputs) + if(!is.na(prm[1])){ + tmp <- mclapply(1:length(prm), function(i){ + p <- m$get_prmvalues(names(prm[i])) + # If the input is not in model, then p is NA, so don't include it for fitting + if(!is.na(p)){ + # Only the ones with prms above minimum + if(p > prmMin[i]){ + p <- p - 1 + mr <- m$clone_deep() + mr$insert_prm(p) + return(mr) + } + } + return(NA) + }) + names(tmp) <- names(prm) + tmp <- tmp[!is.na(tmp)] + mReduced <- c(mReduced, tmp) + } + + resReduced <- lapply(1:length(mReduced), function(i, ...){ + res <- optimfun(mReduced[[i]], data, kseq, printout=FALSE, ...) + message(names(mReduced)[[i]], ": ", res$value) + return(res) + }, ...) + names(resReduced) <- names(mReduced) + valReduced <- unlist(getse(resReduced, "value")) + imin <- which.min(valReduced) + + # Is one the reduced smaller than the current ref? + if( valReduced[imin] < valRef ){ + # Keep the best model + m <- mReduced[[imin]] + res <- resReduced[[imin]] + valRef <- res$value + # Keep for the result + L[[istep]] <- list(model = m$clone_deep(), result = resReduced[[imin]]) + }else{ + # No improvement obtained from reduction, so return the current model (last in the list) + message("------------------------------------\n\nDone") + done <- TRUE + } + } + invisible(L) +}