From fc2994cf04c4de13308322480bc994212f67e889 Mon Sep 17 00:00:00 2001
From: Peder <pbac@dtu.dk>
Date: Thu, 27 May 2021 23:40:43 +0200
Subject: [PATCH] Now forward, backward and both stepping :)

---
 R/forecastmodel.R                 |   6 +-
 R/step_backward.R                 |  92 -----------------
 R/step_optim.R                    | 166 ++++++++++++++++++++++++++++++
 misc-R/reduce-test.R              |  50 ++++++---
 vignettes/online-updating.Rmd     |   5 +-
 vignettes/setup-and-use-model.Rmd |  15 ++-
 6 files changed, 215 insertions(+), 119 deletions(-)
 delete mode 100644 R/step_backward.R
 create mode 100644 R/step_optim.R

diff --git a/R/forecastmodel.R b/R/forecastmodel.R
index fea0c84..24dd10d 100644
--- a/R/forecastmodel.R
+++ b/R/forecastmodel.R
@@ -398,8 +398,10 @@ print.forecastmodel <- function(x, ...){
         cat("\nNo inputs")
     }else{
         cat(names(model$inputs)[1],"=",model$inputs[[1]]$expr,"\n")
-        for(i in 2:length(model$inputs)){
-            cat("       ",names(model$inputs)[i],"=",model$inputs[[i]]$expr,"\n")
+        if(length(model$inputs) > 1){
+            for(i in 2:length(model$inputs)){
+                cat("        ",names(model$inputs)[i],"=",model$inputs[[i]]$expr,"\n")
+            }
         }
         cat("\n")
     }
diff --git a/R/step_backward.R b/R/step_backward.R
deleted file mode 100644
index 9bf5d43..0000000
--- a/R/step_backward.R
+++ /dev/null
@@ -1,92 +0,0 @@
-#' @importFrom parallel mclapply
-#'
-
-step_backward <- function(object, data, kseq = NA, prm=list(NA), optimfun = rls_optim, scorefun = rmse, ...){
-    # Do:
-    # - Maybe have "cloneit" argument in optimfun, then don't clone inside optim.
-    # - Add argument controlling how much is kept in each iteration (e.g all fitted models)
-    #
-    # - Help: prm <- list(I__degree = c(min=1, max=7), mu_tday__nharmonics = c(min=1, max=7))
-    # - help: It's not checked that it's the score is calculated on the same values! WARNING should be printed if some models don't forecast same points
-    #
-    model <- object
-    #
-    m <- model$clone_deep()
-    # Insert the starting prm reduction values
-    if(!is.na(prm[1])){
-        prmMin <- unlist(getse(prm, "min"))
-        # ??insert_prm should keep only the ones that can be changed
-        m$insert_prm(unlist(getse(prm, "max")))
-    }
-    # For keeping all the results
-    L <- list()
-    istep <- 1
-    # Optimize the reference model
-    res <- optimfun(m, data, kseq, printout=TRUE, ...)
-    valRef <- res$value
-    L[[istep]] <- list(model = m$clone_deep(), result = res)
-    #
-    done <- FALSE
-    while(!done){
-
-        #
-        istep <- istep + 1
-        # Insert the optimized parameters from last step
-        m$prmbounds[names(res$par),"init"] <- res$par
-        #
-        message("------------------------------------")
-        message("Reference score value: ",valRef)
-        # --------
-        # Generate the reduced models
-        mReduced <- mclapply(1:length(m$inputs), function(i){
-            mr <- m$clone_deep()
-            # Insert the optimized parameters from the reference model
-            mr$inputs[[i]] <- NULL
-            return(mr)
-        })
-        names(mReduced) <- names(m$inputs)
-        if(!is.na(prm[1])){
-            tmp <- mclapply(1:length(prm), function(i){
-                p <- m$get_prmvalues(names(prm[i]))
-                # If the input is not in model, then p is NA, so don't include it for fitting
-                if(!is.na(p)){
-                    # Only the ones with prms above minimum
-                    if(p > prmMin[i]){
-                        p <- p - 1
-                        mr <- m$clone_deep()
-                        mr$insert_prm(p)
-                        return(mr)
-                    }
-                }                
-                return(NA)
-            })
-            names(tmp) <- names(prm)
-            tmp <- tmp[!is.na(tmp)]
-            mReduced <- c(mReduced, tmp)
-        }
-
-        resReduced <- lapply(1:length(mReduced), function(i, ...){
-            res <- optimfun(mReduced[[i]], data, kseq, printout=FALSE, ...)
-            message(names(mReduced)[[i]], ": ", res$value)
-            return(res)
-        }, ...)
-        names(resReduced) <- names(mReduced)
-        valReduced <- unlist(getse(resReduced, "value"))
-        imin <- which.min(valReduced)
-
-        # Is one the reduced smaller than the current ref?
-        if( valReduced[imin] < valRef ){
-            # Keep the best model
-            m <- mReduced[[imin]]
-            res <- resReduced[[imin]]
-            valRef <- res$value
-            # Keep for the result
-            L[[istep]] <- list(model = m$clone_deep(), result = resReduced[[imin]])
-        }else{
-            # No improvement obtained from reduction, so return the current model (last in the list)
-            message("------------------------------------\n\nDone")
-            done <- TRUE
-        }
-    }
-    invisible(L)
-}
diff --git a/R/step_optim.R b/R/step_optim.R
new file mode 100644
index 0000000..ea455ea
--- /dev/null
+++ b/R/step_optim.R
@@ -0,0 +1,166 @@
+#' @importFrom parallel mclapply
+#'
+
+step_optim <- function(model, data, kseq = NA, prm=list(NA), direction = c("backward","forward","backwardboth","forwardboth"), optimfun = rls_optim, scorefun = rmse, ...){
+    # Do:
+    # - change all lapply 
+    # - Maybe have "cloneit" argument in optimfun, then don't clone inside optim.
+    # - Add argument controlling how much is kept in each iteration (e.g all fitted models)
+    #
+    # - Help: prm <- list(I__degree = c(min=1, max=7), mu_tday__nharmonics = c(min=1, max=7))
+    # - help: It's not checked that it's the score is calculated on the same values! WARNING should be printed if some models don't forecast same points
+    #
+    # First direction is default
+    if(length(direction) > 1){ direction <- direction[1] }
+    # Don't change the given
+    mfull <- model$clone_deep()
+    # Insert the starting prm values
+    if(!is.na(prm[1])){
+        if( direction == "backward" ){
+            mfull$insert_prm(unlist(getse(prm, "max")))
+        }else if( direction == "forward" ){
+            mfull$insert_prm(unlist(getse(prm, "min")))
+        }else{
+            # Both directions, then start at init, or halfway between min and max
+            i <- which(sapply(prm, function(x){ "init" %in% names(x) }))
+            if(length(i)){
+                mfull$insert_prm(unlist(getse(prm[i], "init")))
+            }
+            i <- which(sapply(prm, function(x){ !"init" %in% names(x) }))
+            if(length(i)){
+                mfull$insert_prm(round(unlist(getse(prm[i], "max")) - unlist(getse(prm[i], "min")) / 2))
+            }
+        }
+    }
+    # For keeping all the results
+    L <- list()
+    # 
+    m <- mfull$clone_deep()
+    if(length(grep("backward",direction))){
+        # Optimize from the full model
+        res <- optimfun(m, data, kseq, printout=TRUE, ...)
+        # Keep it
+        istep <- 1
+        L[[istep]] <- list(model = m$clone_deep(), result = res)
+    }else{
+        # Optimize from the null model
+        m$inputs <- list()
+        # Must be set
+        istep <- 0
+        res <- list(value=Inf, par=m$get_prmbounds("init"))
+    }
+    # Helper
+    c_mStep <- function(l, nms){
+        names(l) <- nms
+        l <- l[!is.na(l)]
+        c(mStep, l)
+    }
+    # Go
+    done <- FALSE
+    while(!done){
+        # Next step
+        istep <- istep + 1
+        # Insert the optimized parameters from last step
+        m$prmbounds[names(res$par),"init"] <- res$par
+        #
+        message("------------------------------------")
+        message("Reference score value: ",res$value)
+        # --------
+        mStep <- list()
+        # Generate the input modified models
+        if(length(grep("backward|both", direction))){
+            # Remove input from the current model one by one
+            if(length(m$inputs) > 1){
+                tmp <- mclapply(1:length(m$inputs), function(i){
+                    ms <- m$clone_deep()
+                    # Remove one input
+                    ms$inputs[[i]] <- NULL
+                    return(ms)
+                })
+                mStep <- c_mStep(tmp, pst("-",names(m$inputs)))
+            }
+        }
+        if(length(grep("forward|both", direction))){
+            # Add input one by one
+            iin <- which(!names(mfull$inputs) %in% names(m$inputs))
+            if(length(iin)){
+                tmp <- mclapply(iin, function(i){
+                    ms <- m$clone_deep()
+                    # Add one input
+                    ms$inputs[[length(ms$inputs) + 1]] <- mfull$inputs[[i]]
+                    names(ms$inputs)[length(ms$inputs)] <- names(mfull$inputs)[i]
+                    return(ms)
+                })
+                mStep <- c_mStep(tmp, pst("+",names(mfull$inputs)[iin]))
+            }
+        }
+
+        # Step parameters
+        if(!is.na(prm[1])){
+            if(length(grep("backward|both", direction))){
+                # Count down the parameters one by one
+                tmp <- mclapply(1:length(prm), function(i){
+                    p <- m$get_prmvalues(names(prm[i]))
+                    # If the input is not in the current model, then p is NA, so don't include it for fitting
+                    if(!is.na(p)){
+                        # Only the ones with prms above minimum
+                        if(prm[[i]]["min"] < p){
+                            ms <- m$clone_deep()
+                            p <- p - 1
+                            ms$insert_prm(p)
+                            # Return both the prm value and the name of the model to be printed
+                            return(list(ms, pst(names(p),"=",p)))
+                        }
+                    }
+                    return(list(NA,NA))
+                })
+                mStep <- c_mStep(getse(tmp,1), getse(tmp,2))
+            }
+            if(length(grep("forward|both", direction))){
+                # Count up the parameters one by one
+                tmp <- mclapply(1:length(prm), function(i){
+                    p <- m$get_prmvalues(names(prm[i]))
+                    # If the input is not in the current model, then p is NA, so don't include it for fitting
+                    if(!is.na(p)){
+                        # Only the ones with prms above minimum
+                        if(p < prm[[i]]["max"]){
+                            ms <- m$clone_deep()
+                            p <- p + 1
+                            ms$insert_prm(p)
+                            # Return both the prm value and the name of the model to be printed
+                            return(list(ms, pst(names(p),"=",p)))
+                        }
+                    }
+                    return(list(NA,NA))
+                })
+                mStep <- c_mStep(getse(tmp,1), getse(tmp,2))
+            }
+        }
+        
+        # Optimize all the step models
+        resStep <- mclapply(1:length(mStep), function(i, ...){
+            res <- try(optimfun(mStep[[i]], data, kseq, printout=FALSE, ...))
+            if(class(res) == "try-error"){ browser() }
+            message(names(mStep)[[i]], ": ", res$value)
+            return(res)
+        }, ...)
+        names(resStep) <- names(mStep)
+        
+        # Is one the step models score smaller than the current ref?
+        valStep <- unlist(getse(resStep, "value"))
+        imin <- which.min(valStep)
+        if( valStep[imin] < res$value ){
+            # Keep the best model
+            m <- mStep[[imin]]
+            res <- resStep[[imin]]
+            # Keep for the result
+            L[[istep]] <- list(model = m$clone_deep(), result = resStep[[imin]])
+        }else{
+            # No improvement obtained from reduction, so return the current model (last in the list)
+            message("------------------------------------\n\nDone")
+            message(print(m))
+            done <- TRUE
+        }
+    }
+    invisible(L)
+}
diff --git a/misc-R/reduce-test.R b/misc-R/reduce-test.R
index 49a662d..78c9bec 100644
--- a/misc-R/reduce-test.R
+++ b/misc-R/reduce-test.R
@@ -1,19 +1,20 @@
-# Load the package
-library(onlineforecast)
-# Set the data in D to simplify notation
-D <- Dbuilding
 
 
-# Print the first time point
-D$t[1]
+
+# Load the current package
+library("devtools")
+pack <- as.package("../../onlineforecast")
+load_all(pack)
+
+# Set the data in D to simplify notation
+D <- Dbuilding
 # Set the score period 
 D$scoreperiod <- in_range("2010-12-22", D$t)
-# Plot to see it
-plot(D$t, D$scoreperiod, xlab="Time", ylab="Scoreperiod")
-
-# Exclude other points example
-scoreperiod2 <- D$scoreperiod
-scoreperiod2[in_range("2010-12-30",D$t,"2011-01-02")] <- FALSE
+#
+D$tday <- make_tday(D$t, 2)
+# Generate noise input
+set.seed(83792)
+D$noise <- make_input(rnorm(length(D$t)), 2)
 
 # Generate new object (R6 class)
 model <- forecastmodel$new()
@@ -21,12 +22,33 @@ model <- forecastmodel$new()
 model$output = "heatload"
 # Inputs (transformation step)
 model$add_inputs(Ta = "Ta",
+                 I = "bspline(tday, Boundary.knots = c(6,18), degree = 5, intercept=TRUE) %**% I",
+                 noise = "noise",
+                 mu_tday = "fs(tday/24, nharmonics=10)",
                  mu = "one()")
 # Regression step parameters
 model$add_regprm("rls_prm(lambda=0.9)")
 # Optimization bounds for parameters
 model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))
-# Set the horizons for which the model will be fitted
-model$kseq <- c(3,18)
+
+
+# Reduce the model
+object <- model
+data <- D
+kseq <- 2
+prm <- list(I__degree = c(min=1, max=7), mu_tday__nharmonics = c(min=1, max=7))
+optimfun = rls_optim
+scorefun = rmse
+
+
+L <- step_optim(object, data, kseq, prm, "forward", optimfun, scorefun, cachedir="cache", cachererun=FALSE)
+L <- step_optim(object, data, kseq, prm, "backward", optimfun, scorefun, cachedir="cache", cachererun=FALSE)
+L <- step_optim(object, data, kseq, prm, "backwardboth", optimfun, scorefun, cachedir="cache", cachererun=FALSE)
+L <- step_optim(object, data, kseq, prm, "forwardboth", optimfun, scorefun, cachedir="cache", cachererun=FALSE)
+
+unlist(getse(getse(L, "model"), "prm"))
+sum(unlist(getse(getse(L, "result"), "counts")))
+
+
 
 
diff --git a/vignettes/online-updating.Rmd b/vignettes/online-updating.Rmd
index 3f5a3f1..2d3fe5a 100644
--- a/vignettes/online-updating.Rmd
+++ b/vignettes/online-updating.Rmd
@@ -114,9 +114,9 @@ model$add_inputs(Ta = "lp(Ta, a1=0.9)",
 model$add_regprm("rls_prm(lambda=0.9)")
 model$add_prmbounds(Ta__a1 = c(0.5, 0.9, 0.9999),
                     lambda = c(0.9, 0.99, 0.9999))
-model$kseq <- c(3,18)
+model$kseq <- 1:36
 # Optimize the parameters
-model$prm <- rls_optim(model, subset(D,D$trainperiod))$par
+rls_optim(model, subset(D,D$trainperiod), kseq = c(3,18))
 ```
 
 
@@ -129,7 +129,6 @@ First fit on a period
 ```{r}
 iseq <- which(in_range("2010-12-15",D$t,"2011-01-01"))
 Dfit <- subset(D, iseq)
-model$kseq <- 1:36
 rls_fit(model$prm, model, Dfit)
 ```
 
diff --git a/vignettes/setup-and-use-model.Rmd b/vignettes/setup-and-use-model.Rmd
index 47e27f8..e05339d 100644
--- a/vignettes/setup-and-use-model.Rmd
+++ b/vignettes/setup-and-use-model.Rmd
@@ -148,7 +148,7 @@ model$add_regprm("rls_prm(lambda=0.9)")
 # Optimization bounds for parameters
 model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))
 # Set the horizons for which the model will be fitted
-model$kseq <- c(3,18)
+model$kseq <- 1:36
 ```
 
 
@@ -206,7 +206,7 @@ model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))
 Finally, we set the horizons for which to fit:
 ```{r}
 # Set the horizons for which the model will be fitted
-model$kseq <- c(3,18)
+model$kseq <- 1:36
 ```
 The horizons to fit for is actually not directly related to the model, but
 rather the fitting of the model. In principle, it would be more "clean" if the
@@ -220,25 +220,24 @@ We have set up the model and can now tune the `lambda` with the `rls_optim()`,
 which is a wrapper for the `optim()` function:
 ```{r, output.lines=15}
 # Call the optim() wrapper
-model$prm <- rls_optim(model, D)$par
+rls_optim(model, D, kseq = c(3,18))
 ```
 Note, how it only calculated a score for the 3 and 18 steps
-horizons - as we specified with `model$kseq` above. The parameters could be
+horizons - since we gave it `kseq` as an argument, which then overwrites
+`model$kseq` for the optimization only. The parameters could be
 optimized separately for each horizon, for example it is often such that for the
 first horizons a very low forgetting factor is optimal (e.g. 0.9). Currently,
 however, the parameters can only be optimized together. By optimizing for a
 short (3 steps) and a long horizon (18 steps), we obtain a balance - using less computations compared to optimizing on all horizons.
 
-The optimization converge and the tuned parameter becomes:
+The optimization converge and the tuned parameter was inserted:
 ```{r}
 # Optimized lambda
 model$prm
 ```
 
-Now we can fit with the optimized `lambda` on all horizons over the entire period:
+Now we can fit with the optimized `lambda` on the horizons in `model$kseq` over the entire period:
 ```{r}
-# Set to fit for all horizons
-model$kseq <- 1:36
 # Fit for all on entire period in D
 fit1 <- rls_fit(model$prm, model, D)
 ```
-- 
GitLab