diff --git a/.gitignore b/.gitignore index df6682cfe6e81f47b71796bd0107a7b011efb0e5..27fe0b93fa19c8dd5d819eec40f0480a24364095 100644 --- a/.gitignore +++ b/.gitignore @@ -3,19 +3,14 @@ .RData .Ruserdata -NAMESPACE - *.o src/onlineforecast\.so -inst/doc modifications_old_notstaged/ cache/ -man/ - misc-R/*cache* vignettes/*cache* diff --git a/DESCRIPTION b/DESCRIPTION index a90139af341ec0a9a35fb6a27cb2b634e35f68b4..553a7042d41dd4d5806b2beccbd9fabe93f3388a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: onlineforecast Type: Package Title: Forecast Modelling for Online Applications -Version: 0.9.3 +Version: 0.10.1 Description: A framework for fitting adaptive forecasting models. Provides a way to use forecasts as input to models, e.g. weather forecasts for energy related forecasting. The models can be fitted recursively and can easily be setup for updating parameters when new data arrives. See the included vignettes, the website <https://onlineforecasting.org> and the paper "Short-term heat load forecasting for single family houses" <doi:10.1016/j.enbuild.2013.04.022>. License: GPL-3 Encoding: UTF-8 @@ -21,10 +21,11 @@ Suggests: knitr, rmarkdown, R.rsp, - testthat (>= 2.1.0), + testthat (>= 3.0.0), data.table, plotly VignetteBuilder: knitr RoxygenNote: 7.1.1 URL: https://onlineforecasting.org BugReports: https://lab.compute.dtu.dk/packages/onlineforecast/-/issues +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000000000000000000000000000000000000..8cdc590471f423d4a90d08079eff3579ffabce87 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,114 @@ +# Generated by roxygen2: do not edit by hand + +S3method("==",data.list) +S3method(as.data.frame,data.list) +S3method(as.data.list,data.frame) +S3method(aslt,POSIXct) +S3method(aslt,POSIXlt) +S3method(aslt,character) +S3method(aslt,numeric) +S3method(check,data.list) +S3method(complete_cases,data.frame) +S3method(complete_cases,list) +S3method(ct,POSIXct) +S3method(ct,POSIXlt) +S3method(ct,character) +S3method(ct,numeric) +S3method(lagdf,character) +S3method(lagdf,data.frame) +S3method(lagdf,factor) +S3method(lagdf,logical) +S3method(lagdf,matrix) +S3method(lagdf,numeric) +S3method(pairs,data.list) +S3method(plot_ts,data.frame) +S3method(plot_ts,data.list) +S3method(plot_ts,matrix) +S3method(plot_ts,rls_fit) +S3method(plotly_ts,data.frame) +S3method(plotly_ts,data.list) +S3method(print,forecastmodel) +S3method(resample,data.frame) +S3method(residuals,data.frame) +S3method(residuals,forecastmodel_fit) +S3method(residuals,list) +S3method(residuals,matrix) +S3method(score,data.frame) +S3method(score,list) +S3method(subset,data.list) +S3method(summary,rls_fit) +export("%**%") +export("nams<-") +export(AR) +export(as.data.list) +export(aslt) +export(bspline) +export(cache_name) +export(cache_save) +export(check) +export(complete_cases) +export(ct) +export(data.list) +export(forecastmodel) +export(fs) +export(getse) +export(gof) +export(in_range) +export(lagdf) +export(lagdl) +export(lagvec) +export(lapply_cbind) +export(lapply_cbind_df) +export(lapply_rbind) +export(lapply_rbind_df) +export(lm_fit) +export(lm_optim) +export(lm_predict) +export(long_format) +export(lp) +export(make_input) +export(make_periodic) +export(make_tday) +export(nams) +export(one) +export(par_ts) +export(pbspline) +export(persistence) +export(plot_ts) +export(plotly_ts) +export(pst) +export(resample) +export(rls_fit) +export(rls_optim) +export(rls_predict) +export(rls_prm) +export(rls_summary) +export(rls_update) +export(rmse) 
+export(score) +export(setpar) +export(stairs) +export(step_optim) +importFrom(Rcpp,sourceCpp) +importFrom(grDevices,colorRampPalette) +importFrom(grDevices,graphics.off) +importFrom(graphics,axis) +importFrom(graphics,axis.POSIXct) +importFrom(graphics,legend) +importFrom(graphics,lines) +importFrom(graphics,mtext) +importFrom(graphics,pairs) +importFrom(graphics,panel.smooth) +importFrom(graphics,par) +importFrom(graphics,plot) +importFrom(graphics,points) +importFrom(graphics,title) +importFrom(parallel,mclapply) +importFrom(stats,aggregate) +importFrom(stats,complete.cases) +importFrom(stats,lm) +importFrom(stats,optim) +importFrom(stats,predict) +importFrom(stats,residuals) +importFrom(stats,sd) +useDynLib(onlineforecast) diff --git a/R/as.data.list.R b/R/as.data.list.R index fc685a0610e9ce69a821f6b6a64837e01fbaba4b..202d711927db6d727b6b89860f59f2e89432e8d6 100644 --- a/R/as.data.list.R +++ b/R/as.data.list.R @@ -22,16 +22,14 @@ #' @param object The object to be converted into a data.list #' @return a value of class data.list #' @seealso \code{For specific detailed info see the children, e.g. \link{as.data.list.data.frame} } -#' @family as.data.list #' +#' @rdname as.data.list #' @export as.data.list <- function(object){ UseMethod("as.data.list") } - - #' Convert a data.frame into a data.list #' #' The convention is that columns with forecasts are postfixed with \code{.kxx} where @@ -41,7 +39,6 @@ as.data.list <- function(object){ #' @param object The data.frame to be converted. #' @return a data.list #' @seealso as.data.list -#' @family as.data.list #' @examples #' # Convert a dataframe with time and two observed variables #' X <- data.frame(t=1:10, x=1:10, y=1:10) @@ -55,7 +52,9 @@ as.data.list <- function(object){ #' X #' as.data.frame(as.data.list(X)) #' +#' @rdname as.data.list #' @export + as.data.list.data.frame <- function(object) { X <- object #TEST diff --git a/R/bspline.R b/R/bspline.R index 39103ed6961806577a5714b16f33bb4b5818dc8f..56ae11f859ce024c8859212e5f6cedc448ffa8ca 100644 --- a/R/bspline.R +++ b/R/bspline.R @@ -68,7 +68,7 @@ #' @export bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots = NULL, degree = 3, bknots = NA, periodic = FALSE) { # bknots is just a short for Boundary.knots and replace if Boundary.knots are not given. - if(is.na(Boundary.knots)){ + if(is.na(Boundary.knots[1])){ Boundary.knots <- bknots } # If a list, then call on each element @@ -81,9 +81,12 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots nams(val) <- nams(X) return(val) } - # X is a data.frame or matrix + # X must be a data.frame or matrix + if(!(class(X) %in% c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") } # First find the horizons, they are used in the end nms <- nams(X) + # All columns must be like "k12" + #if(length(grep("[k|h][[:digit:]]+$", nms)) != length(nms)){ stop("All column names must indicate a horizon, so start with k and then an integer") } # Run for each horizon and calculate the basis splines L <- lapply(1:ncol(X), function(i) { if (is.na(Boundary.knots[1])) { diff --git a/R/cache_name.R b/R/cache_name.R index d9260d9b314e8c057025116876eea19f7b0638ec..9f76adad49e191481303683e864477b0a160fddb 100644 --- a/R/cache_name.R +++ b/R/cache_name.R @@ -39,17 +39,18 @@ #' # For this example use a temporary directory #' # In real use this should not be temporary! 
(changes between R sessions with tempdir()) #' cachedir <- tempdir() -#' +#' +#' # Uncomment to run: #' # First time it takes at least 1 sec. -#' fun(x=2,y=2) +#' #fun(x=2,y=2) #' # Second time it loads the cache and is much faster -#' fun(x=2,y=2) +#' #fun(x=2,y=2) #' # Try changing the arguments (x,y) and run again #' #' # See the cache file(s) -#' dir(cachedir) +#' #dir(cachedir) #' # Delete the cache folder -#' unlink(cachedir, recursive=TRUE) +#' #unlink(cachedir, recursive=TRUE) #' #' # Demonstrate how cache_name() is functioning #' # Cache using the all objects given in the function calling, i.e. both x and y @@ -81,7 +82,18 @@ cache_name <- function(..., cachedir = "cache"){ # Get the name, definition and arguments of the function from which cache_name was called funname <- strsplit(deparse(sys.calls()[[sys.nframe()-1]]), "\\(")[[1]][1] - fundef <- digest::digest(attr(eval(parse(text=funname)), "srcref")) + # Find the function in the nearest environment in the stack (i.e. parent calls) + ## for(i in rev(sys.parents())){ + ## browser() + ## if(funname %in% ls(parent.frame(i+1))){ + ## val <- mget(funname, parent.frame(i+1)) + ## break + ## } + ## } + ## fundef <- digest::digest(attr(eval(val[[funname]]), "srcref")) + # The approach above stopped working (reason unknown), so simply get the function; this should give the same result + fundef <- try(get(funname), silent=TRUE) + fundef <- digest::digest(fundef) # if no arguments were given, then use the arguments function from which cache_name was called if(length(list(...)) == 0){ funargs <- digest::digest(as.list( match.call(definition = sys.function( -1 ), call = sys.call(-1)))[-1]) diff --git a/R/complete_cases.R b/R/complete_cases.R new file mode 100644 index 0000000000000000000000000000000000000000..8dc673256877468809cf014e38cc2059b985a295 --- /dev/null +++ b/R/complete_cases.R @@ -0,0 +1,101 @@ +#' Returns a logical vector indicating the time points which are complete. +#' +#' Given a forecast matrix, the forecasts are lagged "+k" steps to align them and +#' then 'complete.cases()' is run on that. +#' +#' Given a list of forecast matrices, the points where all of them are complete (over all horizons) are TRUE. +#' +#' @title Find complete cases in forecast matrices +#' @param object A data.frame (with columns named 'kxx') or a list of +#' data.frames. +#' @param kseq integer vector: If given then only these horizons are processed. +#' @return A logical vector specifying if there are no missing +#' values across all horizons. +#' @author Peder Bacher +#' +#' @examples +#' # Take a small data set +#' D <- subset(Dbuilding, 1:20, kseq=1:5) +#' # Check the forecast matrix of ambient temperature +#' D$Ta +#' # Which are complete over all horizons?
The first are not since not all horizons +#' # have a value there (after lagging) +#' complete_cases(D$Ta) +#' # Same goes if given as a list +#' complete_cases(D["Ta"]) +#' # and if more than one is given +#' complete_cases(D[c("Ta","I")]) +#' +#' # Set some NA of some horizon +#' D$I$k3[8:9] <- NA +#' # Now they are recognized as not complete +#' complete_cases(D[c("Ta","I")]) +#' +#' # If we deal with residuals, which are observations and there for have column names "hxx" +#' Resid <- residuals(D$Ta, D$Taobs) +#' names(Resid) +#' # With columns with "h" instead of "k" no lagging occurs in complete_cases +#' complete_cases(Resid) +#' # +#' Resid2 <- Resid +#' Resid$h3[8:9] <- NA +#' complete_cases(list(Resid,Resid2)) +#' +#' @rdname complete_cases +#' @importFrom stats complete.cases +#' @export +complete_cases <- function(object, kseq = NA){ + UseMethod("complete_cases") +} + + +#' @rdname complete_cases +#' @importFrom stats complete.cases +#' @export +complete_cases.list <- function(object, kseq=NA){ + if(length(grep("[k][[:digit:]]+$", nams(object[[1]])))){ + prefix <- "k" + }else{ + prefix <- "h" + } + # Do it only for the given kseq horizons, if kseq is not set, then take it from the first + if(is.na(kseq[1])){ + kseq <- as.integer(gsub(prefix, "", nams(object[[1]]))) + } + # Check that they all have kseq horizons + # Init a logical matrix with each point + ok <- matrix(TRUE, nrow(object[[1]]), length(kseq)) + # + for(i in 1:length(object)){ + if(!all(kseq %in% as.integer(gsub(prefix, "", nams(object[[i]]))))){ + stop("Not all forecast matrices have kseq horizons: ",kseq) + } + ok <- ok & !is.na(object[[i]][ ,pst(prefix,kseq)]) + } + # + ok <- as.data.frame(ok) + if(prefix == "k"){ + # Lag to match resiuduals in time + names(ok) <- pst("k",kseq) + ok <- lagdf(ok, "+k") + } + # Finally, the vector with TRUE for all points with no NAs for any forecast + ok <- apply(ok, 1, all) + # Just set NAs to false + ok[is.na(ok)] <- FALSE + # + return(ok) +} + + +#' @rdname complete_cases +#' @importFrom stats complete.cases +#' @export +complete_cases.data.frame <- function(object, kseq=NA){ + # Make a distinction between "kxx" which are forecasts or "hxx", which are observations + if(length(grep("[k][[:digit:]]+$", nams(object)[1]))){ + # Forecasts, so lag them + object <- lagdf(object, "+k") + } + complete.cases(object) +} diff --git a/R/data.list.R b/R/data.list.R index 291ac436a361b88ca6b3e00a013f3843fa9f5501..98ba83ba931c8c017e9273a43aa4c399bd02f738 100644 --- a/R/data.list.R +++ b/R/data.list.R @@ -117,10 +117,10 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = X <- D[[nms[i]]] if(class(X)[1] == "data.frame" ){ # Check if holds forecasts by checking if any name is "kxx" - if(length(grep("^k[[:digit:]]+$", names(X))) > 0){ + if(length(grep("k[[:digit:]]+$", names(X))) > 0){ # If it holds forecasts, check that they are all there if( !all(pst("k",kseq) %in% names(X)) ){ - warning(pst("The variable ",nms[i]," contain ",pst(names(X),collapse=",")," hence doesn't contain all k in kseq = ",pst(kseq,collapse=","))) + warning(pst("The variable ",nms[i]," contains ",pst(names(X),collapse=",")," hence doesn't contain all k in kseq = ",pst(kseq,collapse=","))) } } } @@ -160,7 +160,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = val <- lapply(D[nms], function(X) { if (any(class(X) == "data.frame")) { # Check if holds forecasts by checking if any name is "kxx" - if(length(grep("^k[[:digit:]]+$", names(X))) > 0){ + 
if(length(grep("k[[:digit:]]+$", names(X))) > 0){ return(X[subset,pst("k",kseq), drop=FALSE]) }else{ return(X[subset, , drop=FALSE]) @@ -173,7 +173,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = # Lag the forecasts k if specified if(lagforecasts){ val <- lapply(val, function(X){ - if(any(class(X) == "data.frame") & length(grep("^k[[:digit:]]+$",names(X))) > 0) { + if(any(class(X) == "data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) { return(lagdf.data.frame(X, lagseq="+k")) }else{ return(X) @@ -213,7 +213,7 @@ as.data.frame.data.list <- function(x, row.names=NULL, optional=FALSE, ...){ val <- as.data.frame(val) } # Fix names of data.frames (i.e. forecasts, their names are now "kxx", but should be X.kxx) - i <- grep("^k[[:digit:]]+$", names(val)) + i <- grep("k[[:digit:]]+$", names(val)) if(length(i) > 0){ names(val)[i] <- pst(names(x)[i],".",names(val)[i]) } @@ -330,68 +330,73 @@ check.data.list <- function(object){ # Which is data.frame or matrix? dfOrMat <- sapply(D, function(x){ (class(x) %in% c("matrix","data.frame"))[1] }) # Vectors check - vecchecks <- c("ok","NAs","length","class") vecseq <- which(!dfOrMat & names(dfOrMat) != "t") - Observations <- data.frame(matrix("", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(names(vecseq),vecchecks)), stringsAsFactors=FALSE) - Observations$ok <- "V" - # - for(i in 1:length(vecseq)){ + Observations <- NA + if(length(vecseq) > 0){ + cat("Observation vectors:\n") + vecchecks <- c("ok","NAs","length","class") + Observations <- data.frame(matrix("", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(names(vecseq),vecchecks)), stringsAsFactors=FALSE) + Observations$ok <- "V" # - nm <- names(vecseq)[i] - # NAs - NAs <- round(max(sum(is.na(D[nm])) / length(D[nm]))) - Observations$NAs[i] <- pst(NAs,"%") - # Check the length - if(length(D[[nm]]) != length(D$t)){ - Observations$length[i] <- length(D[[nm]]) - } - # Its class - Observations$class[i] <- class(D[[nm]]) - # Not ok? - if(sum(Observations[i, 3] == "") < 1){ - Observations$ok[i] <- "" + for(i in 1:length(vecseq)){ + # + nm <- names(vecseq)[i] + # NAs + NAs <- round(max(sum(is.na(D[nm])) / length(D[nm]))) + Observations$NAs[i] <- pst(NAs,"%") + # Check the length + if(length(D[[nm]]) != length(D$t)){ + Observations$length[i] <- length(D[[nm]]) + } + # Its class + Observations$class[i] <- class(D[[nm]]) + # Not ok? 
+ if(sum(Observations[i, 3] == "") < 1){ + Observations$ok[i] <- "" + } } + print(Observations) } # # For forecasts dfseq <- which(dfOrMat) - dfchecks <- c("ok","maxNAs","meanNAs","nrow","colnames","sameclass","class") - Forecasts <- data.frame(matrix("", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(names(dfseq),dfchecks)), stringsAsFactors=FALSE) - Forecasts$ok <- "V" - # - for(i in 1:length(dfseq)){ + Forecasts <- NA + if(length(dfseq) > 0){ + cat("\nForecast data.frames or matrices:\n") + dfchecks <- c("ok","maxNAs","meanNAs","nrow","colnames","sameclass","class") + Forecasts <- data.frame(matrix("", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(names(dfseq),dfchecks)), stringsAsFactors=FALSE) + Forecasts$ok <- "V" # - nm <- names(dfseq)[i] - colnms <- nams(D[[nm]]) - # max NAs - maxNAs <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) - Forecasts$maxNAs[i] <- pst(maxNAs,"%") - # Mean NAs - meanNAs <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) - Forecasts$meanNAs[i] <- pst(meanNAs,"%") - # Check the number of rows - if(nrow(D[[nm]]) != length(D$t)){ - Forecasts$nrow[i] <- nrow(D[[nm]]) - } - # Check the colnames, are they unique and all k+integer? - if(!length(unique(grep("^k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){ - Forecasts$colnames[i] <- "X" - } - if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){ - Forecasts$sameclass[i] <- "X" - }else{ - Forecasts$class[i] <- class(D[[nm]][ ,1]) - } - # Not ok? - if(sum(Forecasts[i, ] == "") < (length(dfchecks)-4)){ - Forecasts$ok[i] <- "" + for(i in 1:length(dfseq)){ + # + nm <- names(dfseq)[i] + colnms <- nams(D[[nm]]) + # max NAs + maxNAs <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) + Forecasts$maxNAs[i] <- pst(maxNAs,"%") + # Mean NAs + meanNAs <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) + Forecasts$meanNAs[i] <- pst(meanNAs,"%") + # Check the number of rows + if(nrow(D[[nm]]) != length(D$t)){ + Forecasts$nrow[i] <- nrow(D[[nm]]) + } + # Check the colnames, are they unique and all k+integer? + if(!length(unique(grep("k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){ + Forecasts$colnames[i] <- "X" + } + if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){ + Forecasts$sameclass[i] <- "X" + }else{ + Forecasts$class[i] <- class(D[[nm]][ ,1]) + } + # Not ok? + if(sum(Forecasts[i, ] == "") < (length(dfchecks)-4)){ + Forecasts$ok[i] <- "" + } } + print(Forecasts) } - # - message("Observation vectors:") - print(Observations) - message("\nForecast data.frames or matrices:") - print(Forecasts) invisible(list(Observations=Observations, Forecasts=Forecasts)) } diff --git a/R/depth.R b/R/depth.R new file mode 100644 index 0000000000000000000000000000000000000000..053875ec5c9d224f1efa46a0efcefb92f4256f0c --- /dev/null +++ b/R/depth.R @@ -0,0 +1,7 @@ +#' Depth of a list +#' +#' Returns the depth of a list +#' @title Depth of a list +#' @param this list +#' @return integer +depth <- function(this) ifelse(is.list(this), 1L + max(sapply(this, depth)), 0L) diff --git a/R/flattenlist.R b/R/flattenlist.R new file mode 100644 index 0000000000000000000000000000000000000000..cd411c656ded80c172d7850f8c4a74cb4376803b --- /dev/null +++ b/R/flattenlist.R @@ -0,0 +1,24 @@ +#' Flattens list in a single list of data.frames +#' +#' Flattens list. 
Can maybe be made better. It might end up copying data in +#' memory!? It might change the order of the elements. +#' @title Flattens list +#' @param x List to flatten. +#' @return A flatten list +flattenlist <- function(x){ + (n <- depth(x)) + if(n == 2){ + # Its fine + return(x) + }else if(n ==3){ + unlist(x, recursive=FALSE) + }else{ + morelists <- sapply(x, function(xprime) class(xprime)[1]=="list") + out <- c(x[!morelists], unlist(x[morelists], recursive=FALSE)) + if(sum(morelists)){ + Recall(out) + }else{ + return(out) + } + } +} diff --git a/R/forecastmodel.R b/R/forecastmodel.R index c1bfdca4e61a9a29c38544fc81cb5eed21d44d2e..1de0860a1e9aed3662ab4556a1fdd52f47e86635 100644 --- a/R/forecastmodel.R +++ b/R/forecastmodel.R @@ -23,7 +23,9 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( # # The horizons to fit for kseq = NA, - # The (transformation stage) parameters used for the fit + # The horizons to optimize for + kseqopt = NA, + # The (transformation stage) parameters (only the ones set in last call of insert_prm()) prm = NA, # Stores the maximum lag for AR terms maxlagAR = NA, @@ -83,7 +85,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- - # Get the transformation parameters + # Get the transformation parameters (set for optimization) get_prmbounds = function(nm){ if(nm == "init"){ if(is.null(dim(self$prmbounds))){ @@ -130,8 +132,15 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( } # MUST INCLUDE SOME checks here and print useful messages if something is not right if(any(is.na(prm))){ stop(pst("None of the parameters (in prm) must be NA: prm=",prm)) } - - # Keep the prm + # If given without names, then set in same order as in the prm_bounds + if(is.null(nams(prm))){ + if(length(prm) != nrow(self$prmbounds)){ + stop("prm was given without names and length not same as prmbounds, so don't know what to do") + }else{ + nams(prm) <- row.names(self$prmbounds) + } + } + # Keep the prm given self$prm <- prm # Find if any opt parameters, first the one with "__" hence for the inputs pinputs <- prm[grep("__",nams(prm))] @@ -152,7 +161,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( # Find if the input i have prefix match with the opt. parameter ii if(pnms[ii]==nams(self$inputs)[i]){ # if the opt. parameter is in the expr, then replace - self$inputs[[i]]$expr <- private$replace_value(name = pprm[ii], + self$inputs[[i]]$expr <- private$replace_prmvalue(name = pprm[ii], value = pinputs[ii], expr = self$inputs[[i]]$expr) } @@ -160,12 +169,12 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( } } # ################ - # For the fit parameters, insert from prm if any found + # For the regression parameters, insert from prm if any found if (length(preg) & any(!is.na(self$regprmexpr))) { nams(preg) for(i in 1:length(preg)){ # if the opt. 
parameter is in the expr, then replace - self$regprmexpr <- private$replace_value(name = nams(preg)[i], + self$regprmexpr <- private$replace_prmvalue(name = nams(preg)[i], value = preg[i], expr = self$regprmexpr) } @@ -175,6 +184,32 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- + #---------------------------------------------------------------- + # Return the values of the parameter names given + get_prmvalues = function(prmnames){ + # + regprm <- eval(parse(text = self$regprmexpr)) + # From the input parameters + val <- sapply(prmnames, function(nm){ + if(length(grep("__",nm))){ + tmp <- strsplit(nm, "__")[[1]] + if(tmp[1] %in% names(self$inputs)){ + return(as.numeric(private$get_exprprmvalue(tmp[2], self$inputs[[tmp[1]]]$expr))) + }else{ + return(NA) + } + }else{ + if(nm %in% names(regprm)){ + return(as.numeric(regprm[nm])) + }else{ + return(NA) + } + } + }) + return(val) + }, + #---------------------------------------------------------------- + #---------------------------------------------------------------- # Function for transforming the input data to the regression data transform_data = function(data){ @@ -187,11 +222,11 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( if(class(L)[1]=="data.frame"){ return(list(L)) } if(class(L)[1]!="list"){ stop(pst("The value returned from evaluating: ",input$expr,", was not a matrix, data.frame or a list of them."))} if(class(L[[1]])[1]=="matrix"){ return(lapply(L, function(mat){ return(as.data.frame(mat)) })) } - return(L) + return(flattenlist(L)) }) - # Put together in one data.list - L <- structure(do.call(c, L), class="data.list") - # + # Make it a data.list with no subsubelements (it's maybe not a data.list, since it miss "t", however to take subsets etc., it must be a data.list) + L <- flattenlist(L) + class(L) <- "data.list" return(L) }, #---------------------------------------------------------------- @@ -257,7 +292,9 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( stop("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. 
It has ",length(data[[nm]]),", but 't' has ",length(data$t)) } }else{ - stop("The variable '",nm,"' is missing in data, or it has the wrong class.\nIt must be class: data.frame, matrix or vector.\nIt is needed for the input expression '",self$inputs[[i]]$expr[[1]],"'") + if(!nm == "pi"){ + stop("The variable '",nm,"' is missing in data, or it has the wrong class.\nIt must be class: data.frame, matrix or vector.\nIt is needed for the input expression '",self$inputs[[i]]$expr[[1]],"'") + } } } } @@ -289,7 +326,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- # Replace the value in "name=value" in expr - replace_value = function(name, value, expr){ + replace_prmvalue = function(name, value, expr){ # First make regex pattern <- gsub("\\.", ".*", name) # Try to find it in the input @@ -298,7 +335,7 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( if(pos>0){ pos <- c(pos+attr(pos,"match.length")) # Find the substr to replace with the prm value - (tmp <- substr(expr, pos, nchar(expr))) + tmp <- substr(expr, pos, nchar(expr)) pos2 <- regexpr(",|)", tmp) # Insert the prm value and return expr <- pst(substr(expr,1,pos-1), "=", value, substr(expr,pos+pos2-1,nchar(expr))) @@ -309,6 +346,30 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( }, #---------------------------------------------------------------- + #---------------------------------------------------------------- + get_exprprmvalue = function(name, expr){ + #name <- "degree" + #expr <- "bspline(tday, Boundary.knots = c(start=6,18), degree = 5, intercept=TRUE) %**% ones() + 2 + ones()" + #expr <- "one()" + expr <- gsub(" ", "", expr) + + # First make regex + pattern <- gsub("\\.", ".*", name) + # Try to find it in the input + pos <- regexpr(pattern, expr) + # Only replace if prm was found + if(pos>0){ + pos <- c(pos+attr(pos,"match.length")) + # Find the substr to replace with the prm value + (tmp <- substr(expr, pos, nchar(expr))) + pos2 <- regexpr(",|)", tmp) + return(substr(tmp, 2, pos2-1)) + }else{ + return(NA) + } + }, + #---------------------------------------------------------------- + #---------------------------------------------------------------- # For deep cloning, in order to get the inputs list of R6 objects copied deep_clone = function(name, value) { @@ -344,13 +405,15 @@ print.forecastmodel <- function(x, ...){ model <- x # cat("\nObject of class forecastmodel (R6::class)\n\n") cat("\nOutput:",model$output) - cat("Inputs: ") + cat("\nInputs: ") if(length(model$inputs) == 0 ){ - cat("No inputs\n") + cat("\nNo inputs\n\n") }else{ cat(names(model$inputs)[1],"=",model$inputs[[1]]$expr,"\n") - for(i in 2:length(model$inputs)){ - cat(" ",names(model$inputs)[i],"=",model$inputs[[i]]$expr,"\n") + if(length(model$inputs) > 1){ + for(i in 2:length(model$inputs)){ + cat(" ",names(model$inputs)[i],"=",model$inputs[[i]]$expr,"\n") + } } cat("\n") } diff --git a/R/forecastmodel.R-documentation.R b/R/forecastmodel.R-documentation.R index 01a1bf7d89452c299ba0a71f0aa3282dc0e97562..8e075849653a73d43bd64a1d534b711f0ff1fdf6 100644 --- a/R/forecastmodel.R-documentation.R +++ b/R/forecastmodel.R-documentation.R @@ -48,6 +48,8 @@ #' #' - kseq = NA: The horizons to fit for. #' +#' - kseqopt = NA: The horizons to fit for when optimizing. +#' #' - p = NA: The (transformation stage) parameters used for the fit. #' #' - Lfits = list(): The regression fits, one for each k in kseq (simply a list with the latest fit). 
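For context on the new kseqopt field, a minimal usage sketch (illustrative only; it assumes a forecastmodel set up as in the package examples, with output, inputs, regprm and prmbounds defined, and the included Dbuilding data):

# Fit over all horizons, but run the parameter optimization on a small subset of horizons
model$kseq <- 1:36
model$kseqopt <- c(3, 18)
# rls_optim()/lm_optim() clone the model and use kseqopt (or their kseq argument) while
# optimizing, so model$kseq itself is left unchanged; the optimized parameters are
# inserted back into the model afterwards
val <- rls_optim(model, Dbuilding)
val$par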
@@ -135,7 +137,7 @@ #---------------------------------------------------------------- #' @section \code{$insert_prm(prm)}: -#' Insert the transformation parameters prm in the input expressions and regression expressions, and keep them (simply string manipulation). +#' Insert the transformation parameters prm in the input expressions and regression expressions, and keep them in $prm (simply string manipulation). #' #' @examples #' diff --git a/R/in_range.R b/R/in_range.R index 368be5cd57ee826de75e0bf903ebd625e8c8987a..b09d0910383ed0086c991374093506adb83a55e8 100644 --- a/R/in_range.R +++ b/R/in_range.R @@ -43,15 +43,30 @@ #' # since it's covering to "2010-12-25 23:00:00" to "2010-12-26 00:00:00" #' D$t[in_range("2010-12-26", D$t, "2010-12-27")] #' +#' # When characters are given, then they are translated to the time zone of the time vector +#' D <- subset(Dbuilding, c("2010-12-15", "2010-12-16")) +#' D$t <- ct(D$t, tz="CET") +#' D$t[in_range("2010-12-15 02:00", D$t, "2010-12-15 05:00")] #' #' @export in_range <- function(tstart, time, tend=NA) { - if (class(tstart)[1] == "character") - tstart <- ct(tstart) - if (is.na(tend)) + if (is.na(tend)){ tend <- time[length(time)] - if (class(tend)[1] == "character") - tend <- ct(tend) - ct(tstart) < time & time <= ct(tend) + } + if(is.numeric(time)){ + return(tstart < time & time <= tend) + }else{ + # Use the time zone of the time when converting from character + tz <- attr(time, "tzone") + # + if (class(tstart)[1] == "character"){ + tstart <- ct(tstart, tz=tz) + } + if (class(tend)[1] == "character"){ + tend <- ct(tend, tz=tz) + } + #timezone <- attr(time, "tzone") + return(tstart < time & time <= tend) + } } diff --git a/R/lagdf.R b/R/lagdf.R index e56ec9cb2befb77539dad8a89ced43635adbb4e6..a72012cdf0a3ef4911a706a1609bd2a023afd565 100644 --- a/R/lagdf.R +++ b/R/lagdf.R @@ -22,9 +22,9 @@ #' lagdf(1:10, 3) #' # Back in time #' lagdf(1:10, -3) -#' # Works but returns a numric +#' # Works but returns a numeric column #' lagdf(as.factor(1:10), 3) -#' # Works and returns a character +#' # Works and returns a character column #' lagdf(as.character(1:10), 3) #' # Giving several lag values #' lagdf(1:10, c(1:3)) @@ -114,12 +114,12 @@ lagdf.data.frame <- function(x, lagseq) { if (lagseq %in% c("+k","+h")) { lagseq <- rep(0, length(nms)) ## lagseq according to the k value of the columnnames - i <- grep("^[k|h][[:digit:]]+$", nms) + i <- grep("[k|h][[:digit:]]+$", nms) lagseq[i] <- as.integer(sapply(strsplit(nms[i], "[k|h]"), function(x){ x[length(x)] })) } else if (lagseq %in% c("-k","-h")) { lagseq <- rep(0, length(nms)) ## lagseq according to the negative k value of the columnnames - i <- grep("^[k|h][[:digit:]]+$", nms) + i <- grep("[k|h][[:digit:]]+$", nms) lagseq[i] <- -as.integer(sapply(strsplit(nms[i], "[k|h]"), function(x){ x[length(x)] })) } } diff --git a/R/lagdl.R b/R/lagdl.R new file mode 100644 index 0000000000000000000000000000000000000000..92068f7a9d3c1fe6ef88a0f71c7312cc6acfc3a7 --- /dev/null +++ b/R/lagdl.R @@ -0,0 +1,27 @@ +## Do this in a separate file to see the generated help: +#library(devtools) +#document() +#load_all(as.package("../../onlineforecast")) +#?lagdl + +#' Lagging by shifting the values back or fourth always returning a data.list. +#' +#' This function lags (shifts) the values of the vector. A data.list is always returned with each data.frame lagged with \code{lagdf}. +#' +#' +#' @title Lagging which returns a data.list +#' @param DL The data.list to be lagged. +#' @param lagseq The integer(s) setting the lag steps. 
+#' @return A data.list. +#' @examples +#' # The values are simply shifted in each data.frame with lagdf +#' +#'@export + +lagdl <- function(DL, lagseq){ + iseq <- which(sapply(DL,class) %in% c("data.frame","matrix")) + DL[iseq] <- lapply(iseq, function(i){ + lagdf(DL[[i]], lagseq) + }) + return(DL) +} diff --git a/R/lapply.R b/R/lapply.R index ebead0f2acf4e99eadbda9ef2b264c22ad10ea65..6fb34ac169ffdbfd63e97e8fe85dcefc5cc2e45b 100644 --- a/R/lapply.R +++ b/R/lapply.R @@ -7,36 +7,40 @@ #' Helper which does lapply and then cbind #' @param X object to apply on #' @param FUN function to apply +#' @param ... passed on to lapply #' @export -lapply_cbind <- function(X, FUN){ - val <- lapply(X, FUN) +lapply_cbind <- function(X, FUN, ...){ + val <- lapply(X, FUN, ...) return(do.call("cbind", val)) } #' Helper which does lapply and then rbind #' @param X object to apply on #' @param FUN function to apply +#' @param ... passed on to lapply #' @export -lapply_rbind <- function(X, FUN){ - val <- lapply(X, FUN) +lapply_rbind <- function(X, FUN, ...){ + val <- lapply(X, FUN, ...) return(do.call("rbind", val)) } #' Helper which does lapply, cbind and then as.data.frame #' @param X object to apply on #' @param FUN function to apply +#' @param ... passed on to lapply #' @export -lapply_cbind_df <- function(X, FUN){ - val <- lapply(X, FUN) +lapply_cbind_df <- function(X, FUN, ...){ + val <- lapply(X, FUN, ...) return(as.data.frame(do.call("cbind", val))) } #' Helper which does lapply, rbind and then as.data.frame #' @param X object to apply on #' @param FUN function to apply +#' @param ... passed on to lapply #' @export -lapply_rbind_df <- function(X, FUN){ - val <- lapply(X, FUN) +lapply_rbind_df <- function(X, FUN, ...){ + val <- lapply(X, FUN, ...) return(as.data.frame(do.call("rbind", val))) } diff --git a/R/lm_fit.R b/R/lm_fit.R index d23d40f2af4372cbd3edc0cf0099b4a6e18397fa..4c3db31f5539b288fb5f21498d159268d89db4ad 100644 --- a/R/lm_fit.R +++ b/R/lm_fit.R @@ -37,9 +37,8 @@ #' # Define a simple model #' model <- forecastmodel$new() #' model$output <- "y" -#' model$add_inputs(Ta = "Ta", +#' model$add_inputs(Ta = "lp(Ta, a1=0.9)", #' mu = "one()") -#' model$add_regprm("rls_prm(lambda=0.99)") #' #' # Before fitting the model, define which points to include in the evaluation of the score function #' D$scoreperiod <- in_range("2010-12-20", D$t) @@ -53,13 +52,13 @@ #' class(fit) #' # The one-step forecast #' plot(D$y, type="l") -#' lines(fit$Yhat$k1, col=2) +#' lines(lagvec(fit$Yhat$k1,-1), col=2) #' # Get the residuals #' plot(residuals(fit)$h1) #' # Score for each horizon -#' score_fit(fit) +#' score(residuals(fit)) #' -#' # The lm_fit don't put anything in +#' # The lm_fit don't put anything in this field #' fit$Lfitval #' # Find the lm fits here #' model$Lfits @@ -68,6 +67,10 @@ #' # Some diurnal pattern is present #' acf(residuals(fit)$h1, na.action=na.pass, lag.max=96) #' +#' # Run with other parameters and return the RMSE +#' lm_fit(c(Ta__a1=0.8), model, D, scorefun=rmse, returnanalysis=FALSE) +#' lm_fit(c(Ta__a1=0.9), model, D, scorefun=rmse, returnanalysis=FALSE) +#' #' @importFrom stats lm residuals #' @export lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, printout = TRUE){ diff --git a/R/lm_optim.R b/R/lm_optim.R index e5dc48d53f7b710b21bf9c2d2f2036594d3e0967..70c293e241575d7fd597ac5fbd04acd0961826bd 100644 --- a/R/lm_optim.R +++ b/R/lm_optim.R @@ -12,8 +12,10 @@ #' @title Optimize parameters for onlineforecast model fitted with LM #' @param model The onlineforecast 
model, including inputs, output, kseq, p #' @param data The data.list including the variables used in the model. +#' @param kseq The horizons to fit for (if not set, then model$kseq is used) #' @param scorefun The function to be score used for calculating the score to be optimized. #' @param cachedir A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples. +#' @param cachererun A logical controlling whether to run the optimization even if the cache exists. #' @param printout A logical determining if the score function is printed out in each iteration of the optimization. #' @param method The method argument for \code{\link{optim}}. #' @param ... Additional parameters to \code{\link{optim}} @@ -55,7 +57,7 @@ #' #' @importFrom stats optim #' @export -lm_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, method="L-BFGS-B", ...){ +lm_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cachererun=FALSE, printout=TRUE, method="L-BFGS-B", ...){ ## Take the parameters bounds from the parameter bounds set in the model init <- model$get_prmbounds("init") lower <- model$get_prmbounds("lower") @@ -64,21 +66,36 @@ lm_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, m if(any(is.na(lower))){ lower[is.na(lower)] <- -Inf} if(any(is.na(upper))){ lower[is.na(upper)] <- Inf} + # Clone the model no matter what (at least model$kseq should not be changed no matter if optimization is stopped) + m <- model$clone_deep() + if(!is.na(kseq[1])){ + m$kseq <- kseq + }else if(!is.na(m$kseqopt[1])){ + m$kseq <- m$kseqopt + } + ## Caching the results based on some of the function arguments if(cachedir != ""){ - ## Have to insert the parameters in the expressions - model$insert_prm(init) + # Have to insert the parameters in the expressions to get the right state of the model for unique checksum + m$insert_prm(init) + # Have to reset the state first to remove dependency of previous calls + m$reset_state() ## Give all the elements to calculate the unique cache name - cnm <- cache_name(lm_fit, lm_optim, model$outputrange, model$regprm, model$transform_data(data), - data[[model$output]], scorefun, init, lower, upper, cachedir = cachedir) - ## Maybe load the cached result - if(file.exists(cnm)){ return(readRDS(cnm)) } + cnm <- cache_name(lm_fit, lm_optim, m$outputrange, m$regprm, m$transform_data(data), + data[[m$output]], scorefun, init, lower, upper, cachedir = cachedir) + # Load the cached result if it exists + if(file.exists(cnm) & !cachererun){ + res <- readRDS(cnm) + # Set the optimized parameters into the model + model$insert_prm(res$par) + return(res) + } } - ## Run the optimization + # Run the optimization res <- optim(par = init, fn = lm_fit, - model = model, + model = m, data = data, scorefun = scorefun, printout = printout, @@ -87,8 +104,9 @@ lm_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, m upper = upper, method = method, ...) 
- ## Save the result in the cachedir + # Save the result in the cachedir if(cachedir != ""){ cache_save(res, cnm) } - ## Return the result + # Set the optimized parameters into the model + model$insert_prm(res$par) return(res) } diff --git a/R/make_input.R b/R/make_input.R index ff525d114e1b2d1c60182314a3340b39c9cb1c6c..b7ea05c30ee2a7f794ae0dd9548d8dd46476a0d5 100644 --- a/R/make_input.R +++ b/R/make_input.R @@ -28,7 +28,7 @@ make_input <- function(observations, kseq){ val <- sapply(kseq, function(k){ observations }) - ## set row and column names + # set row and column names nams(val) <- paste0('k', kseq) return( as.data.frame(val) ) } diff --git a/R/make_periodic.R b/R/make_periodic.R new file mode 100644 index 0000000000000000000000000000000000000000..77cc7bdd6116224182df6b0f92c723ba4244a5d2 --- /dev/null +++ b/R/make_periodic.R @@ -0,0 +1,46 @@ +## Do this in a separate tmp.R file to check the documentation +#library(devtools) +#document() +#load_all(as.package("../../onlineforecast")) +#?make_periodic + +#' Make an forecast matrix with a periodic time signal. +#' +#' This function creates a data.frame with k-steps-ahead values of a periodic time signal, +#' such that it can be added to a data.list and used inputs to a forecast model. +#' +#' @param time vector of times of class "POSIXct" "POSIXt". +#' @param kseq vector of integers, representing the desired "k-steps ahead". +#' @param period a numeric setting the length of the period in seconds. +#' @param offset a numeric setting an offset in the period start in seconds. +#' @param tstep step time of k in seconds. +#' @return Returns a forecast matrix (data.frame) with rownames = times, colnames = k1, k2, k3, ... +#' The content of the data frame is the hour of day. +#' @keywords periodic lags data.frame +#' @seealso make_tday +#' @examples +#' # Create a time sequence of 30 min sample period +#' tseq <- seq(ct("2019-01-01"), ct("2019-02-01 12:00"), by=1800) +#' +#' # Make the three hourly periodic sequence +#' make_periodic(tseq, 1:10, 3*3600) +#' +#' # With an offset of one hour +#' make_periodic(tseq, 1:10, 3*3600, 3600) +#' +#' @export + +make_periodic <- function(time, kseq, period, offset=0, tstep=NA){ + # If tstep not given, then take it from time assuming same k-step is the sampling period + if(is.na(tstep)){ + tstep <- as.numeric(time[2] - time[1], units="secs") + } + # The time of day (in the specified units) + tday <- sapply(kseq, function(k){ + tk <- time + k * tstep - offset + as.numeric(tk,units="secs") %% period + }) + # set row and column names + nams(tday) <- paste0('k', kseq) + return( as.data.frame(tday) ) +} diff --git a/R/make_tday.R b/R/make_tday.R index ab21fd5fe3f39f1ca67eea060911100bff5b50d5..049736a3c118ef4900b297b4a1265dfbf1a3c2bd 100644 --- a/R/make_tday.R +++ b/R/make_tday.R @@ -4,38 +4,42 @@ #load_all(as.package("../../onlineforecast")) #?make_tday -#' Make an hour-of-day data.frame with k-step ahead columns. +#' Make an hour-of-day forecast matrix #' #' This function creates a data.frame with k-steps-ahead values of hour of day, #' such that it can be added to a data.list and used inputs to a forecast model. #' #' @param time vector of times of class "POSIXct" "POSIXt". -#' @param kseq vector of integers, respresenting the desired "k-steps ahead". +#' @param kseq vector of integers, representing the desired "k-steps ahead". #' @param tstep step time of k in seconds. -#' @param units to return in, e.g. "hours" or "mins" -#' @return Returns a data.frame with rownames = times, colnames = k1, k2, k5, ... 
-#' The content of the data frame is the hour of day, following the setup in "onlineforecast" setup. +#' @return Returns a forecast matrix (data.frame) with rownames = times, colnames = k1, k2, k3, ... +#' The content of the data frame is the hour of day. #' @keywords hourofday lags data.frame +#' @seealso make_periodic #' @examples -#' # Create a time sequence +#' # Create a time sequence of 30 min sample period #' tseq <- seq(ct("2019-01-01"), ct("2019-02-01 12:00"), by=1800) #' -#' # Make the time of day sequence +#' # Make the time of day sequence (assuming time between k steps is same as for tseq) #' make_tday(tseq, 1:10) #' -#' # With 0.5 hour steps and in minutes -#' make_tday(tseq, 1:10, tstep=1800, units="mins") +#' # With 0.5 hour steps, kstep in hours +#' make_tday(tseq, 1:10, tstep=3600) #' #' #' @export -make_tday <- function(time, kseq, tstep=3600, units="hours"){ - ## The time of day (in the specified units) +make_tday <- function(time, kseq, tstep=NA){ + # If tstep not given, then take it from time assuming same k-step is the sampling period + if(is.na(tstep)){ + tstep <- as.numeric(time[2] - time[1], units="secs") + } + # The time of day (in the specified units) tday <- sapply(kseq, function(k){ tk <- time + k * tstep - as.numeric( tk - trunc(tk, units="days"), units=units) + as.numeric( tk - trunc(tk, units="days"), units="hours") }) - ## set row and column names + # set row and column names nams(tday) <- paste0('k', kseq) return( as.data.frame(tday) ) } diff --git a/R/operator_multiply.R b/R/operator_multiply.R index 2ce598f0d54a20521110444b2659c834264555a9..fc3e0ca4c02c31b3e89f4ffa14afd34950db9ce6 100644 --- a/R/operator_multiply.R +++ b/R/operator_multiply.R @@ -49,35 +49,39 @@ #' @export "%**%" <- function(x, y) { - if( is.null(dim(y)) ){ - ## y is not matrix like - lapply(x, function(xx) { - xx * y - }) + # If any of them is a list: do recursive calls + if( class(x)[1] == "list" ){ + return(flattenlist(lapply(x, "%**%", y=y))) + }else if(class(y)[1] == "list"){ + return(flattenlist(lapply(y, "%**%", y=x))) + } + + # Do the multiplication + # If either is just a vector + if(is.null(dim(x)) | is.null(dim(y))){ + return(x * y) }else{ - ## y is matrix like - lapply(x, function(xx) { - ## Check if different horizon k columns - colmatch <- TRUE - if (ncol(xx) != ncol(y)) { - colmatch <- FALSE - }else if(any(nams(xx) != nams(y))){ - colmatch <- FALSE - } - if(!colmatch){ - ## Not same columns, take only the k in both - nms <- nams(xx)[nams(xx) %in% nams(y)] - xx <- xx[, nms] - y <- y[, nms] - } - ## Now multiply - val <- xx * y - ## Must be data.frame - if( is.null(dim(val)) ){ - val <- data.frame(val) - nams(val) <- nms - } - return(val) - }) + # Both are matrices + # Check if they have different horizon k columns + colmatch <- TRUE + if (ncol(x) != ncol(y)) { + colmatch <- FALSE + }else if(any(nams(x) != nams(y))){ + colmatch <- FALSE + } + if(!colmatch){ + # Not same columns, take only the k in both + nms <- nams(x)[nams(x) %in% nams(y)] + x <- x[, nms] + y <- y[, nms] + } + # Now multiply + val <- x * y + # Must be data.frame + if( is.null(dim(val)) ){ + val <- data.frame(val) + nams(val) <- nms + } + return(val) } } diff --git a/R/plot_ts.R b/R/plot_ts.R index a05b6af63b8ec6934265a6b4f59e4770658b5d31..52904003945b3160265560ae76863b3e438f7fcd 100644 --- a/R/plot_ts.R +++ b/R/plot_ts.R @@ -24,6 +24,7 @@ #' @param mains A character vector with the main for each plot. 
#' @param mainouter A character with the main at the top of the plot (can also be added afterwards with \code{title(main, outer=TRUE)}). #' @param legendtexts A list with the legend texts for each plot (replaces the names of the variables). +#' @param colormaps A list of colormaps, which will be used in each plot. #' @param xat POSIXt specifying where the ticks on x-axis should be put. #' @param usely If TRUE then plotly will be used. #' @param plotit If FALSE then the plot will not be generated, only data returned. @@ -63,7 +64,7 @@ #' @rdname plot_ts #' @export plot_ts <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA, - mains = "", mainouter="", legendtexts = NA, xat = NA, usely = FALSE, plotit = TRUE, p = NA, ...){ + mains = "", mainouter="", legendtexts = NA, colormaps = NA, xat = NA, usely = FALSE, plotit = TRUE, p = NA, ...){ UseMethod("plot_ts") } @@ -73,7 +74,7 @@ plot_ts <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", yla #' @rdname plot_ts #' @export plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA, - mains = "", mainouter="", legendtexts = NA, xat = NA, usely=FALSE, plotit = TRUE, p=NA, kseq = NA, ...) { + mains = "", mainouter="", legendtexts = NA, colormaps = NA, xat = NA, usely=FALSE, plotit = TRUE, p=NA, kseq = NA, ...) { # Take par_ts setup parameters from options if there p <- par_ts(fromoptions=TRUE, p=p, ...) # @@ -91,7 +92,7 @@ plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab # set kseq if not specified if( is.na(kseq[1]) ){ tmp <- unique(unlist(lapply(DL, function(x){names(x)}))) - tmp <- tmp[grep("^[k|h][[:digit:]]+$", tmp)] + tmp <- tmp[grep("[k|h][[:digit:]]+$", tmp)] if( length(tmp) > 0 ){ kseq <- sort(as.integer(gsub("[k|h]","",tmp))) } } # Generate a data.frame with the series to be plotted @@ -163,7 +164,7 @@ plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab namesdata <- unlist(getse(strsplit(nams(X), "_k|_h"), 1)) # Use the plot_ts function which takes the data.frame plot_ts.data.frame(X, patterns, ylims = ylims, xlab = xlab, ylabs = ylabs, mains = mains, mainouter = mainouter, - legendtexts = legendtexts, xat = xat, usely=usely, plotit = plotit, p=p, namesdata=namesdata, ...) + legendtexts = legendtexts, colormaps=colormaps, xat = xat, usely=usely, plotit = plotit, p=p, namesdata=namesdata, ...) } @@ -173,7 +174,8 @@ plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab #' @importFrom graphics par title #' @export plot_ts.data.frame <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA, - mains = NA, mainouter="", legendtexts = NA, xat = NA, usely=FALSE, plotit = TRUE, p = NA, namesdata=NA, ...) { + mains = NA, mainouter="", legendtexts = NA, colormaps = NA, xat = NA, + usely=FALSE, plotit = TRUE, p = NA, namesdata=NA, ...) { # Take par_ts setup parameters from options if there p <- par_ts(fromoptions=TRUE, p=p, ...) # @@ -183,19 +185,20 @@ plot_ts.data.frame <- function(object, patterns=".*", xlim = NA, ylims = NA, xla if(is.null(data[ ,p$xnm])){ warning("No 't' or xnm found. 
If time is not in 't', then specify it in xnm (either as argument or in options(\"par_ts\").")} # if(!is.null(data[ ,p$xnm])){ - if(is.na(xlim[1])) { xlim[1] <- data[1,p$xnm] } + if(is.na(xlim[1])) { xlim[1] <- data[1,p$xnm]-1 } # if the xlim min is na, then take all points, so -1 since time point is always end of sampling (assumed in in_range) if(length(xlim)==1) { xlim[2] <- data[nrow(data),p$xnm] } data <- data[in_range(xlim[1], data[ ,p$xnm], xlim[2]), ] } # More checking if(nrow(data) == 0){ stop(pst("No data in the time range. ",xlim[1]," to ",xlim[2]))} # Extend all individual plots vars, if not set - if(is.na(mains[1])){ mains <- rep(NA,length(patterns)) } + if(is.na(mains[1]) & length(mains)==1){ mains <- rep(NA,length(patterns)) } mainsline <- p$mainsline - if(is.na(mainsline[1])){ mainsline <- rep(NA,length(patterns)) } - if(is.na(ylims[1])){ ylims <- as.list(rep(NA,length(patterns))) } - if(is.na(ylabs[1])){ ylabs <- rep(NA,length(patterns)) } - if(is.na(legendtexts[1])){ legendtexts <- as.list(rep(NA,length(patterns))) } + if(is.na(mainsline[1]) & length(mainsline)==1){ mainsline <- rep(NA,length(patterns)) } + if(is.na(ylims[1]) & length(ylims)==1){ ylims <- as.list(rep(NA,length(patterns))) } + if(is.na(ylabs[1]) & length(ylabs)==1){ ylabs <- rep(NA,length(patterns)) } + if(is.na(legendtexts[1]) & length(legendtexts)==1){ legendtexts <- as.list(rep(NA,length(patterns))) } + if(is.na(colormaps[1]) & length(colormaps)==1){ colormaps <- as.list(rep(NA,length(patterns))) } # if(usely){ # with plotly @@ -256,7 +259,7 @@ plot_ts.data.frame <- function(object, patterns=".*", xlim = NA, ylims = NA, xla # L <- lapply(1:length(patterns), function(i){ df <- plot_ts_series(data, patterns[i], iplot=i, ylim=ylims[[i]], xlab=xlab, legendtext = legendtexts[[i]], - main=mains[i], mainline=mainsline[i], xat = xat, plotit=plotit, p=p, namesdata=namesdata, + main=mains[i], mainline=mainsline[i], colormap=colormaps[[i]], xat = xat, plotit=plotit, p=p, namesdata=namesdata, xaxis=(i==length(patterns)), ...) title(mainouter, outer=TRUE) if (!is.na(ylabs[1])){ @@ -300,7 +303,8 @@ plot_ts_iseq <- function(data, pattern, xnm, namesdata){ # Plot all columns found with regex pattern #' @importFrom graphics plot lines axis title axis.POSIXct mtext par legend plot_ts_series <- function(data, pattern, iplot = 1, - ylim = NA, xlab = "", main = "", mainline = -1.2, legendtext = NA, xat = NA, plotit = TRUE, p = NA, namesdata = NA, xaxis = TRUE, ...) { + ylim = NA, xlab = "", main = "", mainline = -1.2, colormap = NA, legendtext = NA, + xat = NA, plotit = TRUE, p = NA, namesdata = NA, xaxis = TRUE, ...) { # # Take par_ts setup parameters from options or defaults p <- par_ts(fromoptions=TRUE, p=p, ...) 
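To illustrate the new colormaps argument, a small sketch (assumptions: the included Dbuilding data, and that each list element is a vector of colours used for the series matched by the corresponding pattern, with NA falling back to the colorramp from par_ts()):

# One entry per pattern: custom colours for the temperature plot, default colours for radiation
plot_ts(Dbuilding, c("^Ta", "^I"), kseq=1:6,
        colormaps=list(grDevices::colorRampPalette(c("blue","red"))(8), NA))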
@@ -333,7 +337,7 @@ plot_ts_series <- function(data, pattern, iplot = 1, ylim <- c(0,1) }else{ ylim <- range(data[, iseq], na.rm = TRUE) - if(any(is.na(ylim))){ + if(any(is.na(ylim)) | any(is.infinite(ylim))){ legendtext <- pst(pattern," all NA") colormap <- 1 ylim <- c(0,1) @@ -342,7 +346,9 @@ plot_ts_series <- function(data, pattern, iplot = 1, } } # - colormap <- p$colorramp(length(iseq)) + if(is.na(colormap[1])){ + colormap <- p$colorramp(length(iseq)) + } # # EXTEND THE YLIM: to make room for multiple plots ylim <- ylim + c(-1,1) * diff(ylim) * p$ylimextend @@ -473,7 +479,7 @@ plot_ts_series <- function(data, pattern, iplot = 1, #' @export plot_ts.rls_fit <- function(object, patterns = c("^y$|^Yhat$","^Residuals$","CumAbsResiduals$",pst("^",names(fit$Lfitval[[1]]),"$")), xlim = NA, ylims = NA, xlab = "", ylabs = NA, mains = "", mainouter="", legendtexts = NA, - xat = NA, usely=FALSE, plotit=TRUE, p=NA, kseq = NA, ...){ + colormaps = NA, xat = NA, usely=FALSE, plotit=TRUE, p=NA, kseq = NA, ...){ fit <- object # Calculate the residuals Residuals <- residuals(fit) @@ -528,7 +534,7 @@ plot_ts.rls_fit <- function(object, patterns = c("^y$|^Yhat$","^Residuals$","Cum #patterns[patterns == "!!RLSinputs!!"] <- pst("^",nmsinput,"$")) # Make a plot of the RLS coefficients for each horizon plot_ts(data, patterns, xlim = xlim, ylims = ylims, xlab = xlab, ylabs = ylabs, - mains = mains, mainouter=mainouter, legendtexts = legendtexts, xat = xat, usely=usely, p=p, kseq = kseq, ...) + mains = mains, mainouter=mainouter, legendtexts = legendtexts, colormaps=colormaps, xat = xat, usely=usely, p=p, kseq = kseq, ...) } # Return the data invisible(data) diff --git a/R/plotly_ts.R b/R/plotly_ts.R index f06b1170cb8ec3cf41cf0ddd5eac4681f3fc0061..58dc3465824ea28a74d169e281a9dd4a9382d9b8 100644 --- a/R/plotly_ts.R +++ b/R/plotly_ts.R @@ -23,22 +23,22 @@ #' @export plotly_ts <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA, - mains = "", mainouter="", legendtexts = NA, xat = NA, usely = FALSE, p = NA, ...){ + mains = "", mainouter="", legendtexts = NA, colormaps = NA, xat = NA, usely = FALSE, p = NA, ...){ UseMethod("plotly_ts") } #' @export plotly_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA, - mains = "", mainouter="", legendtexts = NA, xat = NA, usely=TRUE, p=NA, kseq = NA, ...) { + mains = "", mainouter="", legendtexts = NA, colormaps = NA, xat = NA, usely=TRUE, p=NA, kseq = NA, ...) { plot_ts.data.list(object=object, patterns=patterns, xlim = xlim, ylims = ylims, xlab = xlab, ylabs = ylabs, - mains = mains, mainouter=mainouter, legendtexts = legendtexts, xat = xat, usely = usely, p = p, kseq=kseq, ...) + mains = mains, mainouter=mainouter, legendtexts = legendtexts, colormaps = colormaps, xat = xat, usely = usely, p = p, kseq=kseq, ...) } #' @export plotly_ts.data.frame <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA, - mains = "", mainouter="", legendtexts = NA, xat = NA, usely=TRUE, p=NA, namesdata=NA, ...) { + mains = "", mainouter="", legendtexts = NA, colormaps = NA, xat = NA, usely=TRUE, p=NA, namesdata=NA, ...) { plot_ts.data.frame(object=object, patterns=patterns, xlim = xlim, ylims = ylims, xlab = xlab, ylabs = ylabs, - mains = mains, mainouter=mainouter, legendtexts = legendtexts, xat = xat, usely = usely, p = p, namesdata=namesdata, ...) + mains = mains, mainouter=mainouter, legendtexts = legendtexts, colormaps = colormaps, xat = xat, usely = usely, p = p, namesdata=namesdata, ...) 
} ## plotly_ts.rls_fit <- function(fit, xlim=NA, kseq=NA, plotit=TRUE){ diff --git a/R/residuals.R b/R/residuals.R index a87a87be0fe34879c1762ed1ea2d0c2143ae193e..16b2dbfe51dc83d58aa6851b4b9c9e660fec7d9e 100644 --- a/R/residuals.R +++ b/R/residuals.R @@ -8,9 +8,11 @@ #' @param object The forecast matrix (a data.frame with kxx as column names, Yhat in returned fits). #' @param y The observations vector. #' @param ... Not used. -#' @return A data.frame with the residuals for each horizon. +#' @return If object is a matrix or data.frame: a data.frame with the residuals for each horizon. +#' If object is a list: A list with residuals from each element. #' #' @examples +#' # ?? list example #' # Just a vector to be forecasted #' n <- 100 #' D <- data.list() @@ -41,11 +43,11 @@ #' @rdname residuals #' @export residuals.data.frame <- function(object, y, ...){ - Yhat <- object - # Add some checking at some point - Residuals <- y - lagdf(Yhat, "+k") + # Improvements: Add some checking at some point + # Calculate the residuals + Residuals <- y - lagdf(object, "+k") # Named with hxx (it's not a forecast, but an observation available at t) - names(Residuals) <- gsub("k","h",names(Residuals)) + nams(Residuals) <- gsub("k","h",nams(Residuals)) # return(Residuals) } @@ -55,10 +57,16 @@ residuals.data.frame <- function(object, y, ...){ residuals.matrix <- residuals.data.frame #' @rdname residuals -#' @param object The value from a fit a forecastmodel (currently \code{\link{lm_fit}} or \code{\link{rls_fit}}. -#' @param ... Not used. +#' @export +residuals.list <- function(object, y, ...){ + # Each element is a forecast matrix, so do it on each + return(lapply(object, residuals, y=y)) +} + +#' @rdname residuals #' @export residuals.forecastmodel_fit <- function(object, ...){ fit <- object - residuals(fit$Yhat, fit$data[[fit$model$output]]) + return(residuals(fit$Yhat, fit$data[[fit$model$output]])) } + diff --git a/R/rls_fit.R b/R/rls_fit.R index 72db60e4000120921bb32b46769177861274c7d9..794f3271be992c23142c8e2a26032c97b56119b3 100644 --- a/R/rls_fit.R +++ b/R/rls_fit.R @@ -96,7 +96,7 @@ #' # See rmse as a function of horizon #' fit <- rls_fit(val$par, model, D, scorefun = rmse) #' plot(fit$scoreval, xlab="Horizon k", ylab="RMSE") -#' # See ?score_fit for a little more consistent way of calculating this +#' # See ?score for a little more consistent way of calculating this #' #' #' # Try adding a low-pass filter to Ta @@ -136,7 +136,7 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, } } - # First insert the prm into the model input expressions + # First insert the prm into the model input expressions (if prm is NA nothing is inserted) model$insert_prm(prm) # Since rls_fit is run from scratch, the init the stored inputs data (only needed when running iteratively) @@ -174,7 +174,7 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, Yhat <- lapply_cbind_df(Lresult, function(x){ x$yhat }) - nams(Yhat) <- pst("k",model$kseq) + nams(Yhat) <- pst("k", model$kseq) # Maybe crop the output if(!is.na(model$outputrange[1])){ Yhat[Yhat < model$outputrange[1]] <- model$outputrange[1] } diff --git a/R/rls_optim.R b/R/rls_optim.R index a5458d335be10ffe63ed858e5b87b527d0a3d55b..40a30660acc40bf9fb2c0ee43befd9f33d8ebb24 100644 --- a/R/rls_optim.R +++ b/R/rls_optim.R @@ -13,13 +13,15 @@ #' \code{cachedir} argument (relative to the current working directory). #' E.g. 
\code{rls_optim(model, D, cachedir="cache")} will write a file in the folder 'cache', such that #' next time the same call is carried out, then the file is read instead of running the optimization again. -#' See the example in url{https://onlineforecasting.org/vignettes/nice-tricks.html}. +#' See the example in \url{https://onlineforecasting.org/vignettes/nice-tricks.html}. #' #' @title Optimize parameters for onlineforecast model fitted with RLS #' @param model The onlineforecast model, including inputs, output, kseq, p -#' @param data The data.list including the variables used in the model. +#' @param data The data.list which holds the data on which the model is fitted. +#' @param kseq The horizons to fit for (if not set, then model$kseq is used) #' @param scorefun The function to be score used for calculating the score to be optimized. #' @param cachedir A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples. +#' @param cachererun A logical controlling whether to run the optimization even if the cache exists. #' @param printout A logical determining if the score function is printed out in each iteration of the optimization. #' @param method The method argument for \code{\link{optim}}. #' @param ... Additional parameters to \code{\link{optim}} @@ -59,7 +61,7 @@ #' #' #' @export -rls_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, method="L-BFGS-B", ...){ +rls_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cachererun=FALSE, printout=TRUE, method="L-BFGS-B", ...){ # Take the parameters bounds from the parameter bounds set in the model init <- model$get_prmbounds("init") lower <- model$get_prmbounds("lower") @@ -68,24 +70,38 @@ rls_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, if(any(is.na(lower))){ lower[is.na(lower)] <- -Inf} if(any(is.na(upper))){ lower[is.na(upper)] <- Inf} + # Clone the model no matter what (at least model$kseq should not be changed no matter if optimization is stopped) + m <- model$clone_deep() + if(!is.na(kseq[1])){ + m$kseq <- kseq + }else if(!is.na(m$kseqopt[1])){ + m$kseq <- m$kseqopt + } + + # Caching the results based on some of the function arguments if(cachedir != ""){ # Have to insert the parameters in the expressions to get the right state of the model for unique checksum - model$insert_prm(init) - # Give all the elements needed to calculate the unique cache name - # This is maybe smarter, don't have to calculate the transformation of the data: cnm <- cache_name(model$regprm, getse(model$inputs, nms="expr"), model$output, model$prmbounds, model$kseq, data, objfun, init, lower, upper, cachedir = cachedir) + m$insert_prm(init) # Have to reset the state first to remove dependency of previous calls - model$reset_state() - cnm <- cache_name(rls_fit, rls_optim, model$outputrange, model$regprm, model$transform_data(data), data[[model$output]], scorefun, init, lower, upper, cachedir = cachedir) - # Maybe load the cached result - if(file.exists(cnm)){ return(readRDS(cnm)) } + m$reset_state() + # Give all the elements needed to calculate the unique cache name + # This is maybe smarter, don't have to calculate the transformation of the data: cnm <- cache_name(m$regprm, getse(m$inputs, nms="expr"), m$output, m$prmbounds, m$kseq, data, objfun, init, lower, upper, cachedir = cachedir) + cnm <- cache_name(rls_fit, rls_optim, 
m$outputrange, m$regprm, m$transform_data(data), data[[m$output]], scorefun, init, lower, upper, kseq, cachedir = cachedir) + # Load the cached result if it exists + if(file.exists(cnm) & !cachererun){ + res <- readRDS(cnm) + # Set the optimized parameters into the model + model$insert_prm(res$par) + return(res) + } } # Run the optimization res <- optim(par = init, fn = rls_fit, # Parameters to pass to rls_fit - model = model, + model = m, data = data, scorefun = scorefun, printout = printout, @@ -94,10 +110,10 @@ rls_optim <- function(model, data, scorefun = rmse, cachedir="", printout=TRUE, lower = lower, upper = upper, method = method, - ...) - + ...) # Save the result in the cachedir - if(cachedir != ""){ cache_save(res, cnm) } - # Return the result + if(cachedir != ""){ cache_save(res, cnm)} + # Set the optimized parameters into the model + model$insert_prm(res$par) return(res) } diff --git a/R/rls_reduce.R b/R/rls_reduce.R deleted file mode 100644 index ff981382a20d775d90a0fe0eb87e0e8197f9d944..0000000000000000000000000000000000000000 --- a/R/rls_reduce.R +++ /dev/null @@ -1,74 +0,0 @@ -#' @importFrom parallel mclapply -rls_reduce <- function(model, data, preduce=list(NA), scorefun = rmse){ - ## prm test - ##preduce <- list(I__degree = c(min=1, init=7), mu_tday__nharmonics = c(min=1, init=7)) - prmin <- unlist(getse(preduce, 1)) - pr <- unlist(getse(preduce, 2)) - ##!! deep=TRUE didn't work, gave: "Error: C stack usage 9524532 is too close to the limit" - m <- model$clone_deep() - ## Insert the starting p reduction values - if(!is.na(preduce[1])){ - m$insert_prm(pr) - } - ## - valref <- rls_optim(m, data, printout=FALSE)$value - ## - while(TRUE){ - ## - message("------------------------------------") - message("Reference score value",valref) - ## -------- - ## Remove inputs one by one - message("\nRemoving inputs one by one") - valsrm <- mclapply(1:length(model$inputs), function(i){ - mr <- m$clone_deep() - mr$inputs[[i]] <- NULL - rls_optim(mr, data, printout=FALSE)$value - }) - valsrm <- unlist(valsrm) - names(valsrm) <- names(m$inputs) - message("Scores") - print(valsrm) - ## -------- - ## Reduce parameter values if specified - if(!is.na(pr[1])){ - message("\nReducing prm with -1 one by one") - valspr <- mclapply(1:length(pr), function(i){ - mr <- m$clone_deep() - p <- pr - ## Only count down if above minimum - if( p[i] >= prmin[i] ){ - p[i] <- p[i] - 1 - } - mr$insert_prm(p) - val <- rls_optim(mr, data, printout=FALSE)$value - ## - return(val) - }) - valspr <- unlist(valspr) - names(valspr) <- names(pr) - message("Scores") - print(valspr) - } - ## Is one the reduced smaller than the current ref? 
- if( min(c(valsrm,valspr)) < valref ){ - if(which.min(c(min(valsrm),min(valspr))) == 1){ - ## One of the models with one of the inputs removed is best - imin <- which.min(valsrm) - message("Removing input",names(m$inputs)[imin]) - m$inputs[[imin]] <- NULL - }else{ - ## One of the models with reduced parameter values is best - imin <- which.min(valspr) - pr[imin] <- pr[imin] - 1 - m$insert_prm(pr) - message("Reduced parameter",names(pr)[imin],"to:",pr[imin]) - } - valref <- min(c(valsrm,valspr)) - }else{ - ## No improvement obtained from reduction, so return the current model - message("------------------------------------\n\nDone") - return(m) - } - } -} diff --git a/R/rls_summary.R b/R/rls_summary.R index c3659a1ede4f59b448a582bc3b4f5abba01d3c79..15e1610442543b6c21f489bfc61ffe6fdc34af17 100644 --- a/R/rls_summary.R +++ b/R/rls_summary.R @@ -27,7 +27,6 @@ #' @param object of class \code{rls_fit}, so a fit calculated by \code{\link{rls_fit}}. #' @param scoreperiod logical (or index). If this scoreperiod is given, then it will be used over the one in the fit. #' @param scorefun The score function to be applied on each horizon. -#' @param usecomplete Use on the set of observations which is complete on all horizons. #' @param printit Print the result. #' @param ... Not used. #' @return A list of: @@ -67,20 +66,23 @@ #' #' @importFrom stats sd #' @export -rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete = TRUE, printit = TRUE, ...){ +rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, printit = TRUE, ...){ fit <- object # + if(is.na(scoreperiod[1])){ + scoreperiod <- fit$data$scoreperiod + } + # scipen <- options(scipen=10)$scipen - # - tmp <- score_fit(fit, scoreperiod, usecomplete, scorefun) - scoreval <- tmp$scoreval - scoreperiodused <- tmp$scoreperiod - retval <- list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiodused) + # Calculate the score for each horizon + scoreval <- score(residuals(fit), scoreperiod, scorefun=scorefun) + # The result to return + retval <- list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiod) # Return the result before print? 
if(!printit){ return(retval) } - # Insert the optimized parameters + # Insert the optimized parameters (or actually $prm are just the last parameters given to insert_prm()) m <- fit$model$clone_deep() m$prm[names(m$prm)] <- signif(m$prm, digits=3) m$insert_prm(m$prm) @@ -91,12 +93,12 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete = cat(" ",names(m$regprm)[i],"=",unlist(m$regprm[i]),"\n") } # - cat("\nScoreperiod:",sum(scoreperiodused),"observations are included.\n") + cat("\nScoreperiod:",sum(scoreperiod),"observations are included.\n") # cat("\nRLS coeffients summary stats (cannot be used for significance tests):\n") coef <- t(sapply(1:length(fit$Lfitval[[1]]), function(i){ val <- sapply(fit$Lfitval, function(Theta){ - Theta[scoreperiodused,i] + Theta[scoreperiod,i] }) # m <- mean(val,na.rm=TRUE) @@ -126,7 +128,7 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete = cat(pst("\n",toupper(scorename),":\n")) print(tmp) cat("\n") - invisible(list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiodused)) + invisible(list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiod)) } #' @importFrom stats sd diff --git a/R/rmse.R b/R/rmse.R index 12de721a1433446a57d6cf8be810c1a57c0b98e9..f90710d4bf68e1b3b7423ed8bbc0e8a920bd09f8 100644 --- a/R/rmse.R +++ b/R/rmse.R @@ -13,7 +13,7 @@ #' @title Computes the RMSE score. #' @param x a numerical vector of residuals. #' @return The RMSE score. -#' @seealso \code{\link{score}()} for calculation of a score for the k'th horizon, and \code{\link{score_fit}()} which takes a forecastmodel fit and returns score taking scoreperiod etc. into account. +#' @seealso \code{\link{score}()} for calculation of a score for the k'th horizon #' @name rmse #' @examples #' diff --git a/R/score.R b/R/score.R index e394c3d99056cdd1cc84814311181a57a21bd796..0c13add2919d963fed646d293482a67801802eac 100644 --- a/R/score.R +++ b/R/score.R @@ -9,10 +9,11 @@ #' Applies the \code{scorefun} on all horizons (each column) of the residuals matrix. See the description of each parameter for more details. #' #' @title Calculate the score for each horizon. -#' @param Residuals A matrix with residuals (columns named \code{hxx}) for which to calculate the score for each horizon. -#' @param scoreperiod as a logical vector controlling which points to be included in the score calculation. If NA then all values are included. -#' @param usecomplete if TRUE then only the values available for all horizons are included (i.e. if at one time point there is a missing value, then values for this time point is removed for all horizons in the calculation). +#' @param object ??list or A matrix with residuals (columns named \code{hxx}) for which to calculate the score for each horizon. +#' @param scoreperiod as a logical vector controlling which points to be included in the score calculation. If NA then all values are included (depeding on 'complete'). +#' @param usecomplete TRUE then only the values available for all horizons are included (i.e. if at one time point there is a missing value, then values for this time point is removed for all horizons in the calculation). #' @param scorefun The score function. +#' @param ... is passed on to the score function. #' @return A list with the a numeric vector with the score value for each horizon and the applied \code{scoreperiod} (note can be different from the given scoreperiod, if only complete observations are used (as per default)). 
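A minimal sketch of how the revised fitting, summary and scoring functions can be combined, assuming a set-up forecast model 'model' and a data.list 'D' with a logical 'D$scoreperiod', as in the package examples (the horizon, date and folder names are only illustrative):

# Optimize only for horizon 3 and cache the result; an identical second call
# reads the cache unless cachererun=TRUE is given
val <- rls_optim(model, D, kseq=3, cachedir="cache", cachererun=FALSE)
# The optimized parameters are returned and are now also inserted into the model
val$par
model$prm
# Fit on the horizons in model$kseq and summarize; rls_summary() now uses
# D$scoreperiod when no scoreperiod argument is given
fit <- rls_fit(val$par, model, D)
rls_summary(fit)
rls_summary(fit, scoreperiod=in_range("2011-01-01", D$t))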
#' @examples #' @@ -24,36 +25,74 @@ #' Resid <- residuals(Yhat, y) #' #' # Calculate the score for the k1 horizon -#' score(Resid)$scoreval +#' score(Resid) #' -#' # The first values were excluded, since there are NAs +#' # In the beginning the horizons have NAs #' head(Resid) -#' score(Resid)$scoreperiod +#' # Default is that only complete cases over all horizons are included +#' score(Resid) +#' # So including all cases for each horizon will give different values +#' score(Resid, usecomplete=FALSE) #' -#' @importFrom stats complete.cases +#' # Given a list +#' # The residuals for each horizon +#' Resid2 <- residuals(persistence(y,kseq=1:4)+rnorm(100), y) +#' +#' score(list(Resid,Resid2)) + +#' @rdname score #' @export -score <- function(Residuals, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse){ - # If no scoreperiod is given, then use all - if(is.na(scoreperiod[1])){ - scoreperiod <- rep(TRUE,nrow(Residuals)) +score <- function(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...){ + UseMethod("score") +} + + +#' @rdname score +#' @export +score.list <- function(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...){ + # If only on element + if(length(object) == 1){ + return(as.matrix(score(object[[1]], scoreperiod, usecomplete, scorefun, ...))) + } + # Use only complete cases + if(usecomplete){ + tmp <- complete_cases(object) }else{ - # Do checking of scoreperiod - txt <- "It must be set to an index (int or logical) defining which points to be evaluated in the scorefun()." - if( length(scoreperiod) != nrow(Residuals) ){ - stop("scoreperiod is not same length as nrow(Residuals): ",txt) - }else{ - if( all(is.na(scoreperiod)) ){ stop("scoreperiod is all NA: ",txt) } - } + tmp <- rep(TRUE,nrow(object)) + } + if(!is.na(scoreperiod[1])){ + scoreperiod <- tmp & scoreperiod + }else{ + scoreperiod <- tmp + } + # Run on each element, usecomplete has been dealt with + lapply_cbind(object, score, usecomplete=FALSE, scoreperiod=scoreperiod, scorefun=scorefun, ...=...) +} + + +#' @rdname score +#' @export +score.data.frame <- function(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...){ + if(is.na(scoreperiod[1])){ + scoreperiod <- rep(TRUE,nrow(object)) } - # Take only the rows which have a value for each horizon? if(usecomplete){ - scoreperiod <- scoreperiod & complete.cases(Residuals) + scoreperiod <- complete_cases(object) & scoreperiod + } + # If no scoreperiod is given, then use all + # Do checking of scoreperiod + txt <- "It must be set to an index (int or logical) defining which points to be evaluated in the scorefun()." + if( length(scoreperiod) != nrow(object) ){ + stop("scoreperiod is not same length as nrow(object): ",txt) + }else{ + if( all(is.na(scoreperiod)) ){ stop("At least one forecast horizon or scoreperiod is all NA: ",txt) } } # Calculate the objective function for each horizon - scoreval <- sapply(1:ncol(Residuals), function(i){ - scorefun(Residuals[scoreperiod,i]) + scoreval <- sapply(1:ncol(object), function(i){ + scorefun(object[scoreperiod,i], ...) }) - nams(scoreval) <- gsub("h","k",nams(Residuals)) + nams(scoreval) <- gsub("h","k",nams(object)) # - return(list(scoreval=scoreval,scoreperiod=scoreperiod)) + return(scoreval) } + diff --git a/R/score_fit.R b/R/score_fit.R deleted file mode 100644 index 5ddb7a0f41ad39d84ba63ce0ec02ad5dce8dddac..0000000000000000000000000000000000000000 --- a/R/score_fit.R +++ /dev/null @@ -1,58 +0,0 @@ - -#' Calculate the score for each horizon for a forecast model fit. 
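With score_fit() removed below, a hedged sketch of the equivalent calls using the new score() generic; here 'fit' is assumed to be a value returned by rls_fit(), 'fit1' and 'fit2' are placeholders for two such fits, and 'D' is the data.list used for fitting (with output 'heatload' as in the examples):

# Where score_fit(fit)$scoreval was used before, the per-horizon scores are now
# obtained directly from the residuals
score(residuals(fit), fit$data$scoreperiod)
# Forecasts from several fits can be scored together; per default only time points
# which are complete across all of them (and all horizons) are included
score(residuals(list(fit1$Yhat, fit2$Yhat), D$heatload), D$scoreperiod)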
-#' -#' For evaluation of the score on each horizon, as specified in the fit. Use it for a consistent evaluation. -#' -#' @title Calculates scores for a forecast model fit. -#' @param fit A model fit -#' @param scoreperiod as an index (logical or integer) defining which points to inlude in the score calculation. If NA, the \code{scoreperiod} from the \code{fit$model} object is used. -#' @param usecomplete Only use points where -#' @param scorefun The score function applied, per default \code{\link{rmse}}. -#' @seealso \code{\link{rmse}} and \code{\link{score}} which are used in this function. -#' @return A list with: -#' - \code{scoreval} is the score value -#' - \code{scoreperiod} is the score period used (can be different that the one in arguments \code{fit} or \code{scoreperiod}) -#' - \code{scorename} is the name of the score function applied -#' @export -score_fit <- function(fit, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse){ - # Calculate the score for each horizon - if("scorefun" %in% names(as.list(match.call()))){ - scorename <- as.list(match.call())$scorefun - }else{ - scorename = "rmse" - } - - # Check score period - txt <- "It must be set to an index (int or logical) defining which points to be evaluated in the scorefun()." - if(is.na(scoreperiod[1])){ - if("scoreperiod" %in% nams(fit$data)){ - scoreperiod <- fit$data$scoreperiod - }else{ - stop("scoreperiod is not set. Set it in the data used in the fit function or as argument in the present call: ",txt) - } - } - - # Calculate the Residuals if they were not in fit - if( !"Residuals" %in% names(fit) ){ - # Calculate the residuals - Residuals <- residuals(fit$Yhat, fit$data[fit$model$output]) - }else{ - Residuals <- fit$Residuals - } - - # Calculate the score - tmp <- score(Residuals, scoreperiod, usecomplete) - scoreval <- tmp$scoreval - scoreperiod <- tmp$scoreperiod - - # Give a warning if the score - if(!is.na(fit$scoreval[1])){ - if(length(fit$scoreval) != length(scoreval)){ - warning("fit contains 'scoreval', which is different in length than the scoreval calculated here (probably for different horizons (kseq))") - }else if(!all(fit$scoreval == scoreval)){ - warning("fit contains 'scoreval' which is different from the",scorename,"score calculated now (can also be because of 'usecomplete = TRUE', such that only points which have forecasts for all horizons are included.") - } - } - # - list(scoreval=scoreval, scoreperiod=scoreperiod, scorename=scorename) -} diff --git a/R/step_optim.R b/R/step_optim.R new file mode 100644 index 0000000000000000000000000000000000000000..3c84ec3b2bfc017adf59dea9716ee0b69bf0f905 --- /dev/null +++ b/R/step_optim.R @@ -0,0 +1,447 @@ +# Do this in a separate file to see the generated help: +#library(devtools) +#document() +#load_all(as.package("../../onlineforecast")) +#?step_optim + +#' Forward and backward model selection +#' +#' This function takes a model and carry out a model selection by stepping +#' backward, forward or in both directions. +#' +#' Note that mclapply is used. In order to control the number of cores to use, +#' then set it, e.g. to one core `options(mc.cores=1)`, which is needed for +#' debugging to work. +#' +#' The full model containing all inputs must be given. In each step new models +#' are generated, with either one removed input or one added input, and then all +#' the generated models are optimized and their scores compared. 
If any new +#' model have an improved score compared to the currently selected model, then +#' the new is selected and the process is repeated until no new improvement is +#' obtained. +#' +#' In addition to selecting inputs, then integer parameters can also be stepped +#' through, e.g. the order of basis splined or the number of harmonics in +#' Fourier series. +#' +#' The stepping process is different depending on the direction. In addition to +#' the full model, a starting model can be given, then the selection process +#' will start from that model. +#' +#' If the direction is "both", which is default (same as "backwardboth") then the +#' stepping is: +#' - In first step inputs are removed one-by-one +#' - In following steps, inputs still in the model are removed one-by-one, and +#' inputs not in the model are added one-by-one +#' +#' If the direction is "backwards": +#' - Inputs are only removed in each step +#' +#' If the direction is "forwardboth": +#' - In the first step all inputs are removed +#' - In following steps (same as "both") +#' +#' If the direction is "forward": +#' - In the first step all inputs are removed and from there inputs are only added +#' +#' +#' For stepping through integer variables in the transformation stage, then +#' these have to be set in the "prm" argument. The stepping process will follow +#' the input selection described above. +#' +#' In case of missing values, especially in combination with auto-regressive +#' models, it can be very important to make sure that only complete cases are +#' included when calculating the score. By providing the `fitfun` argument then +#' the score will be calculated using only the complete cases across horizons +#' and models in each step, see the last examples. +#' +#' Note, that either kseq or kseqopt must be set on the modelfull object. If kseqopt +#' is set, then it is used no matter the value of kseq. +#' +#' @title Forward and backward model selection +#' @param modelfull The full forecastmodel containing all inputs which will be +#' can be included in the selection. +#' @param data The data.list which holds the data on which the model is fitted. +#' @param prm A list of integer parameters to be stepped. Given using the same +#' syntax as parameters for optimization, e.g. `list(I__degree = c(min=3, +#' max=7))` will step the "degree" for input "I". +#' @param direction The direction to be used in the selection process. +#' @param modelstart A forecastmodel. If it's set then it will be used as the +#' selected model from the first step of the stepping. It should be a sub +#' model of the full model. +#' @param keepinputs If TRUE no inputs can be removed in a step, if FALSE then +#' any input can be removed. If given as a character vector with names of +#' inputs, then they cannot be removed in any step. +#' @param optimfun The function which will carry out the optimization in each +#' step. +#' @param fitfun A fit function, should be the same as used in optimfun(). If +#' provided, then the score is caculated with this function (instead of the +#' one called in optimfun(), hence the default is rls_fit(), which is called +#' in rls_optim()). Furthermore, information on complete cases are printed +#' and returned. +#' @param scorefun The score function used. +#' @param printout Logical. Passed on to fitting functions. +#' @param mc.cores The mc.cores argument of mclapply. If debugging it can be +#' necessary to set it to 1 to stop execution. +#' @param ... Additional arguments which will be passed on to optimfun. 
For +#' example control how many steps +#' +#' @return A list with the result of each step: +#' - '$model' is the model selected in each step +#' - '$score' is the score for the model selected in each step +#' - '$optimresult' the result return by the the optimfun +#' - '$completecases' a logical vector (NA if fitfun argument is not given) indicating which time points were complete across all horizons and models for the particular step. +#' +#' @examples +#' +#' +#' # The data, just a rather short period to keep running times short +#' D <- subset(Dbuilding, c("2010-12-15", "2011-02-01")) +#' # Set the score period +#' D$scoreperiod <- in_range("2010-12-22", D$t) +#' # +#' D$tday <- make_tday(D$t, 1:36) +#' # Generate an input which is just random noise, i.e. should be removed in the selection +#' set.seed(83792) +#' D$noise <- make_input(rnorm(length(D$t)), 1:36) +#' +#' # The full model +#' model <- forecastmodel$new() +#' # Set the model output +#' model$output = "heatload" +#' # Inputs (transformation step) +#' model$add_inputs(Ta = "Ta", +#' noise = "noise", +#' mu_tday = "fs(tday/24, nharmonics=5)", +#' mu = "one()") +#' # Regression step parameters +#' model$add_regprm("rls_prm(lambda=0.9)") +#' # Optimization bounds for parameters +#' model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999)) +#' +#' # Select a model, in the optimization just run it for a single horizon +#' # Note that kseqopt could also be set +#' model$kseq <- 5 +#' +#' # Set the parameters to step on, note the +#' prm <- list(mu_tday__nharmonics = c(min=3, max=7)) +#' +#' # Note the control argument, which is passed to optim, it's now set to few +#' # iterations in the offline parameter optimization (MUST be increased in real applications) +#' control <- list(maxit=1) +#' +#' # On Windows multi cores are not supported, so for the examples use only one core +#' mc.cores <- 1 +#' +#' # Run the default selection scheme, which is "both" +#' # (same as "backwardboth" if no start model is given) +#' \donttest{L <- step_optim(model, D, prm, control=control, mc.cores=mc.cores) +#' +#' # The optim value from each step is returned +#' getse(L, "optimresult") +#' getse(L,"score") +#' +#' # The final model +#' L$final$model +#' +#' # Other selection schemes +#' Lforward <- step_optim(model, D, prm, "forward", control=control, mc.cores=mc.cores) +#' Lbackward <- step_optim(model, D, prm, "backward", control=control, mc.cores=mc.cores) +#' Lbackwardboth <- step_optim(model, D, prm, "backwardboth", control=control, mc.cores=mc.cores) +#' Lforwardboth <- step_optim(model, D, prm, "forwardboth", control=control, mc.cores=mc.cores) +#' +#' # It's possible avoid removing specified inputs +#' L <- step_optim(model, D, prm, keepinputs=c("mu","mu_tday"), control=control, mc.cores=mc.cores) +#' +#' # Give a starting model +#' modelstart <- model$clone_deep() +#' modelstart$inputs[2:3] <- NULL +#' L <- step_optim(model, D, prm, modelstart=modelstart, control=control, mc.cores=mc.cores) +#' +#' # If a fitting function is given, then it will be used for calculating the forecasts. 
+#' # Below it's the rls_fit function, so the same as used internally in rls_fit, so only +#' # difference is that now ONLY on the complete cases for all models in each step are used +#' # when calculating the score in each step +#' L1 <- step_optim(model, D, prm, fitfun=rls_fit, control=control, mc.cores=mc.cores) +#' +#' # The easiest way to conclude if missing values have an influence is to +#' # compare the selection result running with and without +#' L2 <- step_optim(model, D, prm, control=control, mc.cores=mc.cores) +#' +#' # Compare the selected models +#' tmp1 <- capture.output(getse(L1, "model")) +#' tmp2 <- capture.output(getse(L2, "model")) +#' identical(tmp1, tmp2)} +#' +#' +#' # Note that caching can be really smart (the cache files are located in the +#' # cachedir folder (folder in current working directory, can be removed with +#' # unlink(foldername)) See e.g. `?rls_optim` for how the caching works +#' # L <- step_optim(model, D, prm, "forward", cachedir="cache", cachererun=FALSE, mc.cores=mc.cores) +#' +#' @importFrom parallel mclapply +#' +#' @export + +step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","backward","forward","backwardboth","forwardboth"), modelstart=NA, keepinputs = FALSE, optimfun = rls_optim, fitfun = NA, scorefun = rmse, printout = FALSE, mc.cores = getOption("mc.cores", 2L), ...){ + # Do: + # - checking of input, model, ... + # - Maybe have "cloneit" argument in optimfun, then don't clone inside optim. + # - Add argument controlling how much is kept in each iteration (e.g all fitted models) + # - Is raised as an issue: For score make sure that only the same points are included for all models, or print warning if not! + # - With lm we should use have some regularization, so use AIC or BIC as the score + # - Differentiate the parameters given to optimfun for the selected model and for the new models in each step. E.g. take more steps on the selected model. 
+ # + # - Help: prm <- list(I__degree = c(min=1, max=7), mu_tday__nharmonics = c(min=1, max=7)) + # + # For keeping all the results + L <- list() + # The first value in direction is default + if(length(direction) > 1){ direction <- direction[1] } + # Init + istep <- 1 + # Different start up, if a start model is given + if( class(modelstart)[1] == "forecastmodel" ){ + # The full model will not be changed from here, so no need to clone it + mfull <- modelfull + m <- modelstart$clone() + }else{ + # No start model if given + mfull <- modelfull$clone_deep() + # Insert the starting prm values into the full model (must be in it, since added inputs are taken from there) + if(!is.na(prm[1])){ + if( direction == "backward" ){ + mfull$insert_prm(unlist(getse(prm, "max"))) + }else if( direction == "forward" ){ + mfull$insert_prm(unlist(getse(prm, "min"))) + }else{ + # Both directions, then start at init, or halfway between min and max + i <- which(sapply(prm, function(x){ "init" %in% names(x) })) + if(length(i)){ + mfull$insert_prm(unlist(getse(prm[i], "init"))) + } + i <- which(sapply(prm, function(x){ !"init" %in% names(x) })) + if(length(i)){ + tmp <- unlist(getse(prm[i], "min")) + mfull$insert_prm(tmp + round((unlist(getse(prm[i], "max")) - tmp) / 2)) + } + } + } + # Init current model + m <- mfull$clone_deep() + # + if(length(grep("forward",direction))){ + # Remove all inputs + m$inputs <- list() + # Start with no fit + istep <- 0 + scoreCurrent <- Inf + } + } + # Find the inputs to keep, if any + if(class(keepinputs) == "logical"){ + if(keepinputs){ + keepinputs <- nams(mfull$inputs) + }else{ + keepinputs <- "" + } + } + # Helper + c_mStep <- function(l, nms){ + names(l) <- nms + l <- l[!is.na(l)] + c(mStep, l) + } + # Go + done <- FALSE + while(!done){ + message("\n------------------------------------------------------------------------\n") + message(pst("Step ",istep,". Current model:")) + message(print(m)) + # If the init model is not yet optimized + if(istep == 1 & length(L) == 0){ + # Optimize + res <- optimfun(m, data, printout=printout, scorefun=scorefun, ...) + # Should we forecast only on the complete cases? + if(class(fitfun) == "function"){ + # Forecast to get the complete cases + mtmp <- m$clone_deep() + # If kseqopt is set, then make sure that it is used when fitting here + if(!is.na(m$kseqopt)){ + mtmp$kseq <- m$kseqopt + } + Yhat <- fitfun(res$par, mtmp, data, printout=printout)$Yhat + scoreCurrent <- sum(score(residuals(Yhat,data[[m$output]]),data$scoreperiod)) + casesCurrent <- complete_cases(Yhat) + }else{ + scoreCurrent <- res$value + casesCurrent <- NA + } + # Keep it + istep <- 1 + L[[istep]] <- list(model = m$clone_deep(), score = scoreCurrent, optimresult = res, completecases = casesCurrent) + } + + message("Current score: ",format(scoreCurrent,digits=7)) + if(class(fitfun) == "function"){ + message("Current complete cases: ",sum(casesCurrent)," (Diff in score from optim:",L[[istep]]$optimresult$value-scoreCurrent,")") + } + # Next step + istep <- istep + 1 + # -------- + mStep <- list() + + # Generate the input modified models + # Remove inputs + if(length(grep("backward|both", direction))){ + irm <- which(!nams(m$inputs) %in% keepinputs) + # Remove any? 
If only one input, then don't remove it + if(length(irm) & length(m$inputs) > 1){ + tmp <- lapply(irm, function(i){ + ms <- m$clone_deep() + ms$inputs[[i]] <- NULL + return(ms) + }) + mStep <- c_mStep(tmp, pst("-",names(m$inputs)[irm])) + } + } + + # Add inputs + if(length(grep("forward|both", direction))){ + # Add input one by one + iin <- which(!names(mfull$inputs) %in% names(m$inputs)) + if(length(iin)){ + tmp <- lapply(iin, function(i){ + ms <- m$clone_deep() + # Add one input + ms$inputs[[length(ms$inputs) + 1]] <- mfull$inputs[[i]] + names(ms$inputs)[length(ms$inputs)] <- names(mfull$inputs)[i] + # Sort according to the order in the full model + ms$inputs <- ms$inputs[order(match(names(ms$inputs), names(mfull$inputs)))] + return(ms) + }) + mStep <- c_mStep(tmp, pst("+",names(mfull$inputs)[iin])) + } + } + + # Step parameters + if(!is.na(prm[1])){ + if(length(grep("backward|both", direction))){ + # Count down the parameters one by one + tmp <- lapply(1:length(prm), function(i){ + p <- m$get_prmvalues(names(prm[i])) + # If the input is not in the current model, then p is NA, so don't include it for fitting + if(!is.na(p)){ + # Only the ones with prms above minimum + if(prm[[i]]["min"] < p){ + ms <- m$clone_deep() + p <- p - 1 + ms$insert_prm(p) + # Return both the prm value and the name of the model to be printed + return(list(ms, pst(names(p),"=",p))) + } + } + return(list(NA,NA)) + }) + mStep <- c_mStep(getse(tmp,1), getse(tmp,2)) + } + if(length(grep("forward|both", direction))){ + # Count up the parameters one by one + tmp <- mclapply(1:length(prm), function(i){ + p <- m$get_prmvalues(names(prm[i])) + # If the input is not in the current model, then p is NA, so don't include it for fitting + if(!is.na(p)){ + # Only the ones with prms above minimum + if(p < prm[[i]]["max"]){ + ms <- m$clone_deep() + p <- p + 1 + ms$insert_prm(p) + # Return both the prm value and the name of the model to be printed + return(list(ms, pst(names(p),"=",p))) + } + } + return(list(NA,NA)) + }) + mStep <- c_mStep(getse(tmp,1), getse(tmp,2)) + } + } + + # Optimize all the new models + if(length(mStep) == 0){ + # Nothing to do, so stop + done <- TRUE + }else{ + + # Run the optimization + Lstep <- mclapply(1:length(mStep), function(i, ...){ + optimfun(mStep[[i]], data, printout=printout, scorefun=scorefun, ...) + }, mc.cores=mc.cores, ...) + names(Lstep) <- names(mStep) + + # Complete cases considered: Should we forecast and recalculate the score on complete cases from all models? 
+ if(class(fitfun) == "function"){ + LYhat <- mclapply(1:length(mStep), function(i){ + mtmp <- mStep[[i]]$clone_deep() + # If kseqopt is set, then make sure that it is used when fitting here + if(!is.na(m$kseqopt)){ + mtmp$kseq <- m$kseqopt + } + fitfun(Lstep[[i]]$par, mtmp, data, printout=printout)$Yhat + }, mc.cores=mc.cores) + # Use complete cases across models and horizons per default + scoreStep <- apply(score(residuals(LYhat,data[[m$output]]), data$scoreperiod), 2, sum) + casesStep <- sapply(LYhat, complete_cases) + }else{ + # Use the scores from optimfun + scoreStep <- unlist(getse(Lstep, "value")) + casesStep <- matrix(rep(NA,length(mStep)), nrow=1) + } + + # Print out + tmp <- as.matrix(unlist(getse(Lstep, "value"))) + if(scoreCurrent == Inf){ + tmp[ ,1] <- pst(format(tmp, digits=2)) + nams(tmp) <- "Score" + }else{ + tmp[ ,1] <- pst(format(100 * (scoreCurrent - tmp) / scoreCurrent, digits=2),"%") + nams(tmp) <- "Improvement" + } + if(class(fitfun) == "function"){ + tmp <- cbind(tmp, apply(casesStep != casesCurrent, 2, sum)) + nams(tmp)[2] <- "CasesDiff" + } + print_to_message(tmp) + + # Compare scores: Is one the step models score smaller than the current ref? + imin <- which.min(scoreStep) + if( scoreStep[imin] < scoreCurrent ){ + # Keep the best model (with it's optimized parameters) + m <- mStep[[imin]] + res <- Lstep[[imin]] + scoreCurrent <- scoreStep[[imin]] + casesCurrent <- casesStep[ ,imin] + # Insert the optimized parameters, such that next step starts at the optimized parameter values + if(is.null(names(res$par))){ + names(res$par) <- row.names(m$prmbounds) + } + m$prmbounds[names(res$par),"init"] <- res$par + # Keep for the final result + m$insert_prm(res$par) + L[[istep]] <- list(model = m$clone_deep(), score = scoreCurrent, optimresult = res, completecases = casesCurrent) + }else{ + # No improvement obtained from reduction, so return the current model (last in the list) + done <- TRUE + } + } + if(done){ + message("\n------------------------------------------------------------------------\n\nFinal model:") + message(print(m)) + } + } + if(length(L) == 1){ + names(L) <- "final" + }else{ + names(L) <- c(pst("step",1:(length(L)-1)),"final") + } + invisible(L) +} diff --git a/cran-comments.md b/cran-comments.md index a3024177f22c1ea47ce7f56edaff52697a199645..437dde459efb458e3fc75cff34c6d713f5e318bb 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,15 @@ +#---------------------------------------------------------------- +# v0.10.0 + +We have added features and done some small changes to functions. This version +should be fully backward compatible. + + + + + + + #---------------------------------------------------------------- # v0.9.3 # Response to review of v0.9.2 by Uwe Ligges diff --git a/inst/CITATION b/inst/CITATION index 2640aff46571d5ba6daa7b7a4c56f25b4747a286..a7bf2f42f7926cf58a1e068d906609aef6040430 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -15,13 +15,3 @@ citEntry( "We are in process of writing a journal paper about the package, but for now we referer to the paper 'Short-term heat load forecasting for single family houses', in which the implemented modelling is described." ) ) - - -citEntry( - entry = "Manual", - title = "{{onlineforecast}: Forecast Modelling for Online Applications}", - author = "Peder Bacher and Hjörleifur G. 
Bergsteinsson", - year = "2020", - note = "R package version 0.9.3", - url = "https://onlineforecasting.org" -) diff --git a/make.R b/make.R index 25614e49c15013cb506efa20af15ca5e36dff562..857c1a06851ee2d62b75f70a8f90335ac3566998 100644 --- a/make.R +++ b/make.R @@ -31,7 +31,7 @@ library(roxygen2) # Misc # # Add new vignette -#usethis::use_vignette("setup-data") +# Don't use name of existing file, it will overwrite! usethis::use_vignette("model-selection") # ---------------------------------------------------------------- @@ -41,7 +41,7 @@ library(roxygen2) # http://r-pkgs.had.co.nz/tests.html # # Initialize first time the the testing framework -#use_testthat() +# use_testthat() # Init new test #use_test("newtest") @@ -50,10 +50,10 @@ library(roxygen2) #test() # # Run the examples +#load_all(as.package("../onlineforecast")) #run_examples() # # Run tests in a single file -# load_all(as.package("../onlineforecast")) # test_file("tests/testthat/test-rls-heat-load.R") @@ -63,7 +63,7 @@ document() build(".", vignettes=TRUE) # Install it -install.packages("../onlineforecast_0.9.3.tar.gz") +install.packages("../onlineforecast_0.10.0.tar.gz") library(onlineforecast) # ---------------------------------------------------------------- @@ -80,11 +80,11 @@ library(onlineforecast) # Test before release devtools::check() -devtools::check_built("../onlineforecast_0.9.3.tar.gz") +devtools::check_built("../onlineforecast_0.10.0.tar.gz") # Does give different results than check() above -#system("R CMD check --as-cran ../onlineforecast_0.9.3.tar.gz") -system("R CMD check ../onlineforecast_0.9.3.tar.gz") +#system("R CMD check --as-cran ../onlineforecast_0.9.4.tar.gz") +system("R CMD check ../onlineforecast_0.10.0.tar.gz") unlink("onlineforecast.Rcheck/", recursive=TRUE) # Use for more checking: diff --git a/man/AR.Rd b/man/AR.Rd new file mode 100644 index 0000000000000000000000000000000000000000..251c3b23fd6a02063446ebde2c2f4c91b2ce76e5 --- /dev/null +++ b/man/AR.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AR.R +\name{AR} +\alias{AR} +\title{Auto-Regressive (AR) input} +\usage{ +AR(lags) +} +\arguments{ +\item{lags}{integer vector: The lags of the AR to include.} +} +\value{ +A list of matrices, one for each lag in lags, each with columns according to model$kseq. +} +\description{ +Generate auto-regressive (AR) inputs in a model +} +\details{ +The AR function can be used in an onlineforecast model formulation. It +creates the input matrices for including AR inputs in a model during the +transformation stage. It takes the values from the model output in the provided data +does the needed lagging. + +The lags must be given according to the one-step ahead model, e.g.: + +\code{AR(lags=c(0,1))} will give: Y_{t+1|t} = \eqn{\phi_1} y_{t-0} + \eqn{\phi_2} y_{t-1} + \eqn{\epsilon}_{t+1} + +and: + +\code{AR(lags=c(0,3,12))} will give: Y_{t+1|t} = \eqn{\phi}_1 y_{t-0} + \eqn{\phi}_2 y_{t-3} + \eqn{\phi}_3 y_{t-12} + \eqn{\epsilon}_{t+1} + +Note, that + +For k>1 the coefficients will be fitted individually for each horizon, e.g.: + +\code{AR(lags=c(0,1))} will be the multi-step AR: Y_{t+k|t} = \eqn{\phi}_{1,k} y_{t-0} + \eqn{\phi}_{2,k} y_{t-1} + \eqn{\epsilon}_{t+k|t} + +See the details in ??(ref til vignette). 
+} \examples{ + +# Setup data and a model for the example +D <- Dbuilding +model <- forecastmodel$new() +model$output = "heatload" +# Use the AR in the transformation stage +model$add_inputs(AR = "AR(c(0,1))") +# Regression parameters +model$add_regprm("rls_prm(lambda=0.9)") +# kseq must be added +model$kseq <- 1:4 +# In the transformation stage the AR input will be generated +# See that it generates two input matrices, simply with the lagged heat load at t for every k +model$transform_data(subset(D, 1:10)) + +# Fit with recursive least squares (no parameters prm in the model) +fit <- rls_fit(c(lambda=0.99), model, D, returnanalysis=TRUE) + +# Plot the result, see "?plot_ts.rls_fit" +plot_ts(fit, xlim=c(ct("2010-12-20"),max(D$t))) +# Plot for a short period with peaks +plot_ts(fit, xlim=c("2011-01-05","2011-01-07")) + +# For online updating, see ??ref{vignette, not yet available}: +# the needed lagged output values are stored in the model for next time new data is available +model$yAR +# The maximum lag needed is also kept +model$maxlagAR + +} diff --git a/man/Dbuilding.Rd b/man/Dbuilding.Rd new file mode 100644 index 0000000000000000000000000000000000000000..666d466906c49b11eddb38194d226c34557df2d4 --- /dev/null +++ b/man/Dbuilding.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{Dbuilding} +\alias{Dbuilding} +\title{Observations and weather forecasts from a single-family building, weather station and Danish Meteorological Institute (DMI)} +\format{ +A data list with 1854 rows and 7 variables: +\describe{ + \item{t}{Time in GMT as POSIXct} + \item{heatload}{The heatload of a single family building in W} + \item{heatloadtotal}{The average heatload of 16 single family buildings in W} + \item{Taobs}{Observed ambient temperature at the weather station in Celsius} + \item{Iobs}{Observed global radiation at the weather station in W/m^2} + \item{Ta}{Weather forecasts of ambient temperature up to 36 hours ahead from DMI in Celsius} + \item{I}{Weather forecasts of global radiation up to 36 hours ahead from DMI in W/m^2} +} +} +\source{ +See \url{https://onlineforecasting.org/examples/datasets.html}. +} +\usage{ +Dbuilding +} +\description{ +Data of the period from 2010-12-15 to 2011-03-01. The weather station was located within a range of 10 km from the building. +} +\details{ +Hourly average values. The time point is set at the end of the hour. + +Set in the format of a data.list used as input to forecast models in the onlineforecast package. +} +\keyword{datasets} diff --git a/man/as.data.frame.data.list.Rd b/man/as.data.frame.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..5b3e712f02743049a679b6a097746714ddbb0181 --- /dev/null +++ b/man/as.data.frame.data.list.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{as.data.frame.data.list} +\alias{as.data.frame.data.list} +\title{Convert to data.frame} +\usage{ +\method{as.data.frame}{data.list}(x, row.names = NULL, optional = FALSE, ...) +} +\arguments{ +\item{x}{The data.list to be converted.} + +\item{row.names}{Not used.} + +\item{optional}{Not used.} + +\item{...}{Not used.} +} +\value{ +A data.frame +} +\description{ +Converts a data.list to a data.frame. +} +\details{ +The forecasts in the data.list will result in columns named \code{varname.kxx} in the data.frame.
+} +\examples{ + +#' # Use the data.list with building heat load +D <- Dbuilding +# Take a subset +D <- subset(D, 1:5, nms=c("t","Taobs","Ta","Iobs","I"), kseq=1:3) + +# Convert to a data.frame, note the names of the forecasts are appended .kxx (i.e. for Ta and I) +as.data.frame(D) + +} diff --git a/man/as.data.list.Rd b/man/as.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..9567fef28f93a2771f8bb7d8b6d642e7edbbb5c0 --- /dev/null +++ b/man/as.data.list.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as.data.list.R +\name{as.data.list} +\alias{as.data.list} +\alias{as.data.list.data.frame} +\title{Convert to data.list class} +\usage{ +as.data.list(object) + +\method{as.data.list}{data.frame}(object) +} +\arguments{ +\item{object}{The data.frame to be converted.} +} +\value{ +a value of class data.list + +a data.list +} +\description{ +These functions will convert the object into a data.list. + +Convert a data.frame into a data.list +} +\details{ +A data.list is simply a list of vectors and data.frames. For the use in the +onlineforecast package the following format must be kept: + + - t: A vector of time. + + - vectors with same length as t: Holds observations and values synced to time t. + + - data.frames with number of rows as time t: Holds forecasts in each column named by \code{kxx} where \code{xx} is the + horizon, e.g. \code{k0} is synced as observations, and \code{k1} is one-step ahead. + +The convention is that columns with forecasts are postfixed with \code{.kxx} where +\code{xx} is the horizon. See the examples. +} +\examples{ +# Convert a dataframe with time and two observed variables +X <- data.frame(t=1:10, x=1:10, y=1:10) +as.data.list(X) + +# Convert a dataframe with time, forecast and an observed variable +X <- data.frame(t=1:10, x.k1=1:10, x.k2=10:1, yobs=1:10, y.k1=1:10, y.k2=1:10) +as.data.list(X) + +# Can be converted back and forth +X +as.data.frame(as.data.list(X)) + +} +\seealso{ +\code{For specific detailed info see the children, e.g. \link{as.data.list.data.frame} } + +as.data.list +} diff --git a/man/aslt.Rd b/man/aslt.Rd new file mode 100644 index 0000000000000000000000000000000000000000..aa86128e6723d279ce67a15b92c992a767d573a9 --- /dev/null +++ b/man/aslt.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aslt.R +\name{aslt} +\alias{aslt} +\alias{aslt.character} +\alias{aslt.POSIXct} +\alias{aslt.POSIXlt} +\alias{aslt.numeric} +\title{Convertion to POSIXlt} +\usage{ +aslt(object, ...) + +\method{aslt}{character}(object, tz = "GMT", ...) + +\method{aslt}{POSIXct}(object, tz = NA, ...) + +\method{aslt}{POSIXlt}(object, tz = NA, ...) + +\method{aslt}{numeric}(object, ...) +} +\arguments{ +\item{object}{The character, POSIXct, POSIClt, or numeric which is converted to POSIXct.} + +\item{...}{Arguments to be passed to methods.} + +\item{tz}{Timezone. If set, then the time zone will be changed of the object.} +} +\value{ +An object of class POSIXlt +} +\description{ +The argument is converted into POSIXlt with tz="GMT". +} +\section{Methods}{ + +#' @examples + +# Create a POSIXlt with tz="GMT" +aslt("2019-01-01") +class(aslt("2019-01-01")) +aslt("2019-01-01 01:00:05") + +# Convert between time zones +x <- aslt("2019-01-01", tz="CET") +aslt(x,tz="GMT") + +# To seconds and back again +aslt(as.numeric(x, units="sec")) + + + - aslt.character: Simply a wrapper for \code{as.POSIXlt} + + + - aslt.POSIXct: Converts to POSIXct. 
+ + + - aslt.POSIXlt: Changes the time zone of the object if tz is given. + + + - aslt.numeric: Converts from UNIX time in seconds to POSIXlt. +} + diff --git a/man/bs.Rd b/man/bs.Rd new file mode 100644 index 0000000000000000000000000000000000000000..1356ccfede1417905e4bf8a09d3d00165205e543 --- /dev/null +++ b/man/bs.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bspline.R +\name{bspline} +\alias{bspline} +\title{Compute base splines of a variable using the R function \code{splines::bs}, use in the transform stage.} +\usage{ +bspline( + X, + Boundary.knots = NA, + intercept = FALSE, + df = NULL, + knots = NULL, + degree = 3, + bknots = NA, + periodic = FALSE +) +} +\arguments{ +\item{X}{data.frame (as part of data.list) with horizons as columns named \code{kxx} (i.e. one for each horizon)} + +\item{Boundary.knots}{The value is NA: then the boundaries are set to the range of each horizons (columns in X). See \code{?splines::bs}} + +\item{intercept}{See \code{?splines::bs}.} + +\item{df}{See \code{?splines::bs}} + +\item{knots}{See \code{?splines::bs}} + +\item{degree}{See \code{?splines::bs}} + +\item{bknots}{Is just a short for Boundary.knots and replace Boundary.knots (if Boundary.knots is not given)} + +\item{periodic}{Default FALSE. If TRUE, then \code{pbs::pbs} is called and periodic splines are generated.} +} +\value{ +List of data frames with the computed base splines, each with columns for the same horizons as in X +} +\description{ +Simply wraps the \code{splines::bs}, such that it can be used in the transformation stage. +} +\details{ +See the help for all arguments with \code{?splines::bs}. NOTE that two arguments have different default values. + +See the example \url{https://onlineforecasting.org/examples/solar-power-forecasting.html} where the function is used in a model. 
+} +\examples{ + +# How to make a diurnal curve using splines + # Select first 54 hours from the load data + D <- subset(Dbuilding, 1:76, kseq=1:4) + # Make the hour of the day as a forecast input + D$tday <- make_tday(D$t, kseq=1:4) + D$tday + + # Calculate the base splines for each column in tday + L <- bspline(D$tday) + + # Now L holds a data.frame for each base spline + str(L) + # Hence this will result in four inputs for the regression model + + # Plot (note that the splines period starts at tday=0) + plot(D$t, L$bs1$k1, type="s") + for(i in 2:length(L)){ + lines(D$t, L[[i]]$k1, col=i, type="s") + } + + # In a model formulation it will be: + model <- forecastmodel$new() + model$add_inputs(mutday = "bspline(tday)") + # Such that at the transform stage will give the same as above + model$transform_data(D) + +# Periodic splines are useful for modelling a diurnal harmonical functions +L <- bspline(D$tday, bknots=c(0,24), df=4, periodic=TRUE) +# or +L <- pbspline(D$tday, bknots=c(0,24), df=4) +# Note, how it has to have high enough df, else it generates an error + +# Plot +plot(D$t, L$bs1$k1, type="s") +for(i in 2:length(L)){ + lines(D$t, L[[i]]$k1, col=i, type="s") +} + +} +\seealso{ +Other Transform stage functions: +\code{\link{pbspline}()} +} +\concept{Transform stage functions} diff --git a/man/cache_name.Rd b/man/cache_name.Rd new file mode 100644 index 0000000000000000000000000000000000000000..84e82bb1d9a3de708f2736d73bc16f7de695b102 --- /dev/null +++ b/man/cache_name.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache_name.R +\name{cache_name} +\alias{cache_name} +\title{Generation of a name for a cache file for the value of a function.} +\usage{ +cache_name(..., cachedir = "cache") +} +\arguments{ +\item{...}{The objects from which to calculate cache file name. +If no objects given, then all the objects of the calling function are used for generating the checksum for the file name.} + +\item{cachedir}{Path for saving the cache, i.e. prefixed to the generated name, remember to end with '/' to make a directory.} +} +\value{ +A generated cache file name. +} +\description{ +Caching of the value returned by a function +} +\details{ +Use it in the beginning of a function, which runs a time consuming calculation, like fitting a model using optimization. + +It makes a cache name, which can be used to save a unique cache file (see \code{\link{cache_save}()}). + +The \code{cache_name} function must receive all the objects (in \code{...}) which influence the value of the function. It simply calculates a checksum using the \code{digest} package. + +Further, it finds the name of the calling function and its definition, such that if anything changes in the function definition, then the cache file name changes too. +} +\examples{ +# A function for demonstrating the using caching +fun <- function(x, y){ + # Generate the cache name (no argument given, so both x and y is used) + nm <- cache_name(cachedir=cachedir) + # If the result is cached, then just return it + if(file.exists(nm)){ return(readRDS(nm)) } + # Do the calculation + res <- x^2 + y + 1 + # Wait 1 sec + Sys.sleep(1) + # Save for cache + cache_save(res, nm) + # Return + return(res) +} + +# For this example use a temporary directory +# In real use this should not be temporary! (changes between R sessions with tempdir()) +cachedir <- tempdir() + +# Uncomment to run: +# First time it takes at least 1 sec. 
+#fun(x=2,y=2) +# Second time it loads the cache and is much faster +#fun(x=2,y=2) +# Try changing the arguments (x,y) and run again + +# See the cache file(s) +#dir(cachedir) +# Delete the cache folder +#unlink(cachedir, recursive=TRUE) + +# Demonstrate how cache_name() is functioning +# Cache using the all objects given in the function calling, i.e. both x and y +fun <- function(x,y){ + x^2 + y + 1 + return(cache_name()) +} +# These are the same (same values) +fun(x=1,y=2) +fun(1,2) +fun(y=2,x=1) +# But this one is different +fun(x=2,y=1) + +# Test: cache using the values specified in the cache_name call +fun2 <- function(x,y){ + x^2 + y + 1 + return(cache_name(x)) +} + +# So now its only the x value that change the name +fun2(1,2) +fun2(1,3) +# But this one is different +fun2(3,3) +# And the function named changed the name + +} diff --git a/man/cache_save.Rd b/man/cache_save.Rd new file mode 100644 index 0000000000000000000000000000000000000000..23cbcaafa42f4fc6e5cd789ff2febfe087c9dee3 --- /dev/null +++ b/man/cache_save.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache_save.R +\name{cache_save} +\alias{cache_save} +\title{Save a cache file (name generated with \code{code_name()}} +\usage{ +cache_save(object, filename) +} +\arguments{ +\item{object}{The object to cache (i.e. the value of the evaluating function).} + +\item{filename}{The cache file name (i.e. use the one generated by cache_name, see examples).} +} +\description{ +Saves the object as an .RDS file with the filename +} +\details{ +See the examples for \code{\link{cache_name}()}. +} diff --git a/man/check.Rd b/man/check.Rd new file mode 100644 index 0000000000000000000000000000000000000000..50f6de5cbd6f72bd3f448fa524346f86ea9d6083 --- /dev/null +++ b/man/check.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{check} +\alias{check} +\title{Checking the object for appropriate form.} +\usage{ +check(object) +} +\arguments{ +\item{object}{The object to be checked.} +} +\value{ +The tables generated. + +# Check a data.list (see \code{?\link{check.data.list}}) +check(Dbuilding) +} +\description{ +Checking the object for appropriate form. +} +\details{ +Prints on table form the result of the check. +} diff --git a/man/check.data.list.Rd b/man/check.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..765165402045babc53bdf564b2b1cabfd3319f98 --- /dev/null +++ b/man/check.data.list.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{check.data.list} +\alias{check.data.list} +\title{Checking the data.list for appropriate form.} +\usage{ +\method{check}{data.list}(object) +} +\arguments{ +\item{object}{The object to be checked.} +} +\value{ +The tables generated. + +# Check a data.list (see \code{?\link{check.data.list}}) +check(Dbuilding) + +# Vector with observations not same length as t +D <- Dbuilding +D$heatload <- D$heatload[1:10] +check(D) + +# Some NAs in k1 forecast +D <- Dbuilding +D$Ta$k1[1:1500] <- NA +check(D) + +# Wrong column names +names(D$Ta) +} +\description{ +Checking the data.list for appropriate form. +} +\details{ +Prints a check of the time vector t, which must have equidistant time points and no NAs. 
+ +Then the results of checking vectors (observations): + - ok: A 'V' indicates a successful check + - maxNAs: Proportion of NAs + - length: printed if not the same as the 't' vector + - class: the class + +Then the results of checking data.frames and matrices (forecasts): + - ok: a 'V' indicates a successful check + - maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs + - meanNAs: the proportion of NAs of the entire data.frame + - nrow: printed if not the same as the 't' vector length + - colnames: columns must be names 'kxx', where 'xx' is the horizon + - sameclass: 'X' if not all columns are the same class + - class: prints the class of the columns if they are all the same +} diff --git a/man/complete_cases.Rd b/man/complete_cases.Rd new file mode 100644 index 0000000000000000000000000000000000000000..8a53bb6c4dd6a4e47b95c38961b5875af4a3cd0c --- /dev/null +++ b/man/complete_cases.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/complete_cases.R +\name{complete_cases} +\alias{complete_cases} +\alias{complete_cases.list} +\alias{complete_cases.data.frame} +\title{Find complete cases in forecast matrices} +\usage{ +complete_cases(object, kseq = NA) + +\method{complete_cases}{list}(object, kseq = NA) + +\method{complete_cases}{data.frame}(object, kseq = NA) +} +\arguments{ +\item{object}{A data.frame (with columns named 'kxx') or a list of +data.frames.} + +\item{kseq}{integer vector: If given then only these horizons are processed.} +} +\value{ +A logical vector specifying if there is no missing + values across all horizonsd. +} +\description{ +Returns a logical vector indicating the time points which +} +\details{ +Given a forecast matrix the forecasts are lagged "+k" steps to align them and +then 'complete.cases()' is run on that . + +Gieven a list of forecast matrices the points where all are complete (also all horizons) are complete are TRUE. +} +\examples{ +# Take a small data set +D <- subset(Dbuilding, 1:20, kseq=1:5) +# Check the forecast matrix of ambient temperature +D$Ta +# Which are complete over all horizons? The first are not since not all horizons +# have a value there (after lagging) +complete_cases(D$Ta) +# Same goes if given as a list +complete_cases(D["Ta"]) +# and if more than one is given +complete_cases(D[c("Ta","I")]) + +# Set some NA of some horizon +D$I$k3[8:9] <- NA +# Now they are recognized as not complete +complete_cases(D[c("Ta","I")]) + +# If we deal with residuals, which are observations and there for have column names "hxx" +Resid <- residuals(D$Ta, D$Taobs) +names(Resid) +# With columns with "h" instead of "k" no lagging occurs in complete_cases +complete_cases(Resid) +# +Resid2 <- Resid +Resid$h3[8:9] <- NA +complete_cases(list(Resid,Resid2)) + +} +\author{ +Peder Bacher +} diff --git a/man/ct.Rd b/man/ct.Rd new file mode 100644 index 0000000000000000000000000000000000000000..5783673fc658d0de2fdf28d0aaa3fa2ff3c33915 --- /dev/null +++ b/man/ct.Rd @@ -0,0 +1,128 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ct.R +\name{ct} +\alias{ct} +\alias{ct.character} +\alias{ct.POSIXct} +\alias{ct.POSIXlt} +\alias{ct.numeric} +\title{Convertion to POSIXct} +\usage{ +ct(object, ...) + +\method{ct}{character}(object, tz = "GMT", ...) + +\method{ct}{POSIXct}(object, tz = NA, duplicatedadd = NA, ...) + +\method{ct}{POSIXlt}(object, tz = NA, duplicatedadd = NA, ...) + +\method{ct}{numeric}(object, ...) 
+}
+\arguments{
+\item{object}{The object to convert. Can be: character, numeric, POSIXct or POSIXlt.}
+
+\item{...}{Arguments to be passed to methods.}
+
+\item{tz}{Timezone. If set, then the time zone of the object will be changed.}
+
+\item{duplicatedadd}{Seconds to be added to duplicated time stamps, to mitigate the problem of duplicated timestamps at the shift to winter time. So the second time a time stamp occurs (identified with \code{duplicated}), the seconds will be added to it.}
+}
+\value{
+An object of class POSIXct
+}
+\description{
+The object is converted into POSIXct with tz="GMT".
+}
+\details{
+A simple helper, which wraps \code{\link{as.POSIXct}} and sets the time zone to "GMT" by default.
+}
+\section{Methods}{
+
+
+
+ - ct.character: Simply a wrapper for \code{as.POSIXct} with default \code{tz}
+
+
+ - ct.POSIXct: Changes the time zone of the object if \code{tz} is given.
+
+
+ - ct.POSIXlt: Converts to POSIXct.
+
+
+ - ct.numeric: Converts from UNIX time in seconds to POSIXct with \code{tz} as GMT.
+}
+
+\examples{
+
+
+# Create a POSIXct with tz="GMT"
+ct("2019-01-01")
+class(ct("2019-01-01"))
+ct("2019-01-01 01:00:05")
+
+
+# Convert to POSIXct
+class(ct(as.POSIXlt("2019-01-01")))
+
+# To seconds and back again
+ct(as.numeric(1000, units="sec"))
+
+
+# --------
+# Convert character timestamps from a time zone which has daylight saving time
+# Example from CET (with CEST, which is the summer time)
+#
+# The points of shifting to and from summer time:
+# DST Start (Clock Forward)    DST End (Clock Backward)
+# Sunday, March 31, 02:00      Sunday, October 27, 03:00
+
+# --------
+# From winter time to summer time
+txt <- c("2019-03-31 01:00",
+         "2019-03-31 01:30",
+         "2019-03-31 03:00",
+         "2019-03-31 03:30")
+x <- ct(txt, tz="CET")
+x
+ct(x, tz="GMT")
+
+# BE AWARE that converting times between 02:00:00 and 02:59:59 (the hour skipped at the shift)
+# will lead to a wrong conversion
+txt <- c("2019-03-31 01:30",
+         "2019-03-31 02:00",
+         "2019-03-31 03:30")
+x <- ct(txt, tz="CET")
+x
+ct(x, tz="GMT")
+# This can be detected with a diff on the time, since not all steps are equal
+plot(diff(ct(x, tz="GMT")))
+
+# --------
+# The shift to winter time is more problematic
+# It works like this
+txt <- c("2019-10-27 01:30",
+         "2019-10-27 02:00",
+         "2019-10-27 02:30",
+         "2019-10-27 03:00",
+         "2019-10-27 03:30")
+x <- ct(txt, tz="CET")
+x
+ct(x, tz="GMT")
+
+# However, timestamps can also be given like this
+txt <- c("2019-10-27 01:30",
+         "2019-10-27 02:00",
+         "2019-10-27 02:30",
+         "2019-10-27 02:00",
+         "2019-10-27 02:30",
+         "2019-10-27 03:00",
+         "2019-10-27 03:30")
+x <- ct(txt, tz="CET")
+x
+ct(x, tz="GMT")
+# Again this can be detected, since not all steps are equal
+plot(diff(ct(x, tz="GMT")))
+# This can be fixed by the following (note that it can go wrong, e.g. with gaps around the conversion etc.)
+ct(x, tz="GMT", duplicatedadd=3600)
+
+}
diff --git a/man/data.list.Rd b/man/data.list.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..81840c491b023e2b9e48c02be8fcae6162d67802
--- /dev/null
+++ b/man/data.list.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.list.R
+\name{data.list}
+\alias{data.list}
+\title{Make a data.list}
+\usage{
+data.list(...)
+}
+\arguments{
+\item{...}{Should hold: time t, observations as vectors and forecasts as data.frames}
+}
+\value{
+a data.list.
+}
+\description{
+Make a data.list of the vectors and data.frames given.
+}
+\details{
+See the vignette 'setup-data' on how a data.list must be set up.
+ +It's simply a list of class \code{data.list} holding: + - vector \code{t} + - vector(s) of observations + - data.frames (or matrices) of forecast inputs +} +\examples{ +# Put together a data.list +# The time vector +time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600) +# Observations time series (as vector) +xobs <- rnorm(length(time)) +# Forecast input as data.frame +X <- data.frame(matrix(rnorm(length(time)*3), ncol=3)) +names(X) <- pst("k",1:3) + +D <- data.list(t=time, xobs=xobs, X=X) + +# Check it +check(D) + +} diff --git a/man/depth.Rd b/man/depth.Rd new file mode 100644 index 0000000000000000000000000000000000000000..859ba15d422cd3935fafbae99ae216cbb2f7a383 --- /dev/null +++ b/man/depth.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/depth.R +\name{depth} +\alias{depth} +\title{Depth of a list} +\usage{ +depth(this) +} +\arguments{ +\item{this}{list} +} +\value{ +integer +} +\description{ +Depth of a list +} +\details{ +Returns the depth of a list +} diff --git a/man/equals-.data.list.Rd b/man/equals-.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c45f61ae4efd6bfdc234accc6a4a5b010e4acb6b --- /dev/null +++ b/man/equals-.data.list.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{==.data.list} +\alias{==.data.list} +\title{Determine if two data.lists are identical} +\usage{ +\method{==}{data.list}(x, y) +} +\arguments{ +\item{x}{first data.list} + +\item{y}{second data.list} +} +\value{ +logical +} +\description{ +Compare two data.lists +} +\details{ +Returns TRUE if the two data.lists are fully identical, so all data, order of variables etc. must be fully identical +} +\examples{ + +Dbuilding == Dbuilding + +D <- Dbuilding +D$Ta$k2[1] <- NA +Dbuilding == D + +D <- Dbuilding +names(D)[5] <- "I" +names(D)[6] <- "Ta" +Dbuilding == D + + +} diff --git a/man/flattenlist.Rd b/man/flattenlist.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c5d1c53fc6508c9137dae976c300171bc548c2a8 --- /dev/null +++ b/man/flattenlist.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/flattenlist.R +\name{flattenlist} +\alias{flattenlist} +\title{Flattens list} +\usage{ +flattenlist(x) +} +\arguments{ +\item{x}{List to flatten.} +} +\value{ +A flatten list +} +\description{ +Flattens list in a single list of data.frames +} +\details{ +Flattens list. Can maybe be made better. It might end up copying data in +memory!? It might change the order of the elements. +} diff --git a/man/forecastmodel.Rd b/man/forecastmodel.Rd new file mode 100644 index 0000000000000000000000000000000000000000..92b7d88a36a735d143762ed8d7ab3e7494e4f860 --- /dev/null +++ b/man/forecastmodel.Rd @@ -0,0 +1,171 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/forecastmodel.R-documentation.R +\name{forecastmodel} +\alias{forecastmodel} +\title{Class for forecastmodels} +\description{ +R6 class for a forecastmodel +} +\details{ +This class holds the variables and functions needed for defining and setting up a forecast model - independent of the fitting scheme. +See the vignettes on how to setup and use a model and the website \url{https://onlineforecasting.org} for more info. + + + +Holds all the information needed independently of the fitting scheme (e.g. lm_fit or rls_fit), see the fields and functions below. 
+ +The fields are separated into: + - Fields for setting up the model + - Fields used when fitting (e.g. which horizons to fit for is set in \code{kseq} + +See the fields description below. + +Note, it's an R6 class, hence an object variable is a pointer (reference), which means two important points: + - In order to make a copy, the function clone_deep() must be used (usually \code{clone(deep=TRUE)}, but that will end in an infinite loop). + - It can be manimulated directly in functions (without return). The code is written such that no external functions manipulate the model object, except for online updating. + +For online updating (i.e. receiving new data and updating the fit), then the model definition and the data becomes entangled, since transformation functions like low-pass filtering with \code{\link{lp}()} requires past values. +See the vignette ??(ref to online vignette, not yet available) and note that \code{\link{rls_fit}()} resets the state, which is also done in all \code{xxx_fit} functions (e.g. \code{\link{rls_fit}}. +} +\section{Public fields used for setting up the model}{ + + + - output = NA, character: Name of the output. + + - inputs = list(), add them with add_inputs(): List of inputs (which are R6 objects) (note the "cloning of list of reference objects" issue below in deep_clone function) + + - regprmexpr = NA: The expression (as character) used for generating the regprm, e.g. "\code{\link{rls_prm}()}" for RLS. + + - regprm = list(): Regression parameters calculated by evaluating the \code{regprmexpr}. + + - prmbounds = as.matrix(data.frame(lower=NA, init=NA, upper=NA)): The bounds for optimization of the parameters, e.g. with \code{\link{rls_optim}()}. + + - outputrange = NA, numeric vector of length 2: Limits of the predictions cropped in the range, e.g. outputrange = c(0,Inf) removes all negative output predictions. +} + +\section{Public fields used when the model is fitted}{ + + + - kseq = NA: The horizons to fit for. + + - kseqopt = NA: The horizons to fit for when optimizing. + + - p = NA: The (transformation stage) parameters used for the fit. + + - Lfits = list(): The regression fits, one for each k in kseq (simply a list with the latest fit). + + - datatr = NA: Transformed input data (data.list with all inputs for regression) +} + +\section{Public methods}{ + +All public functions are described below and in examples a section for each is included: +} + +\section{\code{$new()}}{ + +Create a new `forecastmodel` object. + +Returns a forecastmodel object. +} + +\section{\code{$add_inputs(...)}}{ + + Add inputs to the model. + + - \code{...}: The inputs are given as arguments, see examples. +} + +\section{\code{$add_regprm(regprm_expr)}}{ + +Add expression (as character) which generates regression parameters. +} + +\section{\code{$add_prmbounds(...)}}{ + +Add the transformation parameters and bounds for optimization. +} + +\section{\code{$get_prmbounds(...)}}{ + +Get the transformation parameter bounds, used by optimization functions e.g. \code{\link{rls_optim}()}. +} + +\section{\code{$insert_prm(prm)}}{ + +Insert the transformation parameters prm in the input expressions and regression expressions, and keep them in $prm (simply string manipulation). +} + +\section{\code{$transform_data(data)}}{ + +Function for transforming the input data to the regression stage input data (see \code{vignette("setup-data", package = "onlineforecast")}). 
+} + +\section{\code{$reset_state()}}{ + +Resets the input states and stored data for iterative fitting (datatr rows and yAR) (see ??(ref to online updating vignette, not yet available). +} + +\section{\code{$check(data = NA)}}{ + +Check if the model is setup correctly. +} + +\examples{ +# New object +model <- forecastmodel$new() + +# Print it +model + + +# Add model inputs +model$add_inputs(Ta = "lp(Ta)") +# See it +model$inputs +# Update to use no low-pass filter +model$add_inputs(Ta = "Ta") +model$inputs +# Add another +model$add_inputs(I = "lp(I)") +model$inputs + +# Simply a list, so manipulate directly +class(model$inputs$Ta) +model$inputs$Ta$expr <- "lp(Ta, a1=0.9)" + +# Add the parameters for the regression stage +model$add_regprm("rls_prm(lambda=0.99)") +# The evaluation is a list, which is set in +model$regprm + + +# Set the lambda to be optimized between 0.9 and 0.999, starting at 0.99 +model$add_prmbounds(lambda = c(0.9, 0.99, 0.999)) +# Note the "__" syntax to set parameters for inputs: "input__prm" +model$add_prmbounds(Ta__a1 = c(0.8, 0.95, 0.99)) + +# Get the lower bounds +model$get_prmbounds("lower") + +# Insert the init parameters +prm <- model$get_prmbounds("init") +prm +# Before +model$inputs$Ta$expr +# After +model$insert_prm(prm) +model$inputs$Ta$expr + +# Check if the model is setup and can be used with a given data.list +# An error is thrown +try(model$check(Dbuilding)) +# Add the model output +model$output <- "heatload" +# Still not error free +try(model$check(Dbuilding)) +# Add the horizons to fit for +model$kseq <- 1:4 +# Finally, no errors :) +model$check(Dbuilding) +} diff --git a/man/fs.Rd b/man/fs.Rd new file mode 100644 index 0000000000000000000000000000000000000000..e9c35fb9ebab354bb501106c40fa2cfebb45d435 --- /dev/null +++ b/man/fs.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fs.R +\name{fs} +\alias{fs} +\title{Generation of Fourrier series.} +\usage{ +fs(X, nharmonics) +} +\arguments{ +\item{X}{must be a dataframe with columns k1,k2,..., . One period is from 0 to 1 +(so for example if X is hour of day, then divide X by 24 to obtain a daily period).} + +\item{nharmonics}{the number of harmonics, so creates double as many inputs! i.e. one sine and one cos for each harmonic.} +} +\value{ +Returns a list of dataframes (two for each i in \code{1:nharmonics}) with same number of columns as X. +} +\description{ +Function for generating Fourrier series as a function of x E.g. use for +harmonic functions for modelling the diurnal patterns or for basis functions. 
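+
+For example, a minimal sketch of the typical use in a model input expression (the input name
+'mu_tday' is only illustrative, and it is assumed the data.list holds a time-of-day forecast
+matrix named 'tday', e.g. made with \code{\link{make_tday}()}):
+
+model <- forecastmodel$new()
+model$add_inputs(mu_tday = "fs(tday/24, nharmonics=4)")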
+} +\examples{ +# Make a data.frame with time of day in hours for different horizons +tday <- make_tday(seq(ct("2019-01-01"), ct("2019-01-04"), by=3600), kseq=1:5) +# See whats in it +str(tday) +head(tday) + +# Now use the function to generate Fourier series +L <- fs(tday/24, nharmonics=2) +# See what is in it +str(L) + +# Make a plot to see the harmonics +par(mfrow=c(2,1)) +# The first harmonic +plot(L$sin1$k1, type="l") +lines(L$cos1$k1, type="l") +# The second harmonic +plot(L$sin2$k1, type="l") +lines(L$cos2$k1, type="l") + + +} diff --git a/man/getse.Rd b/man/getse.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c4d3b22bac6b95caff9b2a640cf7131c12ce0195 --- /dev/null +++ b/man/getse.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getse.R +\name{getse} +\alias{getse} +\title{Getting subelement from list.} +\usage{ +getse(L, inm = NA, depth = 2, useregex = FALSE, fun = NA) +} +\arguments{ +\item{L}{The list to get sub elements from.} + +\item{inm}{Either an integer index or a name of the subelements to return.} + +\item{depth}{The depth of the subelements to match names in: +- 1: is directly in the list. +- 2: is in list of each element in the list. +- 3 and more: simply deeper in the sublists.} + +\item{useregex}{logical: should inm be used as regex pattern for returning elements matching, in the specified layer.} + +\item{fun}{function: if given, then it will be applied to all the matched subelements before returning them.} +} +\value{ +A list of the matched elements. +} +\description{ +A helping function for getting subelemlts from a list. +} +\details{ +Often it is needed to get a subelement from a list, which can be done using lapply. +But to make life easiere here is a small function for getting subelements in a nested list at a certain debth. +} +\examples{ +# Make a nested list +L <- list(x1=list(x=list("val11","val112"), + y=list("val12"), + test=list("testlist2")), + x2=list(x=list("val21","val212"), + y=list("val22"), + test=list("testlist2")), + x3=list(x=list("val31","val312"), + y=list("val32"), + test=list("testlist3"))) + +# Get the subelement "x1" +str(getse(L, "x1", depth=1)) +# Same as +str(L[["x1"]]) + +# Get the element named x in second layer +str(getse(L, "x", depth=2)) +# Depth is default to 2 +str(getse(L, "y")) + +# Nice when splitting string +x <- strsplit(c("x.k1","y.k2"), "\\\\.") +# Get all before the split "\\." 
+getse(x, 1) +# Get after +getse(x, 2) + +# Get an element with an integer index +x <- strsplit(c("x.k1","y.k2","x2"), "\\\\.") +getse(x, 1) +# if the element is not there, then an error is thrown +try(getse(x, 2)) + +# Use regex pattern for returning elements matching in the specified layer +getse(L, "^te", depth=2, useregex=TRUE) + +} diff --git a/man/gof.Rd b/man/gof.Rd new file mode 100644 index 0000000000000000000000000000000000000000..d5432bba065f02e98b4aa78d77448b74fa74094d --- /dev/null +++ b/man/gof.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gof.R +\name{gof} +\alias{gof} +\title{Simple wrapper for graphics.off()} +\usage{ +gof() +} +\description{ +Simple wrapper for graphics.off() +} diff --git a/man/grapes-times-times-grapes.Rd b/man/grapes-times-times-grapes.Rd new file mode 100644 index 0000000000000000000000000000000000000000..aad3f991e6a47d5fed89f588ed477dee99574f3b --- /dev/null +++ b/man/grapes-times-times-grapes.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/operator_multiply.R +\name{\%**\%} +\alias{\%**\%} +\title{Multiplication of list with y, elementwise} +\usage{ +x \%**\% y +} +\arguments{ +\item{x}{a list of matrices, data.frames, etc.} + +\item{y}{a vector, data.frame or matrix} +} +\value{ +A list of same length of x +} +\description{ +Multiplication of each element in a list (x) with y +} +\details{ +Each element of x is multiplied with y using the usual elementwise '*' operator. + +Typical use is when a function, e.g. \code{\link{bspline}()}, returns a list of matrices (e.g. one for each base spline) and they should individually be multiplied with y (a vector, matrix, etc.). + +Since this is intended to be used for forecast models in the transformation stage +then there are some percularities: + +If the number of columns or the names of the columns are not equal for one element in x +and y, then only the columns with same names are used, hence the resulting matrices can be +of lower dimensions. + +See the example \url{https://onlineforecasting.org/examples/solar-power-forecasting.html} where the operator is used. +} +\examples{ + +x <- list(matrix(1:9,3), matrix(9:1,3)) +x + +y <- matrix(2,3,3) +y + +x \%**\% y + +y <- 1:3 + +x \%**\% y + +# Naming percularity +nams(x[[1]]) <- c("k1","k2","k3") +nams(x[[2]]) <- c("k2","k3","k4") +y <- matrix(2,3,3) +nams(y) <- c("k1","k3","k7") + +# Now the only the horizons matching will be used +x \%**\% y + +} diff --git a/man/in_range.Rd b/man/in_range.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c3412162da3a75c61bfd95ad9454014cbdfcf2ad --- /dev/null +++ b/man/in_range.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/in_range.R +\name{in_range} +\alias{in_range} +\title{Selects a period} +\usage{ +in_range(tstart, time, tend = NA) +} +\arguments{ +\item{tstart}{The start of the period.} + +\item{time}{The timestamps as POSIX.} + +\item{tend}{The end of the period. If not given then the period will have no end.} +} +\value{ +A logical vector indicating the selected period with TRUE +} +\description{ +Returns a logical vector of boolean values where TRUE indicates if timestamp is within the +specified period. +} +\details{ +Returns a logical vector of boolean values where TRUE indicates if timestamp is within the +specified period spanned by tstart and tend. 
+ +Note the convention of time stamp in the end of the time intervals causes the time point +which equals \code{tstart} not to be included. See last example. + +The times can be given as character or POSIX, per default in tz='GMT'. +} +\examples{ + +# Take a subset +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) + +# Just a logical returning TRUE in a specified period +in_range("2010-12-20", D$t, "2010-12-22") + +# Set which period to evaluate when optimizing parameters, like in rls_optim() +# (the points with scoreperiod == false are not included in the score evaluation) +D$scoreperiod <- in_range("2010-12-20", D$t) +D$scoreperiod + +# Further, excluding a small period by +D$scoreperiod[in_range("2010-12-26", D$t, "2010-12-27")] <- FALSE +D$scoreperiod + +# Note the convention of time stamp in the end of the time intervals +# causes the point with t = 2010-12-26 00:00:00 not to be included +# since it's covering to "2010-12-25 23:00:00" to "2010-12-26 00:00:00" +D$t[in_range("2010-12-26", D$t, "2010-12-27")] + +# When characters are given, then they are translated to the time zone of the time vector +D <- subset(Dbuilding, c("2010-12-15", "2010-12-16")) +D$t <- ct(D$t, tz="CET") +D$t[in_range("2010-12-15 02:00", D$t, "2010-12-15 05:00")] + +} diff --git a/man/input_class.Rd b/man/input_class.Rd new file mode 100644 index 0000000000000000000000000000000000000000..33d459bf0c59ec20f8d96ad85a923e162915d1e6 --- /dev/null +++ b/man/input_class.Rd @@ -0,0 +1,105 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/input_class.R-documentation.R +\name{input_class} +\alias{input_class} +\title{Class for forecastmodel inputs} +\description{ +R6 class for for forecastmodel inputs +} +\details{ +Holds variables and functions needed for an input, as added by \code{\link{forecastmodel}$add_inputs()}. + + +Details of the class. +} +\section{Public fields}{ + + + - expr = NA: The expression as string for transforming the input. + + - state_L = list(): The list holding potential state values kept by the function evaluated in the expression. + + - state_i = integer(1): index counter for the state list. +} + +\section{Public methods}{ + +All public functions are described below and in examples a section for each is included: +} + +\section{\code{$new(expr)}}{ + +Create a new input with the expression \code{expr}. +} + +\section{\code{$evaluate(data}}{ + +Generate (transform) the input by evaluating the expr with the \code{data} (data.list) attached. +} + +\section{\code{$state_reset()}}{ + +Each function in the expressions (lp, fs, etc.) have the possibility to save a state, which can be read next time the are called. + +Reset the state by deleting \code{state_L} and setting \code{state_i} to 0. + + +# After running +model$inputs[[1]]$evaluate(D) +# the lp() has saved it's state for next time +model$inputs[[1]]$state_L +# New data arrives +Dnew <- subset(Dbuilding, 11, kseq=1:4) +# So in lp() the state is read and it continues +model$inputs[[1]]$evaluate(Dnew) + +# If we want to reset the state, which is done in all _fit() functions (e.g. rls_fit), such that all transformations starts from scratch +# Reset the state +model$inputs[[1]]$evaluate(D) +# Test resetting +model$inputs[[1]]$state_reset() +# Now there is no state +model$inputs[[1]]$evaluate(Dnew) +# So lp() starts by taking the first data point +Dnew$Ta +} + +\section{\code{$state_getval(initval)}}{ + +Get the saved value in state. 
This function can be used in the beginning of transformation functions to get the current state value. +First time called return the \code{initval}. + +Note that since all transformation functions are called in the same order, +then the state can be read and saved by keeping a counter internally, the value is saved in the field $state_i. + +See example of use in \code{\link{lp}()}. +} + +\section{\code{$state_setval(val)}}{ + +Set the state value, done in the end of a transformation function, see above. + +See example of use in \code{\link{lp}()}. +} + +\examples{ +# new: + +# An input is created in a forecastmodel +model <- forecastmodel$new() +model$add_inputs(Ta = "lp(Ta, a1=0.9)") +# The 'inputs' is now a list +model$inputs +# With the input object +class(model$inputs[[1]]) + +# Now the transformation stage can be carried out to create the regression stage data +# Take a data.list subset for the example +D <- subset(Dbuilding, 1:10, kseq=1:4) +# Transform the data +model$inputs[[1]]$evaluate(D) +# What happens is simply that the expression is evaluated with the data +# (Note that since not done in the model some functions are missing) +eval(parse(text=model$inputs[[1]]$expr), D) + +} diff --git a/man/lagdf.Rd b/man/lagdf.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c13d5f56e47f5ae3443c01c6765bc1be5ca58494 --- /dev/null +++ b/man/lagdf.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lagdf.R +\name{lagdf} +\alias{lagdf} +\alias{lagdf.data.frame} +\title{Lagging which returns a data.frame} +\usage{ +lagdf(x, lagseq) + +\method{lagdf}{data.frame}(x, lagseq) +} +\arguments{ +\item{x}{The data.frame to have columns lagged} + +\item{lagseq}{The sequence of lags as an integer. Alternatively, as a character "+k", "-k", "+h" or "-h", e.g. "k12" will with "+k" be lagged 12.} +} +\value{ +A data.frame. + +A data.frame with columns that are lagged +} +\description{ +Lagging by shifting the values back or fourth always returning a data.frame. + +Lagging of a data.frame +} +\details{ +This function lags (shifts) the values of the vector. A data.frame is always returned with the columns +as the vectors lagged with the values in lagseq. The column names are set to "kxx", where xx are the lag of the column. + +This function lags the columns with the integer values specified with the argument \code{lagseq}. 
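+
+For example, a minimal sketch of a typical use: lagging a forecast matrix "+k" so the forecasts
+are aligned in time with the observations (assuming the package's Dbuilding example data):
+
+D <- subset(Dbuilding, 1:50)
+plot(D$t, D$Taobs, type="l")
+lines(D$t, lagdf(D$Ta, "+k")$k1, col=2)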
+} +\examples{ +# The values are simply shifted +# Ahead in time +lagdf(1:10, 3) +# Back in time +lagdf(1:10, -3) +# Works but returns a numeric column +lagdf(as.factor(1:10), 3) +# Works and returns a character column +lagdf(as.character(1:10), 3) +# Giving several lag values +lagdf(1:10, c(1:3)) +lagdf(1:10, c(5,3,-1)) + +# See also how to lag a forecast data.frame with: ?lagdf.data.frame + + + +# dataframe of forecasts +X <- data.frame(k1=1:10, k2=1:10, k3=1:10) +X + +# Lag all columns +lagdf(X, 1) +\dontshow{if(!all(is.na(lagdf(X, 1)[1, ]))){stop("Lag all columns didn't work")}} + +# Lag each column different steps +lagdf(X, 1:3) +# Lag each columns with its k value from the column name +lagdf(X, "+k") +\dontshow{ + if(any(lagdf(X, 1:3) != lagdf(X, "+k"),na.rm=TRUE)){stop("Couldn't lag +k")} +} +# Also works for columns named hxx +names(X) <- gsub("k", "h", names(X)) +lagdf(X, "-h") + +# If lagseq must have length as columns in X, it doesn't know how to lag and an error is thrown +try(lagdf(X, 1:2)) + +\dontshow{ +if(!class(lagdf(data.frame(k1=1:10), 2)) == "data.frame"){stop("Trying to lag data.frame with 1 column, but return is not class data.frame")} +if(!all(dim(lagdf(data.frame(k1=1:10), "+k")) == c(10,1))){stop("Trying to lag data.frame with 1 column, but return is not class data.frame")} +} + +} +\seealso{ +\code{\link{lagdf.data.frame}} which is run when \code{x} is a data.frame. +} diff --git a/man/lagdl.Rd b/man/lagdl.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7698f34ca5ed01fb10ca8cb55bcf31e366385eba --- /dev/null +++ b/man/lagdl.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lagdl.R +\name{lagdl} +\alias{lagdl} +\title{Lagging which returns a data.list} +\usage{ +lagdl(DL, lagseq) +} +\arguments{ +\item{DL}{The data.list to be lagged.} + +\item{lagseq}{The integer(s) setting the lag steps.} +} +\value{ +A data.list. +} +\description{ +Lagging by shifting the values back or fourth always returning a data.list. +} +\details{ +This function lags (shifts) the values of the vector. A data.list is always returned with each data.frame lagged with \code{lagdf}. +} +\examples{ +# The values are simply shifted in each data.frame with lagdf + +} diff --git a/man/lagvec.Rd b/man/lagvec.Rd new file mode 100644 index 0000000000000000000000000000000000000000..e2bad3c4b3b88c4ebd4e6bc18acc1c0803490739 --- /dev/null +++ b/man/lagvec.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lagvec.R +\name{lagvec} +\alias{lagvec} +\title{Lag by shifting} +\usage{ +lagvec(x, lag) +} +\arguments{ +\item{x}{The vector to lag} + +\item{lag}{(integer) The steps to lag.} +} +\value{ +The shifted vector +} +\description{ +Lag by shifting the vecter +} +\details{ +A positive value of \code{lag} shifts the values to the right in the vector. 
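+
+A small sketch of the padding: the values shifted out are dropped and NAs fill the ends
+
+lagvec(1:5, 2)     # NA NA 1 2 3
+lagvec(1:5, -2)    # 3 4 5 NA NA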
+} +\examples{ + +# The values are simply shifted +# Ahead in time +lagvec(1:10, 3) +# Back in time +lagvec(1:10, -3) +# Works but returns a numric +lagvec(as.factor(1:10), 3) +# Works and returns a character +lagvec(as.character(1:10), 3) + +} diff --git a/man/lapply_cbind.Rd b/man/lapply_cbind.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f075fcc2e0fe7cbeb82d85b57bf7c0d788129ff2 --- /dev/null +++ b/man/lapply_cbind.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lapply.R +\name{lapply_cbind} +\alias{lapply_cbind} +\title{Helper which does lapply and then cbind} +\usage{ +lapply_cbind(X, FUN, ...) +} +\arguments{ +\item{X}{object to apply on} + +\item{FUN}{function to apply} + +\item{...}{passed on to lapply} +} +\description{ +Helper which does lapply and then cbind +} diff --git a/man/lapply_cbind_df.Rd b/man/lapply_cbind_df.Rd new file mode 100644 index 0000000000000000000000000000000000000000..4801b513be67a328f52a51bf39d871a8e200bb0a --- /dev/null +++ b/man/lapply_cbind_df.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lapply.R +\name{lapply_cbind_df} +\alias{lapply_cbind_df} +\title{Helper which does lapply, cbind and then as.data.frame} +\usage{ +lapply_cbind_df(X, FUN, ...) +} +\arguments{ +\item{X}{object to apply on} + +\item{FUN}{function to apply} + +\item{...}{passed on to lapply} +} +\description{ +Helper which does lapply, cbind and then as.data.frame +} diff --git a/man/lapply_rbind.Rd b/man/lapply_rbind.Rd new file mode 100644 index 0000000000000000000000000000000000000000..55d51afe182edc69c1459c6015eb33fdb81aa3e6 --- /dev/null +++ b/man/lapply_rbind.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lapply.R +\name{lapply_rbind} +\alias{lapply_rbind} +\title{Helper which does lapply and then rbind} +\usage{ +lapply_rbind(X, FUN, ...) +} +\arguments{ +\item{X}{object to apply on} + +\item{FUN}{function to apply} + +\item{...}{passed on to lapply} +} +\description{ +Helper which does lapply and then rbind +} diff --git a/man/lapply_rbind_df.Rd b/man/lapply_rbind_df.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3bd25abacc42a80e2f4e3097a8efca19c307689c --- /dev/null +++ b/man/lapply_rbind_df.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lapply.R +\name{lapply_rbind_df} +\alias{lapply_rbind_df} +\title{Helper which does lapply, rbind and then as.data.frame} +\usage{ +lapply_rbind_df(X, FUN, ...) +} +\arguments{ +\item{X}{object to apply on} + +\item{FUN}{function to apply} + +\item{...}{passed on to lapply} +} +\description{ +Helper which does lapply, rbind and then as.data.frame +} diff --git a/man/lm_fit.Rd b/man/lm_fit.Rd new file mode 100644 index 0000000000000000000000000000000000000000..1e7399b147378c3343376e5b4e95656f0244d387 --- /dev/null +++ b/man/lm_fit.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lm_fit.R +\name{lm_fit} +\alias{lm_fit} +\title{Fit an onlineforecast model with \code{\link{lm}}} +\usage{ +lm_fit( + prm = NA, + model, + data, + scorefun = NA, + returnanalysis = TRUE, + printout = TRUE +) +} +\arguments{ +\item{prm}{as numeric with the parameters to be used when fitting.} + +\item{model}{object of class forecastmodel with the model to be fitted.} + +\item{data}{as data.list with the data to fit the model on.} + +\item{scorefun}{Optional. 
If scorefun is given, e.g. \code{\link{rmse}}, then the value of this is also returned.} + +\item{returnanalysis}{as logical determining if the analysis should be returned. See below.} + +\item{printout}{Defaults to TRUE. Prints the parameters for model.} +} +\value{ +Depends on: + + - If \code{returnanalysis} is TRUE a list containing: + + * \code{Yhat}: data.frame with forecasts for \code{model$kseq} horizons. + + * \code{model}: The forecastmodel object cloned deep, so can be modified without changing the original object. + + * \code{data}: data.list with the data used, see examples on how to obtain the transformed data. + + * \code{Lfitval}: a character "Find the fits in model$Lfits", it's a list with the lm fits for each horizon. + + * \code{scoreval}: data.frame with the scorefun result on each horizon (only scoreperiod is included). + + - If \code{returnanalysis} is FALSE (and \code{scorefun} is given): The sum of the score function on all horizons (specified with model$kseq). +} +\description{ +Fit a linear regression model given a onlineforecast model, seperately for each prediction horizon +} +\examples{ + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +# Define a simple model +model <- forecastmodel$new() +model$output <- "y" +model$add_inputs(Ta = "lp(Ta, a1=0.9)", + mu = "one()") + +# Before fitting the model, define which points to include in the evaluation of the score function +D$scoreperiod <- in_range("2010-12-20", D$t) +# And the sequence of horizons to fit for +model$kseq <- 1:6 + +# Now we can fit the model with RLS and get the model validation analysis data +fit <- lm_fit(prm=NA, model=model, data=D) +# What did we get back? +names(fit) +class(fit) +# The one-step forecast +plot(D$y, type="l") +lines(lagvec(fit$Yhat$k1,-1), col=2) +# Get the residuals +plot(residuals(fit)$h1) +# Score for each horizon +score(residuals(fit)) + +# The lm_fit don't put anything in this field +fit$Lfitval +# Find the lm fits here +model$Lfits +# See result for k=1 horizon +summary(model$Lfits$k1) +# Some diurnal pattern is present +acf(residuals(fit)$h1, na.action=na.pass, lag.max=96) + +# Run with other parameters and return the RMSE +lm_fit(c(Ta__a1=0.8), model, D, scorefun=rmse, returnanalysis=FALSE) +lm_fit(c(Ta__a1=0.9), model, D, scorefun=rmse, returnanalysis=FALSE) + +} diff --git a/man/lm_optim.Rd b/man/lm_optim.Rd new file mode 100644 index 0000000000000000000000000000000000000000..407caa4dafdc7dcfa9f40375230ce91e26bffb0c --- /dev/null +++ b/man/lm_optim.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lm_optim.R +\name{lm_optim} +\alias{lm_optim} +\title{Optimize parameters for onlineforecast model fitted with LM} +\usage{ +lm_optim( + model, + data, + kseq = NA, + scorefun = rmse, + cachedir = "", + cachererun = FALSE, + printout = TRUE, + method = "L-BFGS-B", + ... +) +} +\arguments{ +\item{model}{The onlineforecast model, including inputs, output, kseq, p} + +\item{data}{The data.list including the variables used in the model.} + +\item{kseq}{The horizons to fit for (if not set, then model$kseq is used)} + +\item{scorefun}{The function to be score used for calculating the score to be optimized.} + +\item{cachedir}{A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. 
See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples.} + +\item{cachererun}{A logical controlling whether to run the optimization even if the cache exists.} + +\item{printout}{A logical determining if the score function is printed out in each iteration of the optimization.} + +\item{method}{The method argument for \code{\link{optim}}.} + +\item{...}{Additional parameters to \code{\link{optim}}} +} +\value{ +Result object of optim(). +Parameters resulting from the optimization can be found from \code{result$par} +} +\description{ +Optimize parameters (transformation stage) of LM model +} +\details{ +This is a wrapper for \code{\link{optim}} to enable easy use of bounds and caching in the optimization. +} +\examples{ + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +# Define a simple model +model <- forecastmodel$new() +model$add_inputs(Ta = "lp(Ta, a1=0.9)", + mu = "one()") +# Before fitting the model, define which points to include in the evaluation of the score function +D$scoreperiod <- in_range("2010-12-20", D$t) +# And the sequence of horizons to fit for +model$kseq <- 1:6 + +# Now we can fit the model and get the score, as it is +lm_fit(model=model, data=D, scorefun=rmse, returnanalysis=FALSE) +# Or we can change the low-pass filter coefficient +lm_fit(c(Ta__a1=0.99), model, D, rmse, returnanalysis=FALSE) + +# This could be passed to optim() (or any optimizer). +# See \code{forecastmodel$insert_prm()} for more details. +optim(c(Ta__a1=0.98), lm_fit, model=model, data=D, scorefun=rmse, returnanalysis=FALSE, + lower=c(Ta__a1=0.4), upper=c(Ta__a1=0.999), method="L-BFGS-B") + +# lm_optim is simply a helper it makes using bounds easiere and enables caching of the results +# First add bounds for lambda (lower, init, upper) +model$add_prmbounds(Ta__a1 = c(0.4, 0.98, 0.999)) + +# Now the same optimization as above can be done by +val <- lm_optim(model, D) +val + + +} +\seealso{ +\code{link{optim}} for how to control the optimization and \code{\link{rls_optim}} which works very similarly. +} diff --git a/man/lm_predict.Rd b/man/lm_predict.Rd new file mode 100644 index 0000000000000000000000000000000000000000..998f64435c0da0688b2472be897e94a34f4a4d28 --- /dev/null +++ b/man/lm_predict.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lm_predict.R +\name{lm_predict} +\alias{lm_predict} +\title{Prediction with an lm forecast model.} +\usage{ +lm_predict(model, datatr) +} +\arguments{ +\item{model}{Onlineforecast model object which has been fitted.} + +\item{datatr}{Transformed data.} +} +\value{ +The Yhat forecast matrix with a forecast for each model$kseq and for each time point in \code{datatr$t}. +} +\description{ +Use a fitted forecast model to predict its output variable with transformed data. +} +\details{ +See the ??ref(recursive updating vignette, not yet available). 
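+
+A minimal sketch of predicting on newly arrived data once the model has been fitted (the names
+follow the example below; the subset taken as "new" data is only illustrative):
+
+Dnew <- subset(Dbuilding, 11:20, kseq=1:6)
+datatrnew <- model$transform_data(Dnew)
+Yhatnew <- lm_predict(model, datatrnew)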
+}
+\examples{
+
+
+# Take data
+D <- subset(Dbuilding, c("2010-12-15", "2011-01-01"))
+D$y <- D$heatload
+# Define a model
+model <- forecastmodel$new()
+model$add_inputs(Ta = "lp(Ta, a1=0.7)", mu = "one()")
+
+# Before fitting the model, define which points to include in the evaluation of the score function
+D$scoreperiod <- in_range("2010-12-20", D$t)
+# And the sequence of horizons to fit for
+model$kseq <- 1:6
+
+# Transform using the model
+datatr <- model$transform_data(D)
+
+# See the transformed data
+str(datatr)
+
+# The model has not been fitted
+model$Lfits
+
+# To fit
+lm_fit(model=model, data=D)
+
+# Now the fits for each horizon are there (the latest update)
+# For example
+summary(model$Lfits$k1)
+
+# Use the fit for prediction
+D$Yhat <- lm_predict(model, datatr)
+
+# Plot it
+plot_ts(D, c("y|Yhat"), kseq=1)
+
+}
diff --git a/man/long_format.Rd b/man/long_format.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..05ff5ae838c7e5c96db50a9d828142b22a09675d
--- /dev/null
+++ b/man/long_format.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/long_format.R
+\name{long_format}
+\alias{long_format}
+\title{Long format of prediction data.frame}
+\usage{
+long_format(fit, Time = NULL)
+}
+\arguments{
+\item{fit}{The result from either lm_fit or rls_fit}
+
+\item{Time}{If the timestamps are missing from the fit object, they can be given here.}
+}
+\value{
+Data.frame of when the predictions were made, together with the prediction value and timestamp.
+}
+\description{
+Creates a long format of the predictions
+}
+\details{
+This function creates a prediction data.frame which can be useful for analysis and plotting.
+}
+\examples{
+
+# Take data
+D <- subset(Dbuilding, c("2010-12-15", "2011-01-01"))
+D$y <- D$heatload
+D$scoreperiod <- in_range("2010-12-20", D$t)
+# Define a model
+model <- forecastmodel$new()
+model$add_inputs(Ta = "Ta",
+                 mu = "one()")
+model$add_regprm("rls_prm(lambda=0.99)")
+model$kseq <- 1:6
+# Fit it
+fit <- rls_fit(prm=c(lambda=0.99), model, D)
+
+# Get the forecasts (in fit$Yhat) on long format
+long_format(fit)
+
+}
diff --git a/man/lp.Rd b/man/lp.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..7964f26b5dc99ba1df2eef96f010ae13307eabc5
--- /dev/null
+++ b/man/lp.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lp.R
+\name{lp}
+\alias{lp}
+\title{First-order low-pass filtering}
+\usage{
+lp(X, a1, usestate = TRUE)
+}
+\arguments{
+\item{X}{Dataframe or matrix (or list of them) of forecasts in columns to be low-pass filtered.}
+
+\item{a1}{The low-pass filter coefficient.}
+
+\item{usestate}{logical: Use the state kept in the model$input? If \code{lp()} is used outside model$transform_data(), then it must be set to FALSE, otherwise the input$state (which is not there) will be read, leading to an error.}
+}
+\value{
+The low-pass filtered dataframe (as a matrix)
+}
+\description{
+First-order low-pass filtering of a time series vector.
+}
+\details{
+This function applies a first order unity gain low-pass filter to the columns of \code{X}.
+The low-pass filter is applied to each column separately. The stationary gain of the filter is one.
+
+If a list of dataframes (or matrices) is given, then the low-pass filtering is recursively applied on each.
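+
+A rough sketch of a unity-gain first-order low-pass recursion of this kind, for a numeric
+vector x (the initialization of the first value may differ from the compiled implementation):
+
+a1 <- 0.5
+x <- rep(c(0,1), each=5)
+y <- numeric(length(x))
+y[1] <- x[1]
+for(i in 2:length(x)){
+    # each filtered value depends on the previous filtered value and the current input
+    y[i] <- a1*y[i-1] + (1-a1)*x[i]
+}
+y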
+}
+\examples{
+# Make a dataframe for the examples
+X <- data.frame(k1=rep(c(0,1),each=5))
+X$k2 <- X$k1
+Xf <- lp(X, 0.5, usestate=FALSE)
+Xf
+
+# See the input and the low-pass filtered result
+plot(X$k1)
+lines(Xf[ ,"k1"])
+# Slower response with higher a1 value
+lines(lp(X, 0.8, usestate=FALSE)[ ,"k1"])
+
+# If a list of dataframes is given, then lp() is recursively applied on each
+lp(list(X,X), 0.5, usestate=FALSE)
+
+
+}
diff --git a/man/lp_vector_cpp.Rd b/man/lp_vector_cpp.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..4120434166a873686f4bbd90142cf908370d2245
--- /dev/null
+++ b/man/lp_vector_cpp.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{lp_vector_cpp}
+\alias{lp_vector_cpp}
+\title{Low-pass filtering of a vector.}
+\arguments{
+\item{x}{A numeric vector}
+
+\item{a1}{the first order low-pass filter coefficient}
+}
+\description{
+This function returns a vector which is x passed through a unity-gain first-order low-pass filter.
+}
diff --git a/man/make_input.Rd b/man/make_input.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..751da32ff8cc89963b65821f59e3eaa91a724dcb
--- /dev/null
+++ b/man/make_input.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/make_input.R
+\name{make_input}
+\alias{make_input}
+\title{Make a forecast matrix (as data.frame) from observations.}
+\usage{
+make_input(observations, kseq)
+}
+\arguments{
+\item{observations}{vector of observations.}
+
+\item{kseq}{vector of integers, representing the desired "k-steps ahead".}
+}
+\value{
+Returns a forecast matrix (as a data.frame) with simply the observation vector copied to each column.
+}
+\description{
+This function creates a data.frame with columns for each horizon such that it can be
+added to a data.list and used in a forecast model.
+}
+\examples{
+
+# Data for example
+D <- subset(Dbuilding, c("2010-12-15","2010-12-20"))
+
+# Generate the input
+make_input(D$heatload, 1:4)
+
+# Set it in D, such that it can be used in input expressions (e.g. by model$add_inputs(AR = "AR0"))
+D$AR0 <- make_input(D$heatload, 1:36)
+
+}
diff --git a/man/make_periodic.Rd b/man/make_periodic.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..73dfb17a6236b15396ef2ec914eff64b8adf5896
--- /dev/null
+++ b/man/make_periodic.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/make_periodic.R
+\name{make_periodic}
+\alias{make_periodic}
+\title{Make a forecast matrix with a periodic time signal.}
+\usage{
+make_periodic(time, kseq, period, offset = 0, tstep = NA)
+}
+\arguments{
+\item{time}{vector of times of class "POSIXct" "POSIXt".}
+
+\item{kseq}{vector of integers, representing the desired "k-steps ahead".}
+
+\item{period}{a numeric setting the length of the period in seconds.}
+
+\item{offset}{a numeric setting an offset in the period start in seconds.}
+
+\item{tstep}{step time of k in seconds.}
+}
+\value{
+Returns a forecast matrix (data.frame) with rownames = times, colnames = k1, k2, k3, ...
+The content of the data frame is the value of the periodic time signal for each horizon.
+}
+\description{
+This function creates a data.frame with k-steps-ahead values of a periodic time signal,
+such that it can be added to a data.list and used as input to a forecast model.
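+
+A minimal sketch of adding such a signal to a data.list (assuming the package's Dbuilding
+example data; the name 'tp3h' is only illustrative):
+
+D <- subset(Dbuilding, 1:100)
+# A 3-hourly periodic signal aligned with the forecast horizons
+D$tp3h <- make_periodic(D$t, kseq=1:6, period=3*3600)
+head(D$tp3h)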
+}
+\examples{
+# Create a time sequence with 30 min sample period
+tseq <- seq(ct("2019-01-01"), ct("2019-02-01 12:00"), by=1800)
+
+# Make the three-hourly periodic sequence
+make_periodic(tseq, 1:10, 3*3600)
+
+# With an offset of one hour
+make_periodic(tseq, 1:10, 3*3600, 3600)
+
+}
+\seealso{
+make_tday
+}
+\keyword{data.frame}
+\keyword{lags}
+\keyword{periodic}
diff --git a/man/make_tday.Rd b/man/make_tday.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..b3a92800663cca2ad0aad9f0aca896c89b141a85
--- /dev/null
+++ b/man/make_tday.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/make_tday.R
+\name{make_tday}
+\alias{make_tday}
+\title{Make an hour-of-day forecast matrix}
+\usage{
+make_tday(time, kseq, tstep = NA)
+}
+\arguments{
+\item{time}{vector of times of class "POSIXct" "POSIXt".}
+
+\item{kseq}{vector of integers, representing the desired "k-steps ahead".}
+
+\item{tstep}{step time of k in seconds.}
+}
+\value{
+Returns a forecast matrix (data.frame) with rownames = times, colnames = k1, k2, k3, ...
+The content of the data frame is the hour of day.
+}
+\description{
+This function creates a data.frame with k-steps-ahead values of hour of day,
+such that it can be added to a data.list and used as input to a forecast model.
+}
+\examples{
+# Create a time sequence with 30 min sample period
+tseq <- seq(ct("2019-01-01"), ct("2019-02-01 12:00"), by=1800)
+
+# Make the time of day sequence (assuming the time between k steps is the same as for tseq)
+make_tday(tseq, 1:10)
+
+# With 0.5 hour samples in tseq, but hourly k steps (tstep given in seconds)
+make_tday(tseq, 1:10, tstep=3600)
+
+
+}
+\seealso{
+make_periodic
+}
+\keyword{data.frame}
+\keyword{hourofday}
+\keyword{lags}
diff --git a/man/nams.Rd b/man/nams.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..1827e0cd7ad8f1c1e3e8faa5a36e0d407516b69d
--- /dev/null
+++ b/man/nams.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nams.R
+\name{nams}
+\alias{nams}
+\alias{nams<-}
+\title{Return the column names}
+\usage{
+nams(x)
+
+nams(x) <- value
+}
+\arguments{
+\item{x}{The matrix or data.frame to get or set the column names for.}
+
+\item{value}{The names to be given.}
+}
+\description{
+Return the column names of a dataframe or a matrix.
+}
+\details{
+Simply to have a single function for returning the column names, instead of
+\code{colnames()} for a \code{matrix} and \code{names()} for a \code{data.frame}.
+}
+\examples{
+
+# Generate a matrix
+X <- matrix(1, nrow=2, ncol=3)
+colnames(X) <- c("c1","c2","c3")
+D <- as.data.frame(X)
+
+# Annoyingly this does not work for a matrix (it returns NULL)
+\dontrun{names(X)}
+# Could use this everywhere
+colnames(D)
+# but this is shorter
+nams(X)
+nams(D)
+
+# Also for assignment
+nams(D) <- c("x1","x2","x3")
+nams(D)
+
+}
diff --git a/man/one.Rd b/man/one.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..7963af49d21ca12b923d7af02052a58b677f366b
--- /dev/null
+++ b/man/one.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ones.R
+\name{one}
+\alias{one}
+\title{Create ones for model input intercept}
+\usage{
+one()
+}
+\value{
+A data.frame of ones
+}
+\description{
+Returns a data.frame of ones which can be used in forecast model inputs
+}
+\details{
+The function returns ones which can be used as an input to a model, e.g. as an intercept term.
+
+See vignettes 'setup-data' and 'setup-and-use-model'.
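+
+A small sketch of what the transformation in the example below generates: the intercept becomes
+a forecast matrix of ones (assuming the transformed element keeps the input name 'mu' and has
+one column per horizon in \code{kseq}):
+
+datatr <- model$transform_data(subset(Dbuilding, 1:7))
+str(datatr$mu)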
+} +\examples{ + +# A model +model <- forecastmodel$new() +# Use the function in the input definition +model$add_inputs(mu = "one()") +# Set the forecast horizons +model$kseq <- 1:4 +# During the transformation stage the one will be generated for the horizons +model$transform_data(subset(Dbuilding, 1:7)) + +} diff --git a/man/onlineforecast.Rd b/man/onlineforecast.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cf53df43a84f169d0852296c2b6c14c3c3086f74 --- /dev/null +++ b/man/onlineforecast.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/onlineforecast-package.R +\docType{package} +\name{onlineforecast} +\alias{onlineforecast} +\title{Functions for online forecasting} +\description{ +Functions for online forecasting +} +\details{ +This package provides functions to for setting up forecast models which run in an online setting, e.g. like demand, solar and wind power forecasts updated regularly - often hourly, and forecasts up to 48 hours ahead. + +See the website \url{https://onlineforecasting.org} for more information. +} diff --git a/man/pairs.data.list.Rd b/man/pairs.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..6990e75eae475a74aee1d36072aeec38fd9e1569 --- /dev/null +++ b/man/pairs.data.list.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{pairs.data.list} +\alias{pairs.data.list} +\title{Generation of pairs plot for a data.list.} +\usage{ +\method{pairs}{data.list}( + x, + subset = NA, + nms = NA, + kseq = NA, + lagforecasts = TRUE, + pattern = NA, + lower.panel = NULL, + panel = panel.smooth, + pch = 20, + cex = 0.7, + ... +) +} +\arguments{ +\item{x}{The data.list from which to plot.} + +\item{subset}{The subset to be included. Passed to \code{\link{subset.data.list}()}.} + +\item{nms}{The names of the variables to be included. Passed to \code{\link{subset.data.list}()}.} + +\item{kseq}{The horizons to be included. Passed to \code{\link{subset.data.list}()}.} + +\item{lagforecasts}{Lag the forecasts such that they are synced with obervations. Passed to \code{\link{subset.data.list}()}.} + +\item{pattern}{Regex pattern to select the included variables. Passed to \code{\link{subset.data.list}()}.} + +\item{lower.panel}{Passed to \code{\link{pairs}()}.} + +\item{panel}{Passed to \code{\link{pairs}()}.} + +\item{pch}{Passed to \code{\link{pairs}()}.} + +\item{cex}{Passed to \code{\link{pairs}()}.} + +\item{...}{Passed to \code{\link{pairs}()}.} +} +\description{ +Generate a pairs plot for the vectors in the data.list. +} +\details{ +A very useful plot for checking what is in the forecasts, how they are synced and match the observations. +} +\examples{ +# Take a subset for the example +D <- subset(Dbuilding, c("2010-12-15","2011-01-15"), pattern="^Ta|^I", kseq=1:3) +pairs(D) + +# If the forecasts and the observations are not aligned in time, +# which is easy to see by comparing to the previous plot. +pairs(D, lagforecasts=FALSE) +# Especially for the solar I syncronization is really important! +# Hence if the forecasts were not synced properly, then it can be detected using this type of plot. 
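+
+# A small additional sketch: the pattern, nms and kseq arguments can narrow the plot further,
+# e.g. only the heat load and the ambient temperature forecast for a single horizon
+pairs(Dbuilding, subset=c("2010-12-15","2011-01-15"), pattern="^heatload$|^Ta$", kseq=24)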
+ +# Alternatively, lag when taking the subset +D <- subset(Dbuilding, c("2010-12-15","2011-01-15"), pattern="^Ta|^I", kseq=1:3, lagforecasts=TRUE) +pairs(D, lagforecasts=FALSE) + +} diff --git a/man/par_ts.Rd b/man/par_ts.Rd new file mode 100644 index 0000000000000000000000000000000000000000..eb4f515a90411a3512be510b9a5849bdcfcf2074 --- /dev/null +++ b/man/par_ts.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/par_ts.R +\name{par_ts} +\alias{par_ts} +\title{Set parameters for \code{\link{plot_ts}()}} +\usage{ +par_ts(fromoptions = FALSE, p = NA, ...) +} +\arguments{ +\item{fromoptions}{logical: Read the list of parameters set in \code{\link{options}("par_ts")$par_ts}, then the additional parameters set in \code{...} are replaced before the list is returned.} + +\item{p}{List of the parameters, as returned by the function itself. If given, the additional parameters set in \code{...} are replaced before the list is returned.} + +\item{...}{any of the following parameters can be set replacing the default values: + +\code{xnm} "t": The name of the time + +\code{legendspace} 10: +Horizontal space for the lengend in character spaces + +\code{legendcex} 1: Scaling of the legend + +\code{legendrangeshow} TRUE: Include the range for each variable in the legend + +\code{ylimextend} c(lower,upper): Extend the ylim for each plot with a proportion, seperately for the lower and upper limit + +\code{yaxisextend} c(lower,upper): Extend the yaxis for each plot with a proportion, seperately for the lower and upper limit + +\code{mainsline} (numeric): with the \code{line} for the main in the plots. + +\code{cex} (numeric): The cex to use for the \code{plot_ts} plots. + +\code{plotfun}: The function used for plotting, as default \code{lines}. + +\code{xaxisformat} (character): The format of the xaxis, see \code{\link{strptime}()}. + +\code{colorramp} colorRampPalette: The colorramp used for setting multiple colors in each plot} +} +\value{ +A list of the parameters above, which can be set globally (see examples) or passed to \code{\link{plot_ts}}. +} +\description{ +Set parameters for \code{\link{plot_ts}()} globally +} +\details{ +Often in a report some plot parameters must be set for all plots, which is done with \code{\link{par}()}. + +The parameters which are general for \code{\link{plot_ts}()} can be set and saved in \code{\link{options}()}, +and they will then be applied as default in all calls to plot_ts(). See the examples how to do this. + +If any of these parameters are given to \code{\link{plot_ts}()}, then it will be used over the default. +} +\examples{ + +# Data for plots +D <- subset(Dbuilding, 1:192) + +# See the parameters which can be set +p <- par_ts() +names(p) +p$xnm + +# Using the default values +plot_ts(D, c("heatload","Ta"), kseq=1:24) + +# Set the parameters directly +plot_ts(D, c("heatload","Ta"), kseq=1:24, legendcex=0.8, legendspace=8) + +# Set parameters to be given in a list +p <- par_ts() +p$legendcex <- 0.8 +p$legendspace <- 8 + +# Use those parameters +plot_ts(D, c("heatload","Ta"), kseq=1:24, p=p) + +# Set globally (if not set specifed the default values will be used) +options(par_ts=p) + +# Now the global parameters will be used +plot_ts(D, c("heatload","Ta"), kseq=1:24) + +# Still providing a parameter directly it will used, e.g. 
change the plotting function +plot_ts(D, c("heatload","Ta"), kseq=1:24, plotfun=points) + +# Control more precisely the plotting function +plot_ts(D, c("heatload","Ta"), kseq=1:24, plotfun=function(x, ...){ points(x, type="b", ...)}) + +# Another colorramp function +p$colorramp <- rainbow +options(par_ts=p) +plot_ts(D, c("heatload","Ta"), kseq=1:24) + +} diff --git a/man/pbspline.Rd b/man/pbspline.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3a35d3b71d8bbf8b571234525595e35735d0e4fd --- /dev/null +++ b/man/pbspline.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bspline.R +\name{pbspline} +\alias{pbspline} +\title{Wrapper for \code{bspline} with \code{periodic=TRUE}} +\usage{ +pbspline( + X, + Boundary.knots = NA, + intercept = FALSE, + df = NULL, + knots = NULL, + degree = 3, + bknots = NA +) +} +\arguments{ +\item{X}{see \code{?bspline}} + +\item{Boundary.knots}{see \code{?bspline}} + +\item{intercept}{see \code{?bspline}} + +\item{df}{see \code{?bspline}} + +\item{knots}{see \code{?bspline}} + +\item{degree}{see \code{?bspline}} + +\item{bknots}{see \code{?bspline}} +} +\description{ +Wrapper for \code{bspline} with \code{periodic=TRUE} +} +\details{ +Simply a wrapper. +} +\seealso{ +Other Transform stage functions: +\code{\link{bspline}()} +} +\concept{Transform stage functions} diff --git a/man/persistence.Rd b/man/persistence.Rd new file mode 100644 index 0000000000000000000000000000000000000000..d7c13d7f6f20003daa60697e9235320de8053e02 --- /dev/null +++ b/man/persistence.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/persistence.R +\name{persistence} +\alias{persistence} +\title{Generate persistence forecasts} +\usage{ +persistence(y, kseq, perlen = NA) +} +\arguments{ +\item{y}{(numeric) The model output to be forecasted.} + +\item{kseq}{(integer) The horizons to be forecasted.} + +\item{perlen}{(integer) The period length for seasonal persistence.} +} +\value{ +Forecast matrix as a \code{data.frame} (named \code{Yhat} in similar functions) +} +\description{ +Generate persistence and periodic persistence forecasts +} +\details{ +Generate a forecast matrix using persistence. The simple persistence is with the current value of y, i.e. the value at time t is used as forecast + +A seasonal persistence with a specific period can be generated by setting the argument \code{perlen} to the length of the period in steps. The value used for the forecast is then the latest available, which is matches the seasonality for time t+k, see the examples. 
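+
+A typical use is as a benchmark to compare fitted models against. A minimal sketch (assuming
+the package's Dbuilding example data and the residuals()/score() helpers):
+
+D <- subset(Dbuilding, c("2010-12-15", "2011-01-01"))
+Yhatper <- persistence(D$heatload, kseq=1:6)
+score(residuals(Yhatper, D$heatload))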
+} +\examples{ + +# Simple persistence just copies the current value for the forecasts +persistence(1:10, kseq=1:4) + +# Seasonal persistence takes the value perlen steps back +persistence(1:10, kseq=1:4, perlen=4) + +# If the horizons are longer than perlen, then the perlen*i steps back is taken (i is an integer) +persistence(1:10, kseq=1:12, perlen=4) + + +} diff --git a/man/plot_ts.Rd b/man/plot_ts.Rd new file mode 100644 index 0000000000000000000000000000000000000000..0aa23692515eca12dca4398a6b8020e8b0949b3f --- /dev/null +++ b/man/plot_ts.Rd @@ -0,0 +1,245 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_ts.R, R/plotly_ts.R +\name{plot_ts} +\alias{plot_ts} +\alias{plot_ts.data.list} +\alias{plot_ts.data.frame} +\alias{plot_ts.matrix} +\alias{plot_ts.rls_fit} +\alias{plotly_ts} +\title{Time series plotting} +\usage{ +plot_ts( + object, + patterns = ".*", + xlim = NA, + ylims = NA, + xlab = "", + ylabs = NA, + mains = "", + mainouter = "", + legendtexts = NA, + colormaps = NA, + xat = NA, + usely = FALSE, + plotit = TRUE, + p = NA, + ... +) + +\method{plot_ts}{data.list}( + object, + patterns = ".*", + xlim = NA, + ylims = NA, + xlab = "", + ylabs = NA, + mains = "", + mainouter = "", + legendtexts = NA, + colormaps = NA, + xat = NA, + usely = FALSE, + plotit = TRUE, + p = NA, + kseq = NA, + ... +) + +\method{plot_ts}{data.frame}( + object, + patterns = ".*", + xlim = NA, + ylims = NA, + xlab = "", + ylabs = NA, + mains = NA, + mainouter = "", + legendtexts = NA, + colormaps = NA, + xat = NA, + usely = FALSE, + plotit = TRUE, + p = NA, + namesdata = NA, + ... +) + +\method{plot_ts}{matrix}( + object, + patterns = ".*", + xlim = NA, + ylims = NA, + xlab = "", + ylabs = NA, + mains = NA, + mainouter = "", + legendtexts = NA, + colormaps = NA, + xat = NA, + usely = FALSE, + plotit = TRUE, + p = NA, + namesdata = NA, + ... +) + +\method{plot_ts}{rls_fit}( + object, + patterns = c("^y$|^Yhat$", "^Residuals$", "CumAbsResiduals$", pst("^", + names(fit$Lfitval[[1]]), "$")), + xlim = NA, + ylims = NA, + xlab = "", + ylabs = NA, + mains = "", + mainouter = "", + legendtexts = NA, + colormaps = NA, + xat = NA, + usely = FALSE, + plotit = TRUE, + p = NA, + kseq = NA, + ... +) + +plotly_ts( + object, + patterns = ".*", + xlim = NA, + ylims = NA, + xlab = "", + ylabs = NA, + mains = "", + mainouter = "", + legendtexts = NA, + colormaps = NA, + xat = NA, + usely = FALSE, + p = NA, + ... +) +} +\arguments{ +\item{object}{A \code{data.list} or \code{data.frame} with observations and forecasts, note diffe} + +\item{patterns}{See \code{\link{plot_ts}}. The default pattern finds the generated series in the function, '!!RLSinputs!!' will be replaced with the names of the RLS inputs (regression stage inputs).} + +\item{xlim}{The time range as a character of length 2 and form "YYYY-MM-DD" or POSIX. 
Date to start and end the plot.} + +\item{ylims}{The \code{ylim} for each plot given in a list.} + +\item{xlab}{A character with the label for the x-axis.} + +\item{ylabs}{A character vector with labels for the y-axes.} + +\item{mains}{A character vector with the main for each plot.} + +\item{mainouter}{A character with the main at the top of the plot (can also be added afterwards with \code{title(main, outer=TRUE)}).} + +\item{legendtexts}{A list with the legend texts for each plot (replaces the names of the variables).} + +\item{colormaps}{A list of colormaps, which will be used in each plot.} + +\item{xat}{POSIXt specifying where the ticks on x-axis should be put.} + +\item{usely}{If TRUE then plotly will be used.} + +\item{plotit}{If FALSE then the plot will not be generated, only data returned.} + +\item{p}{The plot_ts parameters in a list, as generated with the function \code{\link{par_ts}()}.} + +\item{...}{Parameters passed to \code{\link{par_ts}}, see the list of parameters in \code{?\link{par_ts}}.} + +\item{kseq}{For \code{class(object)=="data.list"} an integer vector, default = NA. Control which forecast horizons to include in the plots. If NA all the horizons will be included.} + +\item{namesdata}{For \code{class(object)=="data.frame"} a character vector. Names of columns in object to be searched in, instead of \code{names(object)}.} + +\item{fit}{An \code{rls_fit}.} +} +\value{ +A list with a data.frame with the data for each plot, if usely=TRUE, then a list of the figures (drawn with print(subplot(L, shareX=TRUE, nrows=length(L), titleY = TRUE))). + +The plotted data in a \code{data.list}. +} +\description{ +Plot time series of observations and forecasts (lagged to be aligned in time). + +Plot forecasts, residuals, cumulated residuals and RLS coefficients + +Simply the same as \code{\link{plot_ts}()} with \code{usely=TRUE}, such that plotly is used. +} +\details{ +Generates time series plots depending on the variables matched by each regular expression given in the \code{patterns} argument. + +The forecasts matrices in the \code{data.list} given in \code{object} will be lagged to be aligned in time (i.e. k-step forecasts will be lagged by k). + +Use the plotly package if argument \code{usely} is TRUE, see \code{\link{plotly_ts}()}. + +A useful plot for residual analysis and model validation of an RLS fitted forecast model. + +All parameters, except those described below, are simply passed to \code{\link{plot_ts}()}. + +The \code{plotly} package must be installed and loaded. + +Note that the plot parameters set with \code{\link{par_ts}()} have no effect on the \code{plotly} plots. + +See \url{https://onlineforecasting.org/vignettes/nice-tricks.html}. +} +\examples{ + +# Time series plots for \code{data.list}, same as for \code{data.frame} except use of \code{kseq} +D <- Dbuilding +plot_ts(D, c("heatload","Ta"), kseq=c(1,24)) +# Make two plots (and set the space for the legend) +plot_ts(D, c("heatload","Ta"), kseq=c(1,24), legendspace=11) +# Only the Ta observations +plot_ts(D, c("heatload","Taobs$"), kseq=c(1,24), legendspace=11) + +# Give labels +plot_ts(D, c("heatload","Ta"), kseq=c(1,24), xlab="Time", ylabs=c("Heat (kW)","Temperature (C)")) +# Mains (see mainsline in par_ts()) +plot_ts(D, c("heatload","Ta"), kseq=c(1,24), mains=c("Heatload","Temperature"), mainsline=c(-1,-2)) + +# Format of the xaxis (see par_ts()) +plot_ts(D, c("heatload","Ta"), kseq=c(1,24), xaxisformat="\%Y-\%m-\%d \%H:\%m") + +# Return the data, for other plots etc. 
+L <- plot_ts(D, c("heatload","Ta"), kseq=c(1,24)) +names(L[[1]]) +names(L[[2]]) + + + +# Fit a model (see vignette 'setup-and-use-model' +D <- Dbuilding +D$scoreperiod <- in_range("2010-12-22", D$t) +model <- forecastmodel$new() +model$output = "heatload" +model$add_inputs(Ta = "Ta", + mu = "one()") +model$add_regprm("rls_prm(lambda=0.9)") +model$kseq <- c(3,18) +fit1 <- rls_fit(NA, model, D, returnanalysis = TRUE) + +# Plot it +plot_ts(fit1) + +# Return the data +Dplot <- plot_ts(fit1) + +# The RLS coefficients are now in a nice format +head(Dplot$mu) + + +# See the website link above + +} +\seealso{ +\code{\link{par_ts}} for setting plot control parameters. + +\code{\link{regex}} for regular expressions to select which variables to plot. + +\code{\link{plot_ts}}. +} diff --git a/man/print.forecastmodel.Rd b/man/print.forecastmodel.Rd new file mode 100644 index 0000000000000000000000000000000000000000..2e4b8139885f93a4d6b2228c1876f8966da3be4b --- /dev/null +++ b/man/print.forecastmodel.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/forecastmodel.R +\name{print.forecastmodel} +\alias{print.forecastmodel} +\title{Print forecast model} +\usage{ +\method{print}{forecastmodel}(x, ...) +} +\arguments{ +\item{x}{A forecastmodel object} + +\item{...}{Not used.} +} +\description{ +Prints a forecast model +} +\details{ +A simple print out of the model output and inputs +} diff --git a/man/print_to_message.Rd b/man/print_to_message.Rd new file mode 100644 index 0000000000000000000000000000000000000000..8ac0089cc5f51256c42a86d16c8b3578e186c05b --- /dev/null +++ b/man/print_to_message.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/print_to_message.R +\name{print_to_message} +\alias{print_to_message} +\title{Simple function for capturing from the print function and send it in a message().} +\usage{ +print_to_message(...) +} +\arguments{ +\item{...}{Passed to print which passed to message.} +} +\description{ +Simple function for capturing from the print function and send it in a message(). +} diff --git a/man/pst.Rd b/man/pst.Rd new file mode 100644 index 0000000000000000000000000000000000000000..ed9d42c47460005d2814be9d23a90cbc4ae19bed --- /dev/null +++ b/man/pst.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pst.R +\name{pst} +\alias{pst} +\title{Simple wrapper for paste0().} +\usage{ +pst(...) +} +\arguments{ +\item{...}{Passed to paste0().} +} +\description{ +Simple wrapper for paste0(). +} diff --git a/man/resample.Rd b/man/resample.Rd new file mode 100644 index 0000000000000000000000000000000000000000..857abbd8c133ef2ddc922f1ef12287eb4eac78d2 --- /dev/null +++ b/man/resample.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/resample.R +\name{resample} +\alias{resample} +\title{Resampling to equidistant time series} +\usage{ +resample( + object, + ts, + tstart = NA, + tend = NA, + timename = "t", + fun = mean, + quantizetime = TRUE, + ... +) +} +\arguments{ +\item{object}{Can be data.frame} + +\item{ts}{(numeric) New sample period in seconds} + +\item{tstart}{A POSIXxx (or charater or numeric), which indicates the first time point in the series returned} + +\item{tend}{A POSIXxx (or charater or numeric), which indicates the last time point in the series returned} + +\item{timename}{(character) The name of the time column in object} + +\item{fun}{(function) The function of apply. 
Default is mean, such that average values are obtained} + +\item{quantizetime}{(logical) Should the new time points be set to the end of the time intervals, or should they also be the result of the fun function} + +\item{...}{Passed on to the fun function} +} +\value{ +A downsampled data.frame +} +\description{ +Make a downsampling to a lower sampling frequency +} +\details{ +Given an object with a column indicating the time points of the observations the +function returns a similar object, where the function is applied for each new (and longer) +interval. + +Typically it is used if for example 15 minute values should be made into 1 hour values. + +NOTE that it is always assumed that the time point is at the end of the time interval, +e.g. if hourly values are returned, then "2019-01-01 01:00" indicates the first hour in 2019. + +All time points at the time point (border) of between two intervals is assigned to the +first interval of the two. +} +\examples{ + +# Generate some test data with 10 minutes sampling frequency for one day +X <- data.frame(t=seq(ct("2019-01-01 00:10"),ct("2019-01-02"), by=10*60)) + +# A single sine over the day +X$val <- sin(as.numeric(X$t)/3600*2*pi/(24)) + +# Resample to hourly average values +Xre <- resample(X, 3600) +plot(X$t, X$val) +lines(Xre$t, Xre$val, type="b", col=2) + +# Resample to hourly max values +Xre <- resample(X, 3600, fun=max) +lines(Xre$t, Xre$val, type="b", col=3) + +# Another starting time point +Xre <- resample(X, 3600, tstart="2019-01-01 00:30") +lines(Xre$t, Xre$val, type="b", col=4) + + +} diff --git a/man/residuals.Rd b/man/residuals.Rd new file mode 100644 index 0000000000000000000000000000000000000000..5d6512e21da9ded58c77dafcfc27f2200c55fa63 --- /dev/null +++ b/man/residuals.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/residuals.R +\name{residuals.data.frame} +\alias{residuals.data.frame} +\alias{residuals.matrix} +\alias{residuals.list} +\alias{residuals.forecastmodel_fit} +\title{Calculate the residuals given a forecast matrix and the observations.} +\usage{ +\method{residuals}{data.frame}(object, y, ...) + +\method{residuals}{matrix}(object, y, ...) + +\method{residuals}{list}(object, y, ...) + +\method{residuals}{forecastmodel_fit}(object, ...) +} +\arguments{ +\item{object}{The forecast matrix (a data.frame with kxx as column names, Yhat in returned fits).} + +\item{y}{The observations vector.} + +\item{...}{Not used.} +} +\value{ +If object is a matrix or data.frame: a data.frame with the residuals for each horizon. +If object is a list: A list with residuals from each element. +} +\description{ +Calculate the residuals given a forecast matrix and the observations. +} +\details{ +Simply give the forecast matrix and the observations to get the residuals for each horizon in the forecast matrix. + +The residuals returned are synced with the observations (i.e. k0) and the columns are names "hxx" (not kxx) to indicate this and will not be lagged in \code{\link{plot_ts}()}. +} +\examples{ +# ?? 
list example +# Just a vector to be forecasted +n <- 100 +D <- data.list() +D$t <- 1:n +D$y <- c(filter(rnorm(n), 0.95, "recursive")) +plot(D$y, type="l") + +# Generate a forecast matrix with a simple persistence model +D$Yhat <- persistence(D$y, kseq=1:4) + +# The residuals for each horizon +D$Resid <- residuals(D$Yhat, D$y) +D$Resid +# Note the names of the columns +names(D$Resid) +# which means that they are aligned with the observations and will not be lagged in the plot +plot_ts(D, c("y|Yhat","Resid")) + +# Check that it matches (the forecasts is lagged in the plot_ts +# such that the forecast for t+k is at t+k (and not t)) +plot_ts(D, c("y|Yhat","Resid"), xlim=c(1,10), kseq=1, + plotfun=function(x,...){lines(x,...,type="b")}) + +# Just for fun, see the auto-correlation function of the persistence +acf(D$Resid$h1, na.action=na.pass) +acf(D$Resid$h4, na.action=na.pass) + +} diff --git a/man/rls_fit.Rd b/man/rls_fit.Rd new file mode 100644 index 0000000000000000000000000000000000000000..eed3348a3aa17e0bfa153af8b63189bf1e456e61 --- /dev/null +++ b/man/rls_fit.Rd @@ -0,0 +1,136 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rls_fit.R +\name{rls_fit} +\alias{rls_fit} +\title{Fit an onlineforecast model with Recursive Least Squares (RLS).} +\usage{ +rls_fit( + prm = NA, + model, + data, + scorefun = NA, + returnanalysis = TRUE, + runcpp = TRUE, + printout = TRUE +) +} +\arguments{ +\item{prm}{vector with the parameters for fitting. Deliberately as the first element to be able to use \code{\link{optim}} or other optimizer. If NA then the model will be fitted with the current values in the input expressions, see examples.} + +\item{model}{as an object of class forecastmodel: The model to be fitted.} + +\item{data}{as a data.list with the data to fit the model on.} + +\item{scorefun}{as a function (optional), default is \code{\link{rmse}}. If the score function is given it will be applied to the residuals of each horizon (only data$scoreperiod is included).} + +\item{returnanalysis}{as a logical. If FALSE then the sum of the scoreval on all horizons are returned, if TRUE a list with values for analysis.} + +\item{runcpp}{logical: If true the c++ implementation of RLS is run, if false the R implementation is run (slower).} + +\item{printout}{logical: If TRUE the offline parameters and the score function value are printed.} +} +\value{ +Depends on: + + - If \code{returnanalysis} is TRUE a list containing: + + * \code{Yhat}: data.frame with forecasts for \code{model$kseq} horizons. + + * \code{model}: The forecastmodel object cloned deep, so can be modified without changing the original object. + + * \code{data}: data.list with the data used, see examples on how to obtain the transformed data. + + * \code{Lfitval}: list with RLS coefficients in a data.frame for each horizon, use \code{\link{plot_ts.rls_fit}} to plot them and to obtain them as a data.frame for each coefficient. + + * \code{scoreval}: data.frame with the scorefun result on each horizon (only scoreperiod is included). + + - If \code{returnanalysis} is FALSE (and \code{scorefun} is given): The sum of the score function on all horizons (specified with model$kseq). +} +\description{ +This function fits the onlineforecast model to the data and returns either: model validation data or just the score value. 
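To summarize the two return modes in one place, a minimal sketch (reusing the `model` and `D` objects that are set up in the examples further down; illustrative only):

```r
# Validation mode: returnanalysis=TRUE (the default) returns a list with Yhat, Lfitval, scoreval, ...
fit <- rls_fit(NA, model, D)
names(fit)

# Score mode: returnanalysis=FALSE together with a scorefun returns a single value,
# which is what an optimizer needs
rls_fit(c(lambda=0.9), model, D, scorefun=rmse, returnanalysis=FALSE)
```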
+} +\details{ +This function has three main purposes (all three are demonstrated in the examples): + +- Returning model validation data, such as residuals and recursively estimated parameters. + +- Optimizing the parameters using an R optimizer function. The parameters to optimize are given in \code{prm}. + +- Fitting a model to data and saving the final state in the model object (such that from that point the model can be updated recursively as new data is received). + +Note, if \code{scorefun} is given, then \code{data$scoreperiod} must be set (integer or logical) to define which points are evaluated by the scorefun. +} +\examples{ + + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +# Define a simple model +model <- forecastmodel$new() +model$output <- "y" +model$add_inputs(Ta = "Ta", + mu = "one()") +model$add_regprm("rls_prm(lambda=0.99)") + +# Before fitting the model, define which points to include in the evaluation of the score function +D$scoreperiod <- in_range("2010-12-20", D$t) +# And the sequence of horizons to fit for +model$kseq <- 1:6 + +# Now we can fit the model with RLS and get the model validation analysis data +fit <- rls_fit(model = model, data = D) +# What did we get back? +names(fit) +# The one-step forecast +plot(D$y, type="l") +lines(fit$Yhat$k1, col=2) +# The one-step RLS coefficients over time (Lfitval is a list of the fits for each horizon) +plot(fit$Lfitval$k1$Ta, type="l") + +# A summary +summary(fit) +# Plot the fit +plot_ts(fit, kseq=1) + +# Fitting with lower lambda makes the RLS coefficients change faster +fit2 <- rls_fit(prm = c(lambda=0.9), model, D) +plot_ts(fit2, kseq=1) + + +# It can return a score +rls_fit(c(lambda=0.9), model, D, scorefun=rmse, returnanalysis=FALSE) + +# Such that it can be passed to an optimizer (see ?rls_optim for a nice wrapper of optim) +val <- optim(c(lambda=0.99), rls_fit, model = model, data = D, scorefun = rmse, + returnanalysis=FALSE) +val$par +# Which can then simply be applied +rls_fit(val$par, model, D, scorefun=rmse, returnanalysis=FALSE) +# See ?rls_optim for how optim is wrapped for a little easier use + +# See rmse as a function of horizon +fit <- rls_fit(val$par, model, D, scorefun = rmse) +plot(fit$scoreval, xlab="Horizon k", ylab="RMSE") +# See ?score for a somewhat more consistent way of calculating this + + +# Try adding a low-pass filter to Ta +model$add_inputs(Ta = "lp(Ta, a1=0.92)") +# To obtain the transformed data, i.e. the data which is used as input to the RLS +model$reset_state() +# Generate the transformed data +datatr <- model$transform_data(D) +# What did we get? +str(datatr) +# See the effect of low-pass filtering +plot(D$Ta$k1, type="l") +lines(datatr$Ta$k1, col=2) +# Try changing the 'a1' coefficient and rerun +# ?rls_optim for how to optimize also this coefficient + + +} +\seealso{ +For optimizing parameters \code{\link{rls_optim}()}, for summary \code{summary.rls_fit}, for plotting \code{\link{plot_ts.rls_fit}()}, and the other functions starting with 'rls_'.
+} diff --git a/man/rls_optim.Rd b/man/rls_optim.Rd new file mode 100644 index 0000000000000000000000000000000000000000..258dfa16fe3746f14b178b900f8c411bee8fbae0 --- /dev/null +++ b/man/rls_optim.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rls_optim.R +\name{rls_optim} +\alias{rls_optim} +\title{Optimize parameters for onlineforecast model fitted with RLS} +\usage{ +rls_optim( + model, + data, + kseq = NA, + scorefun = rmse, + cachedir = "", + cachererun = FALSE, + printout = TRUE, + method = "L-BFGS-B", + ... +) +} +\arguments{ +\item{model}{The onlineforecast model, including inputs, output, kseq, p} + +\item{data}{The data.list which holds the data on which the model is fitted.} + +\item{kseq}{The horizons to fit for (if not set, then model$kseq is used)} + +\item{scorefun}{The function to be score used for calculating the score to be optimized.} + +\item{cachedir}{A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples.} + +\item{cachererun}{A logical controlling whether to run the optimization even if the cache exists.} + +\item{printout}{A logical determining if the score function is printed out in each iteration of the optimization.} + +\item{method}{The method argument for \code{\link{optim}}.} + +\item{...}{Additional parameters to \code{\link{optim}}} +} +\value{ +Result object of optim(). +Parameters resulting from the optimization can be found from \code{result$par} +} +\description{ +Optimize parameters (transformation stage) of RLS model +} +\details{ +This is a wrapper for \code{\link{optim}} to enable easy use of bounds and caching in the optimization. + +One smart trick, is to cache the optimization results. Caching can be done by providing a path to the +\code{cachedir} argument (relative to the current working directory). +E.g. \code{rls_optim(model, D, cachedir="cache")} will write a file in the folder 'cache', such that +next time the same call is carried out, then the file is read instead of running the optimization again. +See the example in \url{https://onlineforecasting.org/vignettes/nice-tricks.html}. +} +\examples{ + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +# Define a simple model +model <- forecastmodel$new() +model$add_inputs(Ta = "Ta", mu = "one()") +model$add_regprm("rls_prm(lambda=0.99)") + +# Before fitting the model, define which points to include in the evaluation of the score function +D$scoreperiod <- in_range("2010-12-20", D$t) +# And the sequence of horizons to fit for +model$kseq <- 1:6 +# Now we can fit the model and get the score, as it is +rls_fit(model=model, data=D, scorefun=rmse, returnanalysis=FALSE) +# Or we can change the lambda +rls_fit(c(lambda=0.9), model, D, rmse, returnanalysis=FALSE) + +# This could be passed to optim() (or any optimizer, see forecastmodel$insert_prm()). 
+optim(c(lambda=0.98), rls_fit, model=model, data=D, scorefun=rmse, returnanalysis=FALSE, + lower=c(lambda=0.9), upper=c(lambda=0.999), method="L-BFGS-B") + +# rls_optim is simply a helper, it makes using bounds easier and enables caching of the results +# First add bounds for lambda (lower, init, upper) +model$add_prmbounds(lambda = c(0.9, 0.98, 0.999)) + +# Now the same optimization as above can be done by +val <- rls_optim(model, D) +val + + +} +\seealso{ +\code{\link{optim}} for how to control the optimization. +} diff --git a/man/rls_predict.Rd b/man/rls_predict.Rd new file mode 100644 index 0000000000000000000000000000000000000000..240a7591e702fe1afea13f4492a8caa23e866fc2 --- /dev/null +++ b/man/rls_predict.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rls_predict.R +\name{rls_predict} +\alias{rls_predict} +\title{Prediction with an rls model.} +\usage{ +rls_predict(model, datatr = NA) +} +\arguments{ +\item{model}{Onlineforecast model object which has been fitted.} + +\item{datatr}{Transformed data.} +} +\value{ +The Yhat forecast matrix with a forecast for each model$kseq and for each time point in \code{datatr$t}. +} +\description{ +Use a fitted forecast model to predict its output variable with transformed data. +} +\details{ +See the ??ref(recursive updating vignette, not yet available). +} +\examples{ + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +# Define a simple model +model <- forecastmodel$new() +model$add_inputs(Ta = "Ta", mu = "one()") +model$add_regprm("rls_prm(lambda=0.99)") + +# Before fitting the model, define which points to include in the evaluation of the score function +D$scoreperiod <- in_range("2010-12-20", D$t) +# And the sequence of horizons to fit for +model$kseq <- 1:6 + +# Transform using the model +datatr <- model$transform_data(D) + +# See the transformed data +str(datatr) + +# The model has not been fitted +model$Lfits + +# To fit +rls_fit(model=model, data=D) + +# Now the fits for each horizon are there (the latest update) +# For example the current parameter estimates +model$Lfits$k1$theta + +# Use the current values for prediction +D$Yhat <- rls_predict(model, datatr) + +# Plot it +plot_ts(D, c("y|Yhat"), kseq=1) + +# Recursive updating and prediction +Dnew <- subset(Dbuilding, c("2011-01-01", "2011-01-02")) + +for(i in 1:length(Dnew$t)){ + # New data arrives + Dt <- subset(Dnew, i) + # Remember that the transformation must only be done once if some transformation + # which has a state, e.g. lp(), is used + datatr <- model$transform_data(Dt) + # Update, remember that this must only be done once for each new point + # (it updates the parameter estimates, i.e. model$Lfits) + rls_update(model, datatr, Dt$heatload) + # Now predict to generate the new forecast + print(rls_predict(model, datatr)) +} + +} diff --git a/man/rls_prm.Rd b/man/rls_prm.Rd new file mode 100644 index 0000000000000000000000000000000000000000..06fbe508dff4a7f6c20b1c417388463ec3f1f2d2 --- /dev/null +++ b/man/rls_prm.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rls_prm.R +\name{rls_prm} +\alias{rls_prm} +\title{Function for generating the parameters for RLS regression} +\usage{ +rls_prm(lambda) +} +\arguments{ +\item{lambda}{The forgetting factor} +} +\value{ +A list of the parameters +} +\description{ +Function for generating the parameters for RLS regression +} +\details{ +The RLS needs only a forgetting factor parameter.
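As a small illustration of the value described above (the exact structure shown in the comment is an assumption based on the documented return value, a plain list of the parameters):

```r
# Expected to be just a named list holding the forgetting factor
str(rls_prm(lambda=0.99))
# e.g. List of 1
#      $ lambda: num 0.99
```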
+} +\examples{ + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +D$scoreperiod <- in_range("2010-12-20", D$t) +# Define a simple model +model <- forecastmodel$new() +model$add_inputs(Ta = "Ta", mu = "one()") +model$kseq <- 1:6 + +# Here the expression which sets the parameters is defined +model$add_regprm("rls_prm(lambda=0.99)") +model$regprmexpr + +# These will fit with lambda=0.99 +rls_fit(prm=NA, model, D) +rls_fit(prm=c(lambda=0.99), model, D) + +# The expression is evaluated when the model is fitted +rls_fit(prm=c(lambda=0.85), model, D) + +# What happens is simply that the expression was manipulated +model$regprmexpr +model$regprm + +# Same change could be done by +model$regprm <- list(lambda=0.3) +model$regprm +val <- rls_fit(prm=NA, model, D) + +} diff --git a/man/rls_summary.Rd b/man/rls_summary.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f8aa6b197b68f26d6f5745eb40fa3dab5f665be6 --- /dev/null +++ b/man/rls_summary.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rls_summary.R +\name{rls_summary} +\alias{rls_summary} +\title{Print summary of an onlineforecast model fitted with RLS} +\usage{ +rls_summary(object, scoreperiod = NA, scorefun = rmse, printit = TRUE, ...) +} +\arguments{ +\item{object}{of class \code{rls_fit}, so a fit calculated by \code{\link{rls_fit}}.} + +\item{scoreperiod}{logical (or index). If this scoreperiod is given, then it will be used over the one in the fit.} + +\item{scorefun}{The score function to be applied on each horizon.} + +\item{printit}{Print the result.} + +\item{...}{Not used.} +} +\value{ +A list of: + + - scorefun. + + - scoreval (value of the scorefun for each horizon). + + - scoreperiod is the scoreperiod used. +} +\description{ +The summary of an onlineforecast model fitted with RLS with simple stats providing a simple overview. +} +\details{ +The following is printed: + +* The model. + +* Number of observations included in the scoreperiod. + +* RLS coefficients summary statistics for the estimated coefficient time series (since observations are correlated, then usual statistics cannot be applied directly): + + - mean: the sample mean of the series. + + - sd: sample standard deviation of the series. + + - min: minimum of the series. + + - max: maximum of the series. + +* Scorefunction applied for each horizon, per default the RMSE. +} +\examples{ + +# Take data +D <- subset(Dbuilding, c("2010-12-15", "2011-01-01")) +D$y <- D$heatload +D$scoreperiod <- in_range("2010-12-20", D$t) +# Define a model +model <- forecastmodel$new() +model$add_inputs(Ta = "Ta", + mu = "one()") +model$add_regprm("rls_prm(lambda=0.99)") +model$kseq <- 1:6 +# Fit it +fit <- rls_fit(prm=c(lambda=0.99), model, D) + +# Print the summary +summary(fit) +# We see: +# - The model (output, inputs, lambda) +# - The Ta coefficient is around -0.12 in average (for all horizons) with a standard dev. of 0.03, +# so not varying extremely (between -0.18 and -0.027). +# - The intercept mu is around 5.5 and varying very little. +# - The RMSE is around 0.9 for all horizons. 
+ +# The residuals and coefficient series can be seen by +plot_ts(fit) + +} diff --git a/man/rls_update.Rd b/man/rls_update.Rd new file mode 100644 index 0000000000000000000000000000000000000000..613dfdf05626371106d3553ae47a19d8c0d06d0f --- /dev/null +++ b/man/rls_update.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rls_update.R +\name{rls_update} +\alias{rls_update} +\title{Updates the model fits} +\usage{ +rls_update(model, datatr = NA, y = NA, runcpp = TRUE) +} +\arguments{ +\item{model}{A model object} + +\item{datatr}{a data.list with transformed data (from model$transform_data(D))} + +\item{y}{A vector of the model output for the corresponding time steps in \code{datatr}} + +\item{runcpp}{Optional, default = TRUE. If TRUE, a C++ implementation of the update is run, otherwise a slower R implementation is used.} +} +\value{ +Returns a named list for each horizon (\code{model$kseq}) containing the variables needed for the RLS fit (for each horizon it is saved in model$Lfits). + +It also updates variables in the forecast model object. +} +\description{ +Calculates the RLS update of the model coefficients with the provided data. +} +\details{ +See vignette ??ref(recursive updating, not yet finished) on how to use the function. +} +\examples{ + +# See rls_predict examples + +} +\seealso{ +See \code{\link{rls_predict}}. +} diff --git a/man/rls_update_cpp.Rd b/man/rls_update_cpp.Rd new file mode 100644 index 0000000000000000000000000000000000000000..38b4978c0f8ab5359a75094df4bcd3522413a59d --- /dev/null +++ b/man/rls_update_cpp.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{rls_update_cpp} +\alias{rls_update_cpp} +\title{Calculating k-step recursive least squares estimates} +\arguments{ +\item{y}{Vector of observations} + +\item{X}{Matrix of input variables (design matrix)} + +\item{theta}{Vector of parameters (initial value)} + +\item{P}{Covariance matrix (initial value)} + +\item{lambda}{Forgetting factor} + +\item{k}{Forecast horizon} + +\item{n}{Length of the input} + +\item{np}{Dimension of P (np x np)} + +\item{istart}{Start index} + +\item{kmax}{Keep only the last kmax rows for next time} +} +\description{ +This function applies the k-step recursive least squares scheme to estimate +parameters in a linear regression model. +} diff --git a/man/rmse.Rd b/man/rmse.Rd new file mode 100644 index 0000000000000000000000000000000000000000..1fe836ca91fda21e614b9e3b5615b0f40d39da51 --- /dev/null +++ b/man/rmse.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rmse.R +\name{rmse} +\alias{rmse} +\title{Computes the RMSE score.} +\usage{ +rmse(x) +} +\arguments{ +\item{x}{a numerical vector of residuals.} +} +\value{ +The RMSE score. +} +\description{ +Returns the RMSE. +} +\details{ +Used for forecast evaluation and for optimization of parameters in model fitting. + +Note that \code{NA}s are ignored (i.e. \code{mean} is called with \code{na.rm=TRUE}).
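For reference, a minimal sketch of the calculation implied above, assuming the usual definition of RMSE applied to a residual vector:

```r
# Root mean square error of the residuals, with NAs ignored
rmse_sketch <- function(x){
    sqrt(mean(x^2, na.rm=TRUE))
}
rmse_sketch(c(1, -2, NA, 3))
```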
+} +\examples{ + + + # Just a vector to be forecasted + y <- c(filter(rnorm(100), 0.95, "recursive")) + # Generate a forecast matrix with a simple persistence model + Yhat <- persistence(y, kseq=1:4) + # The residuals for each horizon + Resid <- residuals(Yhat, y) + +# Calculate the score for the k1 horizon +rmse(Resid$h1) + +# For all horizons +apply(Resid, 2, rmse) + + +} +\seealso{ +\code{\link{score}()} for calculation of a score for the k'th horizon +} diff --git a/man/score.Rd b/man/score.Rd new file mode 100644 index 0000000000000000000000000000000000000000..bfd2d7b2edb1daeeb3de3318935352728a180aaf --- /dev/null +++ b/man/score.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/score.R +\name{score} +\alias{score} +\alias{score.list} +\alias{score.data.frame} +\title{Calculate the score for each horizon.} +\usage{ +score(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...) + +\method{score}{list}(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...) + +\method{score}{data.frame}(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...) +} +\arguments{ +\item{object}{??list or A matrix with residuals (columns named \code{hxx}) for which to calculate the score for each horizon.} + +\item{scoreperiod}{as a logical vector controlling which points to be included in the score calculation. If NA then all values are included (depeding on 'complete').} + +\item{usecomplete}{TRUE then only the values available for all horizons are included (i.e. if at one time point there is a missing value, then values for this time point is removed for all horizons in the calculation).} + +\item{scorefun}{The score function.} + +\item{...}{is passed on to the score function.} +} +\value{ +A list with the a numeric vector with the score value for each horizon and the applied \code{scoreperiod} (note can be different from the given scoreperiod, if only complete observations are used (as per default)). +} +\description{ +Calculates the score for each horizon for a matrix with residuals for each horizon. +} +\details{ +Applies the \code{scorefun} on all horizons (each column) of the residuals matrix. See the description of each parameter for more details. +} +\examples{ + +# Just a vector to be forecasted +y <- c(filter(rnorm(100), 0.95, "recursive")) +# Generate a forecast matrix with a simple persistence model +Yhat <- persistence(y, kseq=1:4) +# The residuals for each horizon +Resid <- residuals(Yhat, y) + +# Calculate the score for the k1 horizon +score(Resid) + +# In the beginning the horizons have NAs +head(Resid) +# Default is that only complete cases over all horizons are included +score(Resid) +# So including all cases for each horizon will give different values +score(Resid, usecomplete=FALSE) + +# Given a list +# The residuals for each horizon +Resid2 <- residuals(persistence(y,kseq=1:4)+rnorm(100), y) + +score(list(Resid,Resid2)) +} diff --git a/man/setpar.Rd b/man/setpar.Rd new file mode 100644 index 0000000000000000000000000000000000000000..bdbbca275d637b2dba12c9aaa1a26affa4976d0d --- /dev/null +++ b/man/setpar.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setpar.R +\name{setpar} +\alias{setpar} +\title{Setting \code{\link{par}()} plotting parameters} +\usage{ +setpar(tmpl = "ts", mfrow = c(1, 1), ...) 
+} +\arguments{ +\item{tmpl}{The name of the parameter template, "ts" is the default} + +\item{mfrow}{The mfrow for \code{par}.} + +\item{...}{More parameters for \code{par}.} +} +\value{ +Returns the original set of parameters, such that they can be reset after plotting. +} +\description{ +Setting \code{\link{par}()} plotting parameters to a set of default values +} +\details{ +A simple function, which sets the \code{\link{par}()} plotting parameters to a default set of values. + +Actually, it is only really used for setting useful \code{par} values for multiple time series plots with the same x-axis. +Give \code{tmpl="ts"} and \code{mfrow=c(x,1)}, where x is the number of plots. +} +\examples{ + +# Make some data +D <- data.frame(t=seq(ct("2020-01-01"),ct("2020-01-10"),len=100), x=rnorm(100), y=runif(100)) +# Remember the current par values +oldpar <- setpar() + +# Generate two stacked plots with the same x-axis +setpar("ts", mfrow=c(2,1)) +plot(D$t, D$x, type="l") +plot(D$t, D$y, type="l") +# Note xaxt="s" must be set +axis.POSIXct(1, D$t, xaxt="s", format="\%Y-\%m-\%d") + +# Set back the par to the previous values +par(oldpar) + +# In a function, where this is used and a plot is generated, +# then do like this in order to automatically reset on exit +oldpar <- setpar(mfrow=c(2,1)) +on.exit(par(oldpar)) + +} diff --git a/man/stairs.Rd b/man/stairs.Rd new file mode 100644 index 0000000000000000000000000000000000000000..0fa38d4566c19858beb53bcef8e0e3dc5f9f4382 --- /dev/null +++ b/man/stairs.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stairs.R +\name{stairs} +\alias{stairs} +\title{Plotting stairs with time point at end of interval.} +\usage{ +stairs(x, y, type = "b", preline = FALSE, pch = 19, ...) +} +\arguments{ +\item{x}{x values for plot.} + +\item{y}{y values for plot.} + +\item{type}{if 'b' then points are included.} + +\item{preline}{if TRUE, then a line backwards from the first point is added.} + +\item{pch}{Passed to \code{points()}.} + +\item{...}{Passed to \code{lines()} and \code{points()} when they are called in the function.} +} +\description{ +Plotting steps with time point at end of interval +} +\details{ +It's easy to plot stairs with \code{plot(x,y,type="s")}, however that makes the steps go forward from \code{x}; for time series this works if the time points are at the beginning of the intervals. + +Often with time series the time points are at the end of the intervals, so the steps should go backward; this is achieved with this function.
+} +\examples{ + +# Usual stairs plot has steps forward from x +x <- rnorm(10) +plot(1:10, x, type="s") + +# Stairs with step backward from x +plot(1:10, x, type="n") +stairs(1:10, x) + +# Use for time series plotting +plot_ts(Dbuilding, "heatload", c("2010-12-15","2010-12-16"), plotfun=stairs) + +# Set it globally for all plot_ts +p <- par_ts() +p$plotfun <- stairs +options(par_ts=p) +plot_ts(Dbuilding, "heatload", c("2010-12-15","2010-12-16")) + +# Modify it to only lines +plot_ts(Dbuilding, "heatload", c("2010-12-15","2010-12-16"), + plotfun=function(x,y,...){stairs(x,y, type="l")}) + +} diff --git a/man/state_getval.Rd b/man/state_getval.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cccdc87c7c2ccefa5604a6f9c63d31907a51ccc5 --- /dev/null +++ b/man/state_getval.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/state_getval.R +\name{state_getval} +\alias{state_getval} +\title{Get the state value kept in the last call.} +\usage{ +state_getval(initval) +} +\arguments{ +\item{initval}{If no state was kept, then this init value is returned.} +} +\value{ +The state value, but if not found, then the initval. +} +\description{ +Get the state value kept in the last call to the transformation function. +} +\details{ +Transformation functions (e.g. \code{\link{lp}}, \code{\link{fs}}, \code{\link{bspline}}) can need to keep a state value between calls, e.g. when new data arrives and must be transformed. This function is used for getting the state value set in the last call to the function. + +Uses the \code{input_class$state_getval()}. +} +\examples{ + +# See how it is used in lp, which needs to save the state of the filter +# Note that nothing else than getting and setting the state is needed +# in transformations (model$transform_data()). Multiple transformation functions can be called, +# but they are always called in the same order, so the state (set,get) functions keep a counter internally +# to make sure that the correct values are set and returned when called again. +lp + + +} +\seealso{ +\code{\link{state_setval}()} for setting the state value and \code{\link{input_class}}. +} diff --git a/man/state_setval.Rd b/man/state_setval.Rd new file mode 100644 index 0000000000000000000000000000000000000000..af1c4bd12499e7b8a41118ee7a780d31854fb7b5 --- /dev/null +++ b/man/state_setval.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/state_setval.R +\name{state_setval} +\alias{state_setval} +\title{Set a state value to be kept until the next time the transformation function is called.} +\usage{ +state_setval(val) +} +\arguments{ +\item{val}{The value to set and keep for the next call.} +} +\description{ +Set a state value to be kept until the next time the transformation function is called. +} +\details{ +Transformation functions (e.g. \code{\link{lp}}, \code{\link{fs}}, \code{\link{bspline}}) can need to keep a state value between calls, e.g. when new data arrives and must be transformed. This function is used for setting the state value to be used in the next call to the function. + +Uses the \code{input_class$state_setval()}.
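To make the get/set pattern concrete, a hypothetical transformation written under the assumptions above: it keeps a running offset between calls and is only meaningful when evaluated by model$transform_data(), where the state functions are available.

```r
# A hypothetical cumulative-sum transformation over a forecast matrix, which
# continues from where the previous call ended when new data arrives
mycumsum <- function(X){
    # Value carried over from the last call (zeros the first time)
    offset <- state_getval(initval = rep(0, ncol(X)))
    Y <- as.data.frame(lapply(seq_along(X), function(j){ offset[j] + cumsum(X[, j]) }))
    names(Y) <- names(X)
    # Keep the last row, so the next call can continue from it
    state_setval(unlist(Y[nrow(Y), ]))
    Y
}
```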
+} +\examples{ + +# See how it is used in lp, which needs to save the state of the filter +# Note that nothing else than getting and setting the state is needed +# in transformations (model$transform_data()). Multiple transformation functions can be called, +# but they are always called in the same order, so the state (set,get) functions keep a counter internally +# to make sure that the correct values are set and returned when called again. +lp + + +} +\seealso{ +\code{\link{state_getval}()} for getting the state value and \code{\link{input_class}}. +} diff --git a/man/step_optim.Rd b/man/step_optim.Rd new file mode 100644 index 0000000000000000000000000000000000000000..febd2e67647784615b314ae843f774a074eb5049 --- /dev/null +++ b/man/step_optim.Rd @@ -0,0 +1,211 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/step_optim.R +\name{step_optim} +\alias{step_optim} +\title{Forward and backward model selection} +\usage{ +step_optim( + modelfull, + data, + prm = list(NA), + direction = c("both", "backward", "forward", "backwardboth", "forwardboth"), + modelstart = NA, + keepinputs = FALSE, + optimfun = rls_optim, + fitfun = NA, + scorefun = rmse, + printout = FALSE, + mc.cores = getOption("mc.cores", 2L), + ... +) +} +\arguments{ +\item{modelfull}{The full forecastmodel containing all inputs which can be +included in the selection.} + +\item{data}{The data.list which holds the data on which the model is fitted.} + +\item{prm}{A list of integer parameters to be stepped. Given using the same +syntax as parameters for optimization, e.g. `list(I__degree = c(min=3, +max=7))` will step the "degree" for input "I".} + +\item{direction}{The direction to be used in the selection process.} + +\item{modelstart}{A forecastmodel. If it is set, then it will be used as the +selected model from the first step of the stepping. It should be a sub-model +of the full model.} + +\item{keepinputs}{If TRUE no inputs can be removed in a step, if FALSE then +any input can be removed. If given as a character vector with names of +inputs, then they cannot be removed in any step.} + +\item{optimfun}{The function which will carry out the optimization in each +step.} + +\item{fitfun}{A fit function, should be the same as used in optimfun(). If +provided, then the score is calculated with this function (instead of the +one called in optimfun(), hence the default is rls_fit(), which is called +in rls_optim()). Furthermore, information on complete cases is printed +and returned.} + +\item{scorefun}{The score function used.} + +\item{printout}{Logical. Passed on to fitting functions.} + +\item{mc.cores}{The mc.cores argument of mclapply. If debugging, it can be +necessary to set it to 1 to stop execution.} + +\item{...}{Additional arguments which will be passed on to optimfun, for +example the \code{control} list for \code{\link{optim}}, e.g. to limit the number of iterations (see the examples).} +} +\value{ +A list with the result of each step: + - '$model' is the model selected in each step + - '$score' is the score for the model selected in each step + - '$optimresult' is the result returned by the optimfun + - '$completecases' a logical vector (NA if the fitfun argument is not given) indicating which time points were complete across all horizons and models for the particular step. +} +\description{ +Forward and backward model selection +} +\details{ +This function takes a model and carries out a model selection by stepping +backward, forward or in both directions. + +Note that mclapply is used. In order to control the number of cores used, +set it, e.g.
to one core with `options(mc.cores=1)`, which is needed for +debugging to work. + +The full model containing all inputs must be given. In each step new models +are generated, with either one removed input or one added input, and then all +the generated models are optimized and their scores compared. If any new +model has an improved score compared to the currently selected model, then +the new one is selected and the process is repeated until no further improvement is +obtained. + +In addition to selecting inputs, integer parameters can also be stepped +through, e.g. the order of basis splines or the number of harmonics in +Fourier series. + +The stepping process is different depending on the direction. In addition to +the full model, a starting model can be given; then the selection process +will start from that model. + +If the direction is "both", which is the default (same as "backwardboth"), then the +stepping is: + - In the first step inputs are removed one-by-one + - In the following steps, inputs still in the model are removed one-by-one, and + inputs not in the model are added one-by-one + +If the direction is "backward": + - Inputs are only removed in each step + +If the direction is "forwardboth": + - In the first step all inputs are removed + - In the following steps (same as "both") + +If the direction is "forward": +- In the first step all inputs are removed and from there inputs are only added + + +For stepping through integer variables in the transformation stage, these +have to be set in the "prm" argument. The stepping process will follow +the input selection described above. + +In case of missing values, especially in combination with auto-regressive +models, it can be very important to make sure that only complete cases are +included when calculating the score. By providing the `fitfun` argument, +the score will be calculated using only the complete cases across horizons +and models in each step, see the last examples. + +Note that either kseq or kseqopt must be set on the modelfull object. If kseqopt +is set, then it is used no matter the value of kseq. +} +\examples{ + + +# The data, just a rather short period to keep running times short +D <- subset(Dbuilding, c("2010-12-15", "2011-02-01")) +# Set the score period +D$scoreperiod <- in_range("2010-12-22", D$t) +# +D$tday <- make_tday(D$t, 1:36) +# Generate an input which is just random noise, i.e. it
should be removed in the selection +set.seed(83792) +D$noise <- make_input(rnorm(length(D$t)), 1:36) + +# The full model +model <- forecastmodel$new() +# Set the model output +model$output = "heatload" +# Inputs (transformation step) +model$add_inputs(Ta = "Ta", + noise = "noise", + mu_tday = "fs(tday/24, nharmonics=5)", + mu = "one()") +# Regression step parameters +model$add_regprm("rls_prm(lambda=0.9)") +# Optimization bounds for parameters +model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999)) + +# Select a model, in the optimization just run it for a single horizon +# Note that kseqopt could also be set +model$kseq <- 5 + +# Set the parameters to step on, note the +prm <- list(mu_tday__nharmonics = c(min=3, max=7)) + +# Note the control argument, which is passed to optim, it's now set to few +# iterations in the offline parameter optimization (MUST be increased in real applications) +control <- list(maxit=1) + +# On Windows multi cores are not supported, so for the examples use only one core +mc.cores <- 1 + +# Run the default selection scheme, which is "both" +# (same as "backwardboth" if no start model is given) +\donttest{L <- step_optim(model, D, prm, control=control, mc.cores=mc.cores) + +# The optim value from each step is returned +getse(L, "optimresult") +getse(L,"score") + +# The final model +L$final$model + +# Other selection schemes +Lforward <- step_optim(model, D, prm, "forward", control=control, mc.cores=mc.cores) +Lbackward <- step_optim(model, D, prm, "backward", control=control, mc.cores=mc.cores) +Lbackwardboth <- step_optim(model, D, prm, "backwardboth", control=control, mc.cores=mc.cores) +Lforwardboth <- step_optim(model, D, prm, "forwardboth", control=control, mc.cores=mc.cores) + +# It's possible avoid removing specified inputs +L <- step_optim(model, D, prm, keepinputs=c("mu","mu_tday"), control=control, mc.cores=mc.cores) + +# Give a starting model +modelstart <- model$clone_deep() +modelstart$inputs[2:3] <- NULL +L <- step_optim(model, D, prm, modelstart=modelstart, control=control, mc.cores=mc.cores) + +# If a fitting function is given, then it will be used for calculating the forecasts. +# Below it's the rls_fit function, so the same as used internally in rls_fit, so only +# difference is that now ONLY on the complete cases for all models in each step are used +# when calculating the score in each step +L1 <- step_optim(model, D, prm, fitfun=rls_fit, control=control, mc.cores=mc.cores) + +# The easiest way to conclude if missing values have an influence is to +# compare the selection result running with and without +L2 <- step_optim(model, D, prm, control=control, mc.cores=mc.cores) + +# Compare the selected models +tmp1 <- capture.output(getse(L1, "model")) +tmp2 <- capture.output(getse(L2, "model")) +identical(tmp1, tmp2)} + + +# Note that caching can be really smart (the cache files are located in the +# cachedir folder (folder in current working directory, can be removed with +# unlink(foldername)) See e.g. 
`?rls_optim` for how the caching works +# L <- step_optim(model, D, prm, "forward", cachedir="cache", cachererun=FALSE, mc.cores=mc.cores) + +} diff --git a/man/subset.data.list.Rd b/man/subset.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..9d7be6629a20a10ca7ffad68641ee7ac41fa0d6a --- /dev/null +++ b/man/subset.data.list.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{subset.data.list} +\alias{subset.data.list} +\title{Take a subset of a data.list.} +\usage{ +\method{subset}{data.list}( + x, + subset = NA, + nms = NA, + kseq = NA, + lagforecasts = FALSE, + pattern = NA, + ... +) +} +\arguments{ +\item{x}{The data.list to take a subset of.} + +\item{subset}{Given as the integer indexes or a logical vector, or alternatively \code{c(tstart,tend)}, where tstart and tend are either as POSIX or characters.} + +\item{nms}{The names of the variables in \code{x} to be included.} + +\item{kseq}{The k horizons of forecasts to be included.} + +\item{lagforecasts}{Should the forecasts be lagged k steps (thus useful for plotting etc.).} + +\item{pattern}{Regex pattern applied to select the variables in x to be included.} + +\item{...}{Not implemented.} +} +\value{ +a data.list with the subset. +} +\description{ +Take a subset of a data.list. +} +\details{ +Different arguments can be given to select the subset. See the examples. +} +\examples{ +# Use the data.list with building heat load +D <- Dbuilding +# Take a subset for the example +D <- subset(D, 1:10, nms=c("t","Taobs","Ta","Iobs","I"), kseq=1:3) + +# Take subset index 2:4 +subset(D, 2:4) + +# Take subset for a period +subset(D, c("2010-12-15 02:00","2010-12-15 04:00")) + +# Cannot request a variable not there +try(subset(D, nms=c("x","Ta"))) + +# Take specific horizons +subset(D, nms=c("I","Ta"), kseq = 1:2) +subset(D, nms=c("I","Ta"), kseq = 1) + +# Lag the forecasts such that they are aligned in time with observations +subset(D, nms=c("Taobs","Ta"), kseq = 2:3, lagforecasts = TRUE) + +# The order follows the order in nms +subset(D, nms=c("Ta","I"), kseq = 2) + +# Return variables mathing a regex +subset(D, kseq=2, pattern="^I") + +# Take data for Ta and lag the forecasts (good for plotting and fitting a model) +X <- subset(Dbuilding, 1:1000, pattern="^Ta", kseq = 10, lagforecasts = TRUE) + +# A scatter plot between the forecast and the observations +# (try lagforecasts = FALSE and see the difference) +plot(X$Ta$k10, X$Taobs) + +# Fit a model for the 10-step horizon +abline(lm(Taobs ~ Ta.k10, X), col=2) + +} diff --git a/misc-R/step_optim-test.R b/misc-R/step_optim-test.R new file mode 100644 index 0000000000000000000000000000000000000000..419f02678e188accd4766bdaf91c73a8aa1bba53 --- /dev/null +++ b/misc-R/step_optim-test.R @@ -0,0 +1,72 @@ + +# Load the current package +library("devtools") +pack <- as.package("../../onlineforecast") +load_all(pack) + + +# Set the data in D to simplify notation +D <- Dbuilding +# Set the score period +D$scoreperiod <- in_range("2010-12-22", D$t) +# +D$tday <- make_tday(D$t, 2) +# Generate noise input +set.seed(83792) +D$noise <- make_input(rnorm(length(D$t)), 2) + +# Generate new object (R6 class) +model <- forecastmodel$new() +# Set the model output +model$output = "heatload" +# Inputs (transformation step) +model$add_inputs(Ta = "Ta", + I = "bspline(tday, Boundary.knots = c(6,18), degree = 5, intercept=TRUE) %**% I", + noise = "noise", + mu_tday = "fs(tday/24, nharmonics=5)", + mu = "one()") +# Regression 
step parameters +model$add_regprm("rls_prm(lambda=0.9)") +# Optimization bounds for parameters +model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999)) + + +# Select a model +kseq <- 2 +prm <- list(I__degree = c(min=3, max=7), mu_tday__nharmonics = c(min=3, max=7)) + +L <- step_optim(model, D, kseq, prm, "forward") +L <- step_optim(model, D, kseq, prm, "backward") +L <- step_optim(model, D, kseq, prm, "backwardboth") +L <- step_optim(model, D, kseq, prm, "forwardboth") + + +# Caching (the cache files are located in the cachedir folder (folder in current working directory, can be removed with unlink(foldername)) +L <- step_optim(model, D, kseq, prm, "forward", cachedir="cache", cachererun=FALSE) +L <- step_optim(model, D, kseq, prm, "backward", cachedir="cache", cachererun=FALSE) +L <- step_optim(model, D, kseq, prm, "backwardboth", cachedir="cache", cachererun=FALSE) +L <- step_optim(model, D, kseq, prm, "forwardboth", cachedir="cache", cachererun=FALSE) + + + +# Start from a given model +modelstart <- model$clone_deep() +modelstart$inputs <- modelstart$inputs[-2:-4] +L <- step_optim(model, D, kseq, prm, "both", modelstart=modelstart) +L <- step_optim(model, D, kseq, prm, "forward", modelstart=modelstart) + +# The the result in detail (L holds the selected model and it's optim result in each step) +L[[1]] +L[[2]] + +# The models +getse(L, "model") +# The optim results +getse(L, "result") +# +unlist(getse(getse(L, "model"), "prm")) +sum(unlist(getse(getse(L, "result"), "counts"))) + + + + diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000000000000000000000000000000000000..1f95ad427fd367d4fcb346d8e0afdbe07dfbf8c5 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(onlineforecast) + +test_check("onlineforecast") diff --git a/tests/testthat/test-newtest.R b/tests/testthat/test-newtest.R new file mode 100644 index 0000000000000000000000000000000000000000..8849056e25add60923383f43de7eaca9211ca51f --- /dev/null +++ b/tests/testthat/test-newtest.R @@ -0,0 +1,3 @@ +test_that("multiplication works", { + expect_equal(2 * 2, 4) +}) diff --git a/tests/testthat/test-rls-heat-load.R b/tests/testthat/test-rls-heat-load.R index 054105f1781badfe05536b104d2f6400d943d5e4..426fd9d96ed5e7b79ffac90171f3b93fd50d2602 100644 --- a/tests/testthat/test-rls-heat-load.R +++ b/tests/testthat/test-rls-heat-load.R @@ -1,5 +1,3 @@ -context("running RLS test") - ## Load current package (must be outcommented) ## library("devtools") ## load_all(as.package("../../../onlineforecast")) diff --git a/vignettes/forecast-evaluation.Rmd b/vignettes/forecast-evaluation.Rmd index f40ad815dc04a0efea2c4192d147c8fde69717fa..1b44e287aec1d5c3985f523cd6c2c1b0249d9d62 100644 --- a/vignettes/forecast-evaluation.Rmd +++ b/vignettes/forecast-evaluation.Rmd @@ -2,7 +2,10 @@ title: "Forecast evaluation" author: "Peder Bacher" date: "`r Sys.Date()`" -output: rmarkdown::html_vignette +output: + rmarkdown::html_vignette: + toc: true + toc_debth: 3 vignette: > %\VignetteIndexEntry{Forecast evaluation} %\VignetteEngine{knitr::rmarkdown} @@ -16,6 +19,7 @@ library(knitr) vignettename <- "forecast-evaluation" # REMEMBER: IF CHANGING IN THE shared-init (next block), then copy to the others! 
``` + <!--shared-init-start--> ```{r init, cache=FALSE, include=FALSE, purl=FALSE} # Width will scale all @@ -28,6 +32,7 @@ figheight2 <- 6.5 figheight3 <- 8 figheight4 <- 9.5 figheight5 <- 11 + # Set the size of squared figures (same height as full: figheight/figwidth) owsval <- 0.35 ows <- paste0(owsval*100,"%") @@ -42,7 +47,7 @@ fhs <- figwidth * owsval # Set the knitr options knitr::opts_chunk$set( collapse = TRUE, - comment = "## ", + comment = "##!! ", prompt = FALSE, cache = TRUE, cache.path = paste0("../tmp/vignettes/tmp-",vignettename,"/"), @@ -54,25 +59,32 @@ knitr::opts_chunk$set( ) options(digits=3) -hook_output <- knit_hooks$get("output") -knit_hooks$set(output = function(x, options) { + +# For cropping output and messages +cropfun <- function(x, options, func){ lines <- options$output.lines - if (is.null(lines)) { - return(hook_output(x, options)) # pass to default hook - } - x <- unlist(strsplit(x, "\n")) - more <- "## ...output cropped" - if (length(lines)==1) { # first n lines - if (length(x) > lines) { - # truncate the output, but add .... - x <- c(head(x, lines), more) - } - } else { - x <- c(more, x[lines], more) + ## if (is.null(lines)) { + ## return(func(x, options)) # pass to default hook + ## } + if(!is.null(lines)){ + x <- unlist(strsplit(x, "\n")) + i <- grep("##!!",x) + if(length(i) > lines){ + # truncate the output, but add .... + x <- x[-i[(lines+1):length(i)]] + x[i[lines]] <- pst(x[i[lines]], "\n\n## ...output cropped") + } + # paste these lines together + x <- paste(c(x, ""), collapse = "\n") } - # paste these lines together - x <- paste(c(x, ""), collapse = "\n") - hook_output(x, options) + x <- gsub("!!","",x) + func(x, options) +} + +hook_chunk <- knit_hooks$get("chunk") + +knit_hooks$set(chunk = function(x, options) { + cropfun(x, options, hook_chunk) }) ``` @@ -107,7 +119,7 @@ Just start by: D <- Dbuilding # Keep the model output in y (just easier code later) D$y <- D$heatload -# +# Generate time of day in a forecast matrix D$tday <- make_tday(D$t, 0:36) ``` @@ -168,21 +180,21 @@ model$add_inputs(Ta = "lp(Ta, a1=0.9)", I = "lp(I, a1=0.9)", mu = "one()") model$add_prmbounds(Ta__a1 = c(0.8, 0.9, 0.99), - I__a1 = c(0.6, 0.9, 0.99), - lambda = c(0.9, 0.99, 0.9999)) + I__a1 = c(0.6, 0.9, 0.99), + lambda = c(0.9, 0.99, 0.9999)) model$add_regprm("rls_prm(lambda=0.9)") -model$kseq <- c(3,18) -# Optimize the parameters -model$prm <- rls_optim(model, D)$par +# Optimize the parameters, only on two horizons +kseqopt <- c(3,18) +rls_optim(model, D, kseqopt) ``` Fit for all horizons and see the fit summary: ```{r} -# Fit for all horizons +# Forecast for all horizons model$kseq <- 1:36 # Fit with RLS fit1 <- rls_fit(model$prm, model, D) -# Check the fit +# See the summary of the fit summary(fit1) ``` @@ -191,31 +203,26 @@ Fourier series. 
It can simply be added to the current model object: ```{r, output.lines=10} # Add a diurnal curve using fourier series model$add_inputs(mu_tday = "fs(tday/24, nharmonics=4)") -model$kseq <- c(3,18) # Optimize the parameters -model$prm <- rls_optim(model, D)$par +rls_optim(model, D, kseq=kseqopt) ``` Fit for all horizons and see the fit summary: ```{r} -# Fit for all horizons -model$kseq <- 1:36 # Fit with RLS fit2 <- rls_fit(model$prm, model, D) # Check the fit summary(fit2) ``` - - Keep the forecasts for plotting and later analysis: ```{r} -# Keep the forecasts from each model +# Keep the forecasts from each model by just inserting them in the data.list D$Yhat1 <- fit1$Yhat D$Yhat2 <- fit2$Yhat ``` -Plot the full score period: +Plot the forecasts for the full score period: ```{r, fig.height=figheight2} # Plot to see the forecasts for the shortest and the longest horizon plot_ts(subset(D,D$scoreperiod), c("^y|^Yhat1","^y|^Yhat2"), kseq = c(1,36)) @@ -251,10 +258,8 @@ the latest value lagged a given period from the forecast time point. First the simple persistence: ```{r} -# Just keep the horizons -kseq <- 1:36 -# The simple persistence -D$YhatP <- persistence(D$y, kseq) +# The simple persistence (forecast for same horizons as the model) +D$YhatP <- persistence(D$y, model$kseq) # Plot a few horizons plot_ts(D, c("^y$|YhatP$"), c("2011-01-05","2011-01-10"), kseq=c(1,24,36)) ``` @@ -269,7 +274,7 @@ time point, at the same time of day, as the forecast time point (i.e. `tod(t+k)`). It can be obtained by: ```{r} # Use the argument perlen to set the period length -D$YhatDP <- persistence(D$y, kseq, perlen=24) +D$YhatDP <- persistence(D$y, model$kseq, perlen=24) # Plot a few horizons plot_ts(D, c("^y$|YhatDP$"), c("2011-01-05","2011-01-10"), kseq=c(1,24,36)) ``` @@ -282,13 +287,14 @@ lag values (going >48 the forecasts will be 72 lagged and so fourth). Now it's just a matter of calculating the score, as a function of the horizon, for each model and compare them. -We have kept the forecasts in the same format for each model, we can find them by: +We have kept the forecasts in forecast matrices for each model, we can find them by: ```{r} # Find the forecasts in D nms <- grep("^Yhat", names(D), value=TRUE) nms ``` -So it's the small model, large model, and simple and diurnal persistence, respectively. +So it's the small and the large model, and as reference the simple and the +diurnal persistence model. One quite important point: When comparing forecasts from different models exactly the same forecast points must be included. When NAs are present, not all @@ -299,42 +305,60 @@ So to make sure that exactly the same points are included in the score calculation, we must only forecast points where all forecasts are available (i.e. non-NA): ```{r} -# The non-NA for the first forecast -ok <- !is.na(D[[nms[1]]]) -# Go through the remaining: all must be non-NA for a point -for(nm in nms[-1]){ - ok <- ok & !is.na(D[[nm]]) -} -ok <- as.data.frame(ok) -names(ok) <- pst("k",kseq) -# Lag to match resiuduals in time -ok <- lagdf(ok, "+k") -# Only the score period -ok <- ok & D$scoreperiod -# Finally, the vector with TRUE for all points with no NAs for any forecast -ok <- apply(ok, 1, all) +# Find all complete cases for all forecasts and horizons +ok <- complete_cases(D[nms]) ``` -How many points are left? 
+Check how many of the points are complete cases (i.e. non-NA for all horizons in all forecasts): ```{r} sum(ok) length(ok) ``` -Now the residuals can be calculated and the score: +The forecasts will always have NAs from the start, e.g.: ```{r} -# Use the residuals function +D$Yhat1[1:11, 1:10] +``` +but in this particular case other periods with NAs exist: +```{r} +D$y[59:72] +D$YhatP[59:72, 1] +``` +These have been excluded as non-complete cases. + +Actually we only want to include the scoreperiod in the evaluation: +```{r} +ok <- ok & D$scoreperiod +``` +and in that period all models have complete forecasts: +```{r} +sum(ok) +sum(D$scoreperiod) +``` + +We can now use the `score()` function for calculating the score with only the +complete cases found above: +```{r} +# The score as a function of the horizon R <- residuals(D$Yhat1, D$y) -# And the score as a function of the horizon -score(R, scoreperiod=ok)$scoreval +score(R, ok & D$scoreperiod) +``` +Actually, the default is to use only complete cases, hence: +```{r} +# Only complete cases are used per default +score(R, D$scoreperiod) == score(R, ok & D$scoreperiod) +``` +Whether to use complete cases only can be controlled by: +```{r} +# The score as a function of the horizon +score(R, usecomplete=FALSE) == score(R) ``` -Calculated the score (default is RMSE) for all models: +These steps can be carried out for all models in one call, where per default only +complete cases across all models are used: ```{r} -RMSE <- sapply(nms, function(nm){ - score(residuals(D[[nm]],D$y), ok)$scoreval -}) +RMSE <- score(residuals(D[nms], D$y), D$scoreperiod) ``` ```{r, include=FALSE} @@ -346,47 +370,41 @@ RMSE <- sapply(nms, function(nm){ Plot the RMSE as a function of the horizon: ```{r, fig.height=figheight2} -RMSE <- as.data.frame(RMSE) -names(RMSE) <- nms - -plot(0, type="n", xlim=range(kseq), ylim=range(RMSE), xlab="Horizon k", ylab="RMSE (kW)") -for(i in 1:length(RMSE)){ - points(kseq, RMSE[ ,i], type="b", col=i) +plot(0, type="n", xlim=range(model$kseq), ylim=range(RMSE), xlab="Horizon k", ylab="RMSE (kW)") +for(i in 1:ncol(RMSE)){ + points(model$kseq, RMSE[ ,i], type="b", col=i) } +legend("topleft", nms, lty=1, col=1:length(nms)) ``` - ### Training set and test set -As explained, it is most times not necessary to divide in a train and a test -set, when fitting recursively, however it can be useful sometimes. +As explained, it is usually not necessary to divide the data into a training and a test set +when fitting recursively (i.e. using RLS); however, it can sometimes be useful.
An easy approach is to set a logical vector, which is TRUE until the end of the -training period: +training period (with a week of burn-in): ```{r plottrain} -D$trainperiod <- in_range(D$t[1]-1, D$t, "2011-02-01") +D$trainperiod <- in_range(D$t[7*24]-1, D$t, "2011-02-01") plot(D$t, D$trainperiod) ``` then optimize the parameters only on this period, by taking a subset: ```{r, output.lines=10} -model$kseq <- c(3,18) # Optimize the parameters -model$prm <- rls_optim(model, subset(D,D$trainperiod))$par +rls_optim(model, subset(D,D$trainperiod), kseqopt) ``` and then fit on the entire set: ```{r} -# Fit for all horizons -model$kseq <- 1:36 # Fit with RLS -fittmp <- rls_fit(model$prm, model, D) +fit <- rls_fit(model$prm, model, D) ``` Finally, the score can be calculated on the period following the train period by: ```{r scorefit} -score_fit(fittmp, !D$trainperiod)$scoreval +score(fit$Yhat, !D$trainperiod) ``` In this way it's rather easy to set up different schemes, like optimizing the @@ -415,7 +433,28 @@ Plot for the larger model (plots not included here): plot_ts(fit2, kseq=kseq) ``` -A shorter period (plots not included here): +A pairs plot with residuals and inputs to see if patterns are left: +```{r plotpairs, fig.height=figwidth} +kseq <- c(1,36) +D$Residuals <- residuals(fit2)[ ,pst("h",kseq)] +D$hour <- aslt(D$t)$hour +pairs(D, subset=D$scoreperiod, pattern="Residuals|Ta|I|hour|^t$", kseq=kseq) +``` +So inspecting the two upper rows, there are no clear patterns to be seen for the +mean (red lines are not deviating from zero). Some differences in the marginal +distribution of the residuals are seen, e.g. for the hour the variance is +clearly highest in the mornings. + +Examine how well the dynamics are modelled with the auto-correlation and cross-correlations: +```{r plotacf, fig.height=figwidth, echo=-1} +par(mfrow=c(2,2)) +acf(D$Residuals$h1, na.action=na.pass) +ccf(lagvec(D$Ta$k1,1), D$Residuals$h1, na.action=na.pass) +ccf(lagvec(D$I$k1,1), D$Residuals$h1, na.action=na.pass) +ccf(lagvec(D$I$k1,1), D$Residuals$h1, na.action=na.pass) +``` + +A shorter period of plots of the fits (plots not included here): ```{r, fig.height=figheight5, fig.keep="none"} xlim <- c("2011-01-01","2011-01-14") plot_ts(fit1, xlim=xlim, kseq=kseq) @@ -443,17 +482,7 @@ The RLS coefficients are in the fit for each horizon: str(fit1$Lfitval$k1) ``` -A pairs plot with residuals and inputs to see if patterns are left: -```{r plotpairs, fig.height=figwidth} -kseq <- c(1,36) -D$Residuals <- residuals(fit2)[ ,pst("h",kseq)] -D$hour <- aslt(D$t)$hour -pairs(D, subset=D$scoreperiod, pattern="Residuals|Ta|I|hour|^t$", kseq=kseq) -``` -So inspecting the two upper rows, there are no clear patterns to be seen for the -mean (red lines are not deviating from zero). Some differences in the marginal -distribution of the residuals are seen, e.g. for the hour the variance is -clearly highest in the mornings. + Histograms and box-plots to find patterns. 
From the histogram and qq-norm plot: ```{r} diff --git a/vignettes/make.R b/vignettes/make.R index 89532033717af48b131aa7e02106dcc506c2ce51..e7a457777369b4d6a8ccfc5139b5724a32622313 100644 --- a/vignettes/make.R +++ b/vignettes/make.R @@ -1,4 +1,3 @@ -# REMEMBER TO CHANGE IN shared-init in all library(knitr) library(rmarkdown) @@ -14,21 +13,25 @@ makeit <- function(nam, openit=FALSE, clean=TRUE){ render(namrmd, output_file=paste0(dirnam,nam), clean=clean) purl(namrmd) system(paste0("mv ",nam,".R ",dirnam,nam,".R")) - if(openit){ system(paste0("chromium-browser ",dirnam,nam,".html &")) } + if(openit){ system(paste0("chromium-freeworld ",dirnam,nam,".html &")) } } # unlink(paste0(dirnam,"tmp-setup-data/"), recursive=TRUE) -makeit("setup-data", openit=TRUE) +makeit("setup-data", openit=FALSE) # unlink(paste0(dirnam,"tmp-setup-and-use-model/"), recursive=TRUE) makeit("setup-and-use-model", openit=FALSE, clean=TRUE) # -unlink(paste0(dirnam,"tmp-output/tmp-forecast-evaluation/"), recursive=TRUE) +unlink(paste0(dirnam,"tmp-forecast-evaluation/"), recursive=TRUE) makeit("forecast-evaluation", openit=FALSE) -# Finish and include it!! -## unlink(paste0(dirnam,"tmp-output/tmp-online-updating/"), recursive=TRUE) -## makeit("online-updating", openit=FALSE) +# +unlink(paste0(dirnam,"tmp-model-selection/"), recursive=TRUE) +makeit("model-selection", openit=FALSE) + +# +unlink(paste0(dirnam,"tmp-output/tmp-online-updating/"), recursive=TRUE) +makeit("online-updating", openit=FALSE) diff --git a/vignettes/model-selection.Rmd b/vignettes/model-selection.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..860d2fc8cefb822ccc4cbf4cd412e22acc03d034 --- /dev/null +++ b/vignettes/model-selection.Rmd @@ -0,0 +1,230 @@ +--- +title: "Model selection" +author: "Peder Bacher" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true + toc_depth: 3 +vignette: > + %\VignetteIndexEntry{Model selection} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r external-code, cache=FALSE, include=FALSE, purl = FALSE} +# Have to load the knitr to use hooks +library(knitr) +# This vignettes name +vignettename <- "model-selection" +# REMEMBER: IF CHANGING IN THE shared-init (next block), then copy to the others! +``` + +<!--shared-init-start--> +```{r init, cache=FALSE, include=FALSE, purl=FALSE} +# Width will scale all +figwidth <- 12 +# Scale the wide figures (100% out.width) +figheight <- 4 +# Heights for stacked time series plots +figheight1 <- 5 +figheight2 <- 6.5 +figheight3 <- 8 +figheight4 <- 9.5 +figheight5 <- 11 + +# Set the size of squared figures (same height as full: figheight/figwidth) +owsval <- 0.35 +ows <- paste0(owsval*100,"%") +ows2 <- paste0(2*owsval*100,"%") +# +fhs <- figwidth * owsval + +# Set for square fig: fig.width=fhs, fig.height=fhs, out.width=ows} +# If two squared the: fig.width=2*fhs, fig.height=fhs, out.width=ows2 + +# Check this: https://bookdown.org/yihui/rmarkdown-cookbook/chunk-styling.html +# Set the knitr options +knitr::opts_chunk$set( + collapse = TRUE, + comment = "##!! 
", + prompt = FALSE, + cache = TRUE, + cache.path = paste0("../tmp/vignettes/tmp-",vignettename,"/"), + fig.align="center", + fig.path = paste0("../tmp/vignettes/tmp-",vignettename,"/"), + fig.height = figheight, + fig.width = figwidth, + out.width = "100%" +) +options(digits=3) + + +# For cropping output and messages +cropfun <- function(x, options, func){ + lines <- options$output.lines + ## if (is.null(lines)) { + ## return(func(x, options)) # pass to default hook + ## } + if(!is.null(lines)){ + x <- unlist(strsplit(x, "\n")) + i <- grep("##!!",x) + if(length(i) > lines){ + # truncate the output, but add .... + x <- x[-i[(lines+1):length(i)]] + x[i[lines]] <- pst(x[i[lines]], "\n\n## ...output cropped") + } + # paste these lines together + x <- paste(c(x, ""), collapse = "\n") + } + x <- gsub("!!","",x) + func(x, options) +} + +hook_chunk <- knit_hooks$get("chunk") + +knit_hooks$set(chunk = function(x, options) { + cropfun(x, options, hook_chunk) +}) + +``` + +[onlineforecasting]: https://onlineforecasting.org/articles/onlineforecasting.pdf +[building heat load forecasting]: https://onlineforecasting.org/examples/building-heat-load-forecasting.html +[onlineforecasting.org]: https://onlineforecasting.org +<!--shared-init-end--> + + + +## Intro +This vignette provides a short overview on the functionalities for model +selection in the onlineforecast package. It follows up on the +vignettes [setup-data](setup-data.html) and +[setup-and-use-model](setup-and-use-model.html), and continues the building load +forecast modelling presented there. If something is introduced in the present +text, but not explained, then have a look in the two preceding vignettes to find an explanation. + +## Load forecasts + +First Load the package, setup the model and calculate the forecasts: +```{r, echo=1:2, purl=1:2} +# Load the package +library(onlineforecast) +#library(devtools) +#load_all(as.package("../../onlineforecast")) +``` + +Just start by taking a rather short data set: +```{r} +# The data, just a rather short period to keep running times short +D <- subset(Dbuilding, c("2010-12-15", "2011-02-01")) +# Set the score period +D$scoreperiod <- in_range("2010-12-22", D$t) +# +D$tday <- make_tday(D$t, 1:36) +``` + +As a test we generate a random sequence and will use that as an input. In the +model selection this should not be selected in the final model: +```{r} +# Generate an input which is just random noise, i.e. should be removed in the selection +set.seed(83792) +D$noise <- make_input(rnorm(length(D$t)), 1:36) +``` + +Set up a model, which will serve as the full model in the selection: +```{r} +# The full model +model <- forecastmodel$new() +# Set the model output +model$output = "heatload" +# Inputs (transformation step) +model$add_inputs(Ta = "Ta", + noise = "noise", + mu_tday = "fs(tday/24, nharmonics=4)", + mu = "one()") +# Regression step parameters +model$add_regprm("rls_prm(lambda=0.9)") +# Optimization bounds for parameters +model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999)) +``` +Finally, set the horizons to run (just keep a vector for later use): +```{r} +# Select a model, just run optimization and score for a single horizon +model$kseq <- 5 +``` + +Now we can use the `step_optim()` function for the selection. In each step new models are generated, with either one removed input or one added input, and then all the generated models are optimized and their scores compared. 
If any new model has an improved score compared to the currently selected model, then the new model is selected and the process is repeated until no further improvement is achieved. + +In addition to selecting inputs, integer parameters can also +be stepped through, e.g. the order of basis splines or the number +of harmonics in Fourier series. Set which parameters to change in the step: +```{r} +# The range to select the number of harmonics parameter in +prm <- list(mu_tday__nharmonics = c(min=2, max=6)) +``` + +Several stepping procedures are implemented: + +- If the direction is "both", which is the default (same as "backwardboth"), then the +stepping is: + - In the first step inputs are removed one-by-one. + - In the following steps, inputs still in the model are removed one-by-one, and inputs not in the model are added one-by-one. +- If the direction is "backward": + - Inputs are only removed in each step. +- If the direction is "forwardboth": + - In the first step all inputs are removed. + - The following steps are the same as for "both". +- If the direction is "forward": + - In the first step all inputs are removed, and from there inputs are only added. + + +The default procedure is backward selection with stepping in both +directions. To make compilation of the vignette feasible, some arguments are set to +limit the computations; for real applications change the arguments "control=list(maxit=1)" and +"mc.cores=1": +```{r, message=FALSE, results="hide"} +# Run the default selection, which is "both" and equivalent to "backwardboth" +# Note the control argument, which is passed on to optim(); here it is set to only a few +# iterations in the prm optimization +Lboth <- step_optim(model, D, prm, direction="both", control=list(maxit=1), mc.cores=1) +``` +We now have the models selected in each step, and we see that the final model +has fewer inputs than the full model: +```{r} +getse(Lboth, "model") +``` + +Forward selection: +```{r, message=FALSE, results="hide"} +Lforward <- step_optim(model, D, prm, "forward", control=list(maxit=1), mc.cores=1) +``` +```{r} +getse(Lforward, "model") +``` +The same model is selected, which is also the case with backward selection: +```{r, message=FALSE, results="hide"} +Lbackward <- step_optim(model, D, prm, "backward", control=list(maxit=1), mc.cores=1) +``` +```{r} +getse(Lbackward, "model") +``` + + +A starting model can also be given; the selection will then start from this model: +```{r, message=FALSE, results="hide"} +# Clone the model to make a starting model +modelstart <- model$clone_deep() +# Remove two inputs +modelstart$inputs[2:3] <- NULL +# Run the selection +L <- step_optim(model, D, prm, modelstart=modelstart, control=list(maxit=1), mc.cores=1) +``` +```{r} +getse(L, "model") +``` + +Note that caching can be really helpful: the cache files are located in the +cachedir folder (a folder in the current working directory, which can be removed with +`unlink(foldername)`). See e.g. `?rls_optim` for how the caching works. Give the +arguments 'cachedir="cache", cachererun=FALSE', which will be passed on to `rls_optim()`.
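As a rough sketch of the caching workflow just described (not part of the vignette; the call simply combines the arguments used above, and the exact cache files written depend on `step_optim()`/`rls_optim()`):

```r
# Run the selection with caching enabled; optimization results are written
# to the "cache" folder in the current working directory
L <- step_optim(model, D, prm, "both", control=list(maxit=1), mc.cores=1,
                cachedir="cache", cachererun=FALSE)
# Running the same call again re-uses the cached results instead of re-optimizing
L <- step_optim(model, D, prm, "both", control=list(maxit=1), mc.cores=1,
                cachedir="cache", cachererun=FALSE)
# Remove the cache folder when it is no longer needed
unlink("cache", recursive=TRUE)
```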
diff --git a/vignettes/online-updating.Rmd b/vignettes/online-updating.Rmd index 3f5a3f10881372d41f8727261b7a8625d0f73a52..d9ef959ea1e62999e968a81861979642b7dd1cc7 100644 --- a/vignettes/online-updating.Rmd +++ b/vignettes/online-updating.Rmd @@ -32,6 +32,7 @@ figheight2 <- 6.5 figheight3 <- 8 figheight4 <- 9.5 figheight5 <- 11 + # Set the size of squared figures (same height as full: figheight/figwidth) owsval <- 0.35 ows <- paste0(owsval*100,"%") @@ -46,7 +47,7 @@ fhs <- figwidth * owsval # Set the knitr options knitr::opts_chunk$set( collapse = TRUE, - comment = "## ", + comment = "##!! ", prompt = FALSE, cache = TRUE, cache.path = paste0("../tmp/vignettes/tmp-",vignettename,"/"), @@ -58,27 +59,33 @@ knitr::opts_chunk$set( ) options(digits=3) -hook_output <- knit_hooks$get("output") -knit_hooks$set(output = function(x, options) { + +# For cropping output and messages +cropfun <- function(x, options, func){ lines <- options$output.lines - if (is.null(lines)) { - return(hook_output(x, options)) # pass to default hook + ## if (is.null(lines)) { + ## return(func(x, options)) # pass to default hook + ## } + if(!is.null(lines)){ + x <- unlist(strsplit(x, "\n")) + i <- grep("##!!",x) + if(length(i) > lines){ + # truncate the output, but add .... + x <- x[-i[(lines+1):length(i)]] + x[i[lines]] <- pst(x[i[lines]], "\n\n## ...output cropped") + } + # paste these lines together + x <- paste(c(x, ""), collapse = "\n") } - x <- unlist(strsplit(x, "\n")) - more <- "## ...output cropped" - if (length(lines)==1) { # first n lines - if (length(x) > lines) { - # truncate the output, but add .... - x <- c(head(x, lines), more) - } - } else { - x <- c(more, x[lines], more) - } - # paste these lines together - x <- paste(c(x, ""), collapse = "\n") - hook_output(x, options) -}) + x <- gsub("!!","",x) + func(x, options) +} + +hook_chunk <- knit_hooks$get("chunk") +knit_hooks$set(chunk = function(x, options) { + cropfun(x, options, hook_chunk) +}) ``` [onlineforecasting]: https://onlineforecasting.org/articles/onlineforecasting.pdf @@ -88,7 +95,11 @@ knit_hooks$set(output = function(x, options) { ## Intro -This vignette explains how to +This vignette explains how to use the package in a real online operation, where +the following is repeated in real time: new data is received, model is updated +and forecasts are calculated. At a lower frequency the parameters are +optimized, e.g. the updating is executed every hour and the parameters are +optimized once a week. Load the package: ```{r, echo=1:2, purl=1:2} @@ -114,77 +125,100 @@ model$add_inputs(Ta = "lp(Ta, a1=0.9)", model$add_regprm("rls_prm(lambda=0.9)") model$add_prmbounds(Ta__a1 = c(0.5, 0.9, 0.9999), lambda = c(0.9, 0.99, 0.9999)) -model$kseq <- c(3,18) -# Optimize the parameters -model$prm <- rls_optim(model, subset(D,D$trainperiod))$par +model$kseq <- 1:36 +# Optimize the parameters on the train period (i.e. until 2011-02-01) +rls_optim(model, subset(D,D$trainperiod), kseq = c(3,18)) ``` - ## Recursive update and prediction -How to get new data and update and predict. +Here we go through the steps of getting new data, running a model update and +making predictions. 
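Before stepping through the details below, the operation described above can be summarised in a small loop. This is only a schematic sketch (not vignette code): `get_new_data()` and `nsteps` are hypothetical placeholders for however new observations and forecasts arrive in a given setup; the functions used are the ones introduced in the following steps:

```r
# Schematic sketch of an online operation loop (get_new_data() and nsteps are placeholders)
for(step in 1:nsteps){
    # Receive the data for the new time step as a data.list
    Dnew <- get_new_data()
    # Transform it once per new data (some inputs, e.g. lp(), keep state between calls)
    Dnewtr <- model$transform_data(Dnew)
    # Update the RLS coefficients with the new observation
    rls_update(model, Dnewtr, Dnew[[model$output]])
    # Calculate forecasts for the horizons in model$kseq
    yhat <- rls_predict(model, Dnewtr)
    # At a lower frequency, e.g. once a week, re-optimize the parameters
    # (e.g. rls_optim() on a recent period) before continuing the updating
}
```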
-First fit on a period +First fit on a period: ```{r} +# Keep a sequence with these points iseq <- which(in_range("2010-12-15",D$t,"2011-01-01")) -Dfit <- subset(D, iseq) -model$kseq <- 1:36 -rls_fit(model$prm, model, Dfit) +# Fit the model +rls_fit(model$prm, model, subset(D, iseq)) ``` -Now the fits are saved in the model object (its an R6 object, hence passed by reference to the functions and can be changed inside the functions). A list of fits with an entry for each horizon is in Lfits, see the two first +Now the fits are saved in the model object (it's an R6 object, hence passed by reference to the functions and can be changed inside the functions). A list of fits with an entry for each horizon is in Lfits, see the first two: ```{r} +# The data of a fit is saved in this list str(model$Lfits[1:2]) ``` -Now new data arrives, take the point right after the fit period +We step to the next time step, where new data arrives. Take the point right +after the fit period and get the data for this time point: ```{r} +# Next time step (i <- iseq[length(iseq)] + 1) +# The new data for this time step Dnew <- subset(D, i) ``` -First we need to transform the new data (This must only be done once for each -new data, since some transform functions, e.g. lp(), actually keep states, see -the detailed description in ) +To update and predict, we need to transform the new data (this must be done +only once for each new data, since some transformation functions, e.g. lp(), actually +keep state data, see more on this in `?lp` and `?forecastmodel` +under `$reset_state()`): ```{r} -Dnew_transformed <- model$transform_data(Dnew) +# Transform the new data +DnewTransformed <- model$transform_data(Dnew) ``` -Then we can update the parameters using the transformed data +Then we can update the regression coefficients using the transformed data: ```{r} -rls_update(model, Dnew_transformed, Dnew[[model$output]]) +# The value of the coefficients for horizon 1, before the update +model$Lfits$k1$theta +# Update the coefficients +rls_update(model, DnewTransformed, Dnew[[model$output]]) +# The value of the coefficients for horizon 1, after the update +model$Lfits$k1$theta ``` -Calculate predictions using the new data and the updated fits (rls coefficient estimates in model$Lfits[[k]]$theta) +Calculate predictions using the new data and the updated fits: ```{r} -yhat <- rls_predict(model, Dnew_transformed) +# Calculate the predictions +yhat <- rls_predict(model, DnewTransformed) ``` -Plot to see that it fits the observations +Plot to see the predictions together with the observations: ```{r} +# The index for the predicted steps ahead iseq <- i+model$kseq +# Plot the observations and predictions plot(D$t[iseq], D$heatload[iseq], type = "b", xlab = "t", ylab = "y") lines(D$t[iseq], yhat, type = "b", col = 2) -legend("topright", c("observations",pst("predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2) +legend("topright", c("Observations",pst("Predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2) ``` Run this for a longer period to verify that the same forecasts are obtained (in one go vs.
iteratively) -First in one go +First in one go on all data: ```{r} -val <- rls_fit(model$prm, model, D, returnanalysis = TRUE) +# Fit and predict on entire data +val <- rls_fit(model$prm, model, D) +# Keep the forecasts D$Yhat1 <- val$Yhat ``` -and then iteratively +and then run iteratively through: ```{r} +# The indexes of training period itrain <- which(in_range("2010-12-15",D$t,"2011-01-01")) +# The indexes of the test period itest <- which(in_range("2011-01-01",D$t,"2011-01-04")) + +# Fit on the training period rls_fit(model$prm, model, subset(D, itrain)) +# Prepare for the forecasts D$Yhat2 <- data.frame(matrix(NA, nrow(D$Yhat1), ncol(D$Yhat1))) names(D$Yhat2) <- names(D$Yhat1) + +# Step through the test period: for(i in itest){ Dnew <- subset(D, i) Dnewtr <- model$transform_data(Dnew) @@ -193,9 +227,7 @@ for(i in itest){ } ``` -Compare to see the difference between the one step forecasts +Compare to see the difference between the one step forecasts: ```{r} D$Yhat1$k1[itest] - D$Yhat2$k1[itest] ``` - -Note about model$reset_states() diff --git a/vignettes/setup-and-use-model.Rmd b/vignettes/setup-and-use-model.Rmd index 73b537ec5f68ba62b60c6089b5b36bcf3be400fa..da8044e0981e0283952086089f30a87554d46524 100644 --- a/vignettes/setup-and-use-model.Rmd +++ b/vignettes/setup-and-use-model.Rmd @@ -46,7 +46,7 @@ fhs <- figwidth * owsval # Set the knitr options knitr::opts_chunk$set( collapse = TRUE, - comment = "## ", + comment = "##!! ", prompt = FALSE, cache = TRUE, cache.path = paste0("../tmp/vignettes/tmp-",vignettename,"/"), @@ -58,25 +58,33 @@ knitr::opts_chunk$set( ) options(digits=3) -hook_output <- knit_hooks$get("output") -knit_hooks$set(output = function(x, options) { + +# For cropping output and messages +cropfun <- function(x, options, func){ lines <- options$output.lines - if (is.null(lines)) { - return(hook_output(x, options)) # pass to default hook - } - x <- unlist(strsplit(x, "\n")) - more <- "## ...output cropped" - if (length(lines)==1) { # first n lines - if (length(x) > lines) { - # truncate the output, but add .... - x <- c(head(x, lines), more) - } - } else { - x <- c(more, x[lines], more) + ## if (is.null(lines)) { + ## return(func(x, options)) # pass to default hook + ## } + if(!is.null(lines)){ + x <- unlist(strsplit(x, "\n")) + i <- grep("##!!",x) + if(length(i) > lines){ + # truncate the output, but add .... + #x <- c(x[-i[(lines+1):length(i)]], "```", "## ...output cropped", "```") + x <- x[-i[(lines+1):length(i)]] + x[i[lines]] <- pst(x[i[lines]], "\n\n## ...output cropped") + } + # paste these lines together + x <- paste(c(x, ""), collapse = "\n") } - # paste these lines together - x <- paste(c(x, ""), collapse = "\n") - hook_output(x, options) + x <- gsub("!!","",x) + func(x, options) +} + +hook_chunk <- knit_hooks$get("chunk") + +knit_hooks$set(chunk = function(x, options) { + cropfun(x, options, hook_chunk) }) ``` @@ -148,7 +156,7 @@ model$add_regprm("rls_prm(lambda=0.9)") # Optimization bounds for parameters model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999)) # Set the horizons for which the model will be fitted -model$kseq <- c(3,18) +model$kseq <- 1:36 ``` @@ -206,7 +214,7 @@ model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999)) Finally, we set the horizons for which to fit: ```{r} # Set the horizons for which the model will be fitted -model$kseq <- c(3,18) +model$kseq <- 1:36 ``` The horizons to fit for is actually not directly related to the model, but rather the fitting of the model. 
In principle, it would be more "clean" if the @@ -220,25 +228,24 @@ We have set up the model and can now tune the `lambda` with the `rls_optim()`, which is a wrapper for the `optim()` function: ```{r, output.lines=15} # Call the optim() wrapper -model$prm <- rls_optim(model, D)$par +rls_optim(model, D, kseq = c(3,18)) ``` Note, how it only calculated a score for the 3 and 18 steps -horizons - as we specified with `model$kseq` above. The parameters could be +horizons - since we gave it `kseq` as an argument, which then overrides +`model$kseq` for the optimization only. The parameters could be optimized separately for each horizon, for example it is often such that for the first horizons a very low forgetting factor is optimal (e.g. 0.9). Currently, however, the parameters can only be optimized together. By optimizing for a short (3 steps) and a long horizon (18 steps), we obtain a balance - using less computations compared to optimizing on all horizons. -The optimization converge and the tuned parameter becomes: +The optimization converges and the tuned parameter is inserted into the model: ```{r} # Optimized lambda model$prm ``` -Now we can fit with the optimized `lambda` on all horizons over the entire period: +Now we can fit with the optimized `lambda` on the horizons in `model$kseq` over the entire period: ```{r} -# Set to fit for all horizons -model$kseq <- 1:36 # Fit for all on entire period in D fit1 <- rls_fit(model$prm, model, D) ``` @@ -324,7 +331,7 @@ model <- forecastmodel$new() model$output = "heatload" model$add_inputs(Ta = "lp(Ta, a1=0.9)", mu = "one()") -model$add_regprm("rls_prm(lambda=0.9)") +model$add_regprm("rls_prm(lambda=0.99)") model$add_prmbounds(Ta__a1 = c(0.5, 0.9, 0.9999), lambda = c(0.9, 0.99, 0.9999)) model$kseq <- c(3,18) @@ -362,6 +369,7 @@ There are quite a few functions available for input transformations: - `one()` generates an matrix of ones (for including an intercept). - `fs()` generate Fourier series for modelling harmonic functions. - `bspline()` wraps the `bs()` function for generating base splines. +- `pbspline()` wraps the `pbs()` function for generating periodic base splines. - `AR()` generates auto-regressive model inputs. and they can even be combined, see more details in [onlineforecasting] and in their help diff --git a/vignettes/setup-data.Rmd b/vignettes/setup-data.Rmd index 7c646169a942aad3b53353c2f7e6a86652c74793..10af6910d92fb3999b05625a1227f885ae0ceb45 100644 --- a/vignettes/setup-data.Rmd +++ b/vignettes/setup-data.Rmd @@ -47,7 +47,7 @@ fhs <- figwidth * owsval # Set the knitr options knitr::opts_chunk$set( collapse = TRUE, - comment = "## ", + comment = "##!! ", prompt = FALSE, cache = TRUE, cache.path = paste0("../tmp/vignettes/tmp-",vignettename,"/"), @@ -59,25 +59,33 @@ ) options(digits=3) -hook_output <- knit_hooks$get("output") -knit_hooks$set(output = function(x, options) { + +# For cropping output and messages +cropfun <- function(x, options, func){ lines <- options$output.lines - if (is.null(lines)) { - return(hook_output(x, options)) # pass to default hook - } - x <- unlist(strsplit(x, "\n")) - more <- "## ...output cropped" - if (length(lines)==1) { # first n lines - if (length(x) > lines) { - # truncate the output, but add ....
- x <- c(head(x, lines), more) - } - } else { - x <- c(more, x[lines], more) + ## if (is.null(lines)) { + ## return(func(x, options)) # pass to default hook + ## } + if(!is.null(lines)){ + x <- unlist(strsplit(x, "\n")) + i <- grep("##!!",x) + if(length(i) > lines){ + # truncate the output, but add .... + #x <- c(x[-i[(lines+1):length(i)]], "```", "## ...output cropped", "```") + x <- x[-i[(lines+1):length(i)]] + x[i[lines]] <- pst(x[i[lines]], "\n\n## ...output cropped") + } + # paste these lines together + x <- paste(c(x, ""), collapse = "\n") } - # paste these lines together - x <- paste(c(x, ""), collapse = "\n") - hook_output(x, options) + x <- gsub("!!","",x) + func(x, options) +} + +hook_chunk <- knit_hooks$get("chunk") + +knit_hooks$set(chunk = function(x, options) { + cropfun(x, options, hook_chunk) }) ```
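The chunk hook added in these init blocks is driven by the `output.lines` chunk option. A minimal, self-contained sketch of its effect (a simplified copy of the `cropfun()` defined above, with a made-up input string; it assumes the onlineforecast package is loaded so that `pst()` is available):

```r
library(onlineforecast)   # provides pst(), used inside cropfun()

# Simplified copy of cropfun() from the init chunks above
cropfun <- function(x, options, func){
    lines <- options$output.lines
    if(!is.null(lines)){
        x <- unlist(strsplit(x, "\n"))
        i <- grep("##!!", x)
        if(length(i) > lines){
            # Keep only the first `lines` output lines and note the cropping
            x <- x[-i[(lines+1):length(i)]]
            x[i[lines]] <- pst(x[i[lines]], "\n\n## ...output cropped")
        }
        x <- paste(c(x, ""), collapse = "\n")
    }
    # Strip the "!!" marker so output lines end up prefixed with "## "
    x <- gsub("!!", "", x)
    func(x, options)
}

# A made-up chunk result with three output lines, cropped to two, as a chunk
# declaring output.lines=2 would be once the hook is registered with knit_hooks$set()
chunk <- "x <- 1:3\n##!! [1] 1\n##!! [1] 2\n##!! [1] 3"
cat(cropfun(chunk, options = list(output.lines = 2), func = function(x, options) x))
```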