Skip to content
Snippets Groups Projects
Commit 755b4b5b authored by pbac's avatar pbac
Browse files

Close to version 1

parent fc2994cf
No related branches found
No related tags found
No related merge requests found
......@@ -21,10 +21,11 @@ Suggests:
knitr,
rmarkdown,
R.rsp,
testthat (>= 2.1.0),
testthat (>= 3.0.0),
data.table,
plotly
VignetteBuilder: knitr
RoxygenNote: 7.1.1
URL: https://onlineforecasting.org
BugReports: https://lab.compute.dtu.dk/packages/onlineforecast/-/issues
Config/testthat/edition: 3
......@@ -86,7 +86,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots
# First find the horizons, they are used in the end
nms <- nams(X)
# All columns must be like "k12"
#if(length(grep("^[k|h][[:digit:]]+$", nms)) != length(nms)){ stop("All column names must indicate a horizon, so start with k and then an integer") }
#if(length(grep("[k|h][[:digit:]]+$", nms)) != length(nms)){ stop("All column names must indicate a horizon, so start with k and then an integer") }
# Run for each horizon and calculate the basis splines
L <- lapply(1:ncol(X), function(i) {
if (is.na(Boundary.knots[1])) {
......
......@@ -40,16 +40,17 @@
#' # In real use this should not be temporary! (changes between R sessions with tempdir())
#' cachedir <- tempdir()
#'
#' # Uncomment to run:
#' # First time it takes at least 1 sec.
#' fun(x=2,y=2)
#' #fun(x=2,y=2)
#' # Second time it loads the cache and is much faster
#' fun(x=2,y=2)
#' #fun(x=2,y=2)
#' # Try changing the arguments (x,y) and run again
#'
#' # See the cache file(s)
#' dir(cachedir)
#' #dir(cachedir)
#' # Delete the cache folder
#' unlink(cachedir, recursive=TRUE)
#' #unlink(cachedir, recursive=TRUE)
#'
#' # Demonstrate how cache_name() is functioning
#' # Cache using the all objects given in the function calling, i.e. both x and y
......@@ -81,7 +82,14 @@
cache_name <- function(..., cachedir = "cache"){
# Get the name, definition and arguments of the function from which cache_name was called
funname <- strsplit(deparse(sys.calls()[[sys.nframe()-1]]), "\\(")[[1]][1]
fundef <- digest::digest(attr(eval(parse(text=funname)), "srcref"))
# Find the function in the nearest environment in the stack (i.e. parent calls)
for(i in rev(sys.parents())){
if(funname %in% ls(parent.frame(i+1))){
val <- mget(funname, parent.frame(i+1))
break
}
}
fundef <- digest::digest(attr(eval(val[[funname]]), "srcref"))
# if no arguments were given, then use the arguments function from which cache_name was called
if(length(list(...)) == 0){
funargs <- digest::digest(as.list( match.call(definition = sys.function( -1 ), call = sys.call(-1)))[-1])
......
#' Returns a logical vector indicating the time points which
#'
#' Given a forecast matrix the forecasts are lagged "+k" steps to align them and
#' then 'complete.cases()' is run on that .
#'
#' Gieven a list of forecast matrices the points where all are complete (also all horizons) are complete are TRUE.
#'
#' @title Find complete cases in forecast matrices
#' @param object A data.frame (with columns named 'kxx') or a list of
#' data.frames.
#' @param kseq integer vector: If given then only these horizons are processed.
#' @return A logical vector specifying if there is no missing
#' values across all horizonsd.
#' @author Peder Bacher
#'
#' @examples
#' # Take a small data set
#' D <- subset(Dbuilding, 1:20, kseq=1:5)
#' # Check the forecast matrix of ambient temperature
#' D$Ta
#' # Which are complete over all horizons? The first are not since not all horizons
#' # have a value there (after lagging)
#' complete_cases(D$Ta)
#' # Same goes if given as a list
#' complete_cases(D["Ta"])
#' # and if more than one is given
#' complete_cases(D[c("Ta","I")])
#'
#' # Set some NA of some horizon
#' D$I$k3[8:9] <- NA
#' # Now they are recognized as not complete
#' complete_cases(D[c("Ta","I")])
#'
#' # If we deal with residuals, which are observations and there for have column names "hxx"
#' Resid <- residuals(D$Ta, D$Taobs)
#' names(Resid)
#' # With columns with "h" instead of "k" no lagging occurs in complete_cases
#' complete_cases(Resid)
#' #
#' Resid2 <- Resid
#' Resid$h3[8:9] <- NA
#' complete_cases(list(Resid,Resid2))
#'
#' @rdname complete_cases
#' @importFrom stats complete.cases
#' @export
complete_cases <- function(object, kseq = NA){
UseMethod("complete_cases")
}
#' @rdname complete_cases
#' @importFrom stats complete.cases
#' @export
complete_cases.list <- function(object, kseq=NA){
if(length(grep("[k][[:digit:]]+$", nams(object[[1]])))){
prefix <- "k"
}else{
prefix <- "h"
}
# Do it only for the given kseq horizons, if kseq is not set, then take it from the first
if(is.na(kseq[1])){
kseq <- as.integer(gsub(prefix, "", nams(object[[1]])))
}
# Check that they all have kseq horizons
ok <- rep(TRUE, nrow(object[[1]]))
#
for(i in 1:length(object)){
if(!all(kseq %in% as.integer(gsub(prefix, "", nams(object[[i]]))))){
stop("Not all forecast matrices have kseq horizons: ",kseq)
}
ok <- ok & !is.na(object[[i]][ ,pst(prefix,kseq)])
}
#
ok <- as.data.frame(ok)
if(prefix == "k"){
# Lag to match resiuduals in time
names(ok) <- pst("k",kseq)
ok <- lagdf(ok, "+k")
}
# Finally, the vector with TRUE for all points with no NAs for any forecast
ok <- apply(ok, 1, all)
# Just set NAs to false
ok[is.na(ok)] <- FALSE
#
return(ok)
}
#' @rdname complete_cases
#' @importFrom stats complete.cases
#' @export
complete_cases.data.frame <- function(object, kseq=NA){
# Make a distinction between "kxx" which are forecasts or "hxx", which are observations
if(length(grep("[k][[:digit:]]+$", nams(object)[1]))){
# Forecasts, so lag them
object <- lagdf(object, "+k")
}
complete.cases(object)
}
......@@ -117,7 +117,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
X <- D[[nms[i]]]
if(class(X)[1] == "data.frame" ){
# Check if holds forecasts by checking if any name is "kxx"
if(length(grep("^k[[:digit:]]+$", names(X))) > 0){
if(length(grep("k[[:digit:]]+$", names(X))) > 0){
# If it holds forecasts, check that they are all there
if( !all(pst("k",kseq) %in% names(X)) ){
warning(pst("The variable ",nms[i]," contain ",pst(names(X),collapse=",")," hence doesn't contain all k in kseq = ",pst(kseq,collapse=",")))
......@@ -160,7 +160,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
val <- lapply(D[nms], function(X) {
if (any(class(X) == "data.frame")) {
# Check if holds forecasts by checking if any name is "kxx"
if(length(grep("^k[[:digit:]]+$", names(X))) > 0){
if(length(grep("k[[:digit:]]+$", names(X))) > 0){
return(X[subset,pst("k",kseq), drop=FALSE])
}else{
return(X[subset, , drop=FALSE])
......@@ -173,7 +173,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
# Lag the forecasts k if specified
if(lagforecasts){
val <- lapply(val, function(X){
if(any(class(X) == "data.frame") & length(grep("^k[[:digit:]]+$",names(X))) > 0) {
if(any(class(X) == "data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) {
return(lagdf.data.frame(X, lagseq="+k"))
}else{
return(X)
......@@ -213,7 +213,7 @@ as.data.frame.data.list <- function(x, row.names=NULL, optional=FALSE, ...){
val <- as.data.frame(val)
}
# Fix names of data.frames (i.e. forecasts, their names are now "kxx", but should be X.kxx)
i <- grep("^k[[:digit:]]+$", names(val))
i <- grep("k[[:digit:]]+$", names(val))
if(length(i) > 0){
names(val)[i] <- pst(names(x)[i],".",names(val)[i])
}
......@@ -382,7 +382,7 @@ check.data.list <- function(object){
Forecasts$nrow[i] <- nrow(D[[nm]])
}
# Check the colnames, are they unique and all k+integer?
if(!length(unique(grep("^k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){
if(!length(unique(grep("k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){
Forecasts$colnames[i] <- "X"
}
if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){
......
......@@ -395,7 +395,7 @@ print.forecastmodel <- function(x, ...){
cat("\nOutput:",model$output)
cat("\nInputs: ")
if(length(model$inputs) == 0 ){
cat("\nNo inputs")
cat("\nNo inputs\n\n")
}else{
cat(names(model$inputs)[1],"=",model$inputs[[1]]$expr,"\n")
if(length(model$inputs) > 1){
......
......@@ -48,15 +48,21 @@
#' @export
in_range <- function(tstart, time, tend=NA, timezone=NA) {
if (class(tstart)[1] == "character")
tstart <- ct(tstart)
if (is.na(tend))
if (is.na(tend)){
tend <- time[length(time)]
if (class(tend)[1] == "character")
}
if(is.numeric(time)){
return(tstart < time & time <= tend)
}else{
if (class(tstart)[1] == "character"){
tstart <- ct(tstart)
}
if (class(tend)[1] == "character"){
tend <- ct(tend)
}
if (is.na(timezone)){
timezone <- attr(D$t, "tzone")
timezone <- attr(time, "tzone")
}
return(ct(tstart, tz = timezone) < time & time <= ct(tend, tz = timezone))
}
ct(tstart, tz = timezone) < time & time <= ct(tend, tz = timezone)
}
......@@ -114,12 +114,12 @@ lagdf.data.frame <- function(x, lagseq) {
if (lagseq %in% c("+k","+h")) {
lagseq <- rep(0, length(nms))
## lagseq according to the k value of the columnnames
i <- grep("^[k|h][[:digit:]]+$", nms)
i <- grep("[k|h][[:digit:]]+$", nms)
lagseq[i] <- as.integer(sapply(strsplit(nms[i], "[k|h]"), function(x){ x[length(x)] }))
} else if (lagseq %in% c("-k","-h")) {
lagseq <- rep(0, length(nms))
## lagseq according to the negative k value of the columnnames
i <- grep("^[k|h][[:digit:]]+$", nms)
i <- grep("[k|h][[:digit:]]+$", nms)
lagseq[i] <- -as.integer(sapply(strsplit(nms[i], "[k|h]"), function(x){ x[length(x)] }))
}
}
......
......@@ -10,19 +10,18 @@
#'
#'
#' @title Lagging which returns a data.list
#' @param x The data.list to be lagged.
#' @param DL The data.list to be lagged.
#' @param lagseq The integer(s) setting the lag steps.
#' @return A data.list.
#' @seealso \code{\link{lagdl.data.frame}} which is run when \code{x} is a data.frame.
#' @examples
#' # The values are simply shifted in each data.frame with lagdf
#'
#'@export
lagdl <- function(D, lagseq){
iseq <- which(sapply(D,class) %in% c("data.frame","matrix"))
D[iseq] <- lapply(iseq, function(i){
lagdf(D[[i]], lagseq)
lagdl <- function(DL, lagseq){
iseq <- which(sapply(DL,class) %in% c("data.frame","matrix"))
DL[iseq] <- lapply(iseq, function(i){
lagdf(DL[[i]], lagseq)
})
return(D)
return(DL)
}
......@@ -37,9 +37,8 @@
#' # Define a simple model
#' model <- forecastmodel$new()
#' model$output <- "y"
#' model$add_inputs(Ta = "Ta",
#' model$add_inputs(Ta = "lp(Ta, a1=0.9)",
#' mu = "one()")
#' model$add_regprm("rls_prm(lambda=0.99)")
#'
#' # Before fitting the model, define which points to include in the evaluation of the score function
#' D$scoreperiod <- in_range("2010-12-20", D$t)
......@@ -53,13 +52,13 @@
#' class(fit)
#' # The one-step forecast
#' plot(D$y, type="l")
#' lines(fit$Yhat$k1, col=2)
#' lines(lagvec(fit$Yhat$k1,-1), col=2)
#' # Get the residuals
#' plot(residuals(fit)$h1)
#' # Score for each horizon
#' score_fit(fit)
#' score(residuals(fit))
#'
#' # The lm_fit don't put anything in
#' # The lm_fit don't put anything in this field
#' fit$Lfitval
#' # Find the lm fits here
#' model$Lfits
......@@ -68,6 +67,10 @@
#' # Some diurnal pattern is present
#' acf(residuals(fit)$h1, na.action=na.pass, lag.max=96)
#'
#' # Run with other parameters and return the RMSE
#' lm_fit(c(Ta__a1=0.8), model, D, scorefun=rmse, returnanalysis=FALSE)
#' lm_fit(c(Ta__a1=0.9), model, D, scorefun=rmse, returnanalysis=FALSE)
#'
#' @importFrom stats lm residuals
#' @export
lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, printout = TRUE){
......
......@@ -15,6 +15,7 @@
#' @param kseq The horizons to fit for (if not set, then model$kseq is used)
#' @param scorefun The function to be score used for calculating the score to be optimized.
#' @param cachedir A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples.
#' @param cacheload A logical controlling whether to load the cache if it exists.
#' @param printout A logical determining if the score function is printed out in each iteration of the optimization.
#' @param method The method argument for \code{\link{optim}}.
#' @param ... Additional parameters to \code{\link{optim}}
......@@ -56,7 +57,7 @@
#'
#' @importFrom stats optim
#' @export
lm_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cachererun=FALSE, printout=TRUE, method="L-BFGS-B", ...){
lm_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cacheload=FALSE, printout=TRUE, method="L-BFGS-B", ...){
## Take the parameters bounds from the parameter bounds set in the model
init <- model$get_prmbounds("init")
lower <- model$get_prmbounds("lower")
......@@ -80,7 +81,7 @@ lm_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cache
cnm <- cache_name(lm_fit, lm_optim, m$outputrange, m$regprm, m$transform_data(data),
data[[m$output]], scorefun, init, lower, upper, cachedir = cachedir)
# Load the cached result if it exists
if(file.exists(cnm) & !cachererun){
if(file.exists(cnm) & !cacheload){
res <- readRDS(cnm)
# Set the optimized parameters into the model
model$insert_prm(res$par)
......
......@@ -24,6 +24,7 @@
#' @param mains A character vector with the main for each plot.
#' @param mainouter A character with the main at the top of the plot (can also be added afterwards with \code{title(main, outer=TRUE)}).
#' @param legendtexts A list with the legend texts for each plot (replaces the names of the variables).
#' @param colormaps A list of colormaps, which will be used in each plot.
#' @param xat POSIXt specifying where the ticks on x-axis should be put.
#' @param usely If TRUE then plotly will be used.
#' @param plotit If FALSE then the plot will not be generated, only data returned.
......@@ -91,7 +92,7 @@ plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab
# set kseq if not specified
if( is.na(kseq[1]) ){
tmp <- unique(unlist(lapply(DL, function(x){names(x)})))
tmp <- tmp[grep("^[k|h][[:digit:]]+$", tmp)]
tmp <- tmp[grep("[k|h][[:digit:]]+$", tmp)]
if( length(tmp) > 0 ){ kseq <- sort(as.integer(gsub("[k|h]","",tmp))) }
}
# Generate a data.frame with the series to be plotted
......@@ -173,7 +174,8 @@ plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab
#' @importFrom graphics par title
#' @export
plot_ts.data.frame <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab = "", ylabs = NA,
mains = NA, mainouter="", legendtexts = NA, colormaps = NA, xat = NA, usely=FALSE, plotit = TRUE, p = NA, namesdata=NA, ...) {
mains = NA, mainouter="", legendtexts = NA, colormaps = NA, xat = NA,
usely=FALSE, plotit = TRUE, p = NA, namesdata=NA, ...) {
# Take par_ts setup parameters from options if there
p <- par_ts(fromoptions=TRUE, p=p, ...)
#
......@@ -301,7 +303,8 @@ plot_ts_iseq <- function(data, pattern, xnm, namesdata){
# Plot all columns found with regex pattern
#' @importFrom graphics plot lines axis title axis.POSIXct mtext par legend
plot_ts_series <- function(data, pattern, iplot = 1,
ylim = NA, xlab = "", main = "", mainline = -1.2, colormap = NA, legendtext = NA, xat = NA, plotit = TRUE, p = NA, namesdata = NA, xaxis = TRUE, ...) {
ylim = NA, xlab = "", main = "", mainline = -1.2, colormap = NA, legendtext = NA,
xat = NA, plotit = TRUE, p = NA, namesdata = NA, xaxis = TRUE, ...) {
#
# Take par_ts setup parameters from options or defaults
p <- par_ts(fromoptions=TRUE, p=p, ...)
......@@ -476,7 +479,7 @@ plot_ts_series <- function(data, pattern, iplot = 1,
#' @export
plot_ts.rls_fit <- function(object, patterns = c("^y$|^Yhat$","^Residuals$","CumAbsResiduals$",pst("^",names(fit$Lfitval[[1]]),"$")),
xlim = NA, ylims = NA, xlab = "", ylabs = NA, mains = "", mainouter="", legendtexts = NA,
xat = NA, usely=FALSE, plotit=TRUE, p=NA, kseq = NA, ...){
colormaps = NA, xat = NA, usely=FALSE, plotit=TRUE, p=NA, kseq = NA, ...){
fit <- object
# Calculate the residuals
Residuals <- residuals(fit)
......@@ -531,7 +534,7 @@ plot_ts.rls_fit <- function(object, patterns = c("^y$|^Yhat$","^Residuals$","Cum
#patterns[patterns == "!!RLSinputs!!"] <- pst("^",nmsinput,"$"))
# Make a plot of the RLS coefficients for each horizon
plot_ts(data, patterns, xlim = xlim, ylims = ylims, xlab = xlab, ylabs = ylabs,
mains = mains, mainouter=mainouter, legendtexts = legendtexts, xat = xat, usely=usely, p=p, kseq = kseq, ...)
mains = mains, mainouter=mainouter, legendtexts = legendtexts, colormaps=colormaps, xat = xat, usely=usely, p=p, kseq = kseq, ...)
}
# Return the data
invisible(data)
......
......@@ -8,9 +8,11 @@
#' @param object The forecast matrix (a data.frame with kxx as column names, Yhat in returned fits).
#' @param y The observations vector.
#' @param ... Not used.
#' @return A data.frame with the residuals for each horizon.
#' @return If object is a matrix or data.frame: a data.frame with the residuals for each horizon.
#' If object is a list: A list with residuals from each element.
#'
#' @examples
#' # ?? list example
#' # Just a vector to be forecasted
#' n <- 100
#' D <- data.list()
......@@ -41,11 +43,11 @@
#' @rdname residuals
#' @export
residuals.data.frame <- function(object, y, ...){
Yhat <- object
# Add some checking at some point
Residuals <- y - lagdf(Yhat, "+k")
# Improvements: Add some checking at some point
# Calculate the residuals
Residuals <- y - lagdf(object, "+k")
# Named with hxx (it's not a forecast, but an observation available at t)
names(Residuals) <- gsub("k","h",names(Residuals))
nams(Residuals) <- gsub("k","h",nams(Residuals))
#
return(Residuals)
}
......@@ -55,10 +57,16 @@ residuals.data.frame <- function(object, y, ...){
residuals.matrix <- residuals.data.frame
#' @rdname residuals
#' @param object The value from a fit a forecastmodel (currently \code{\link{lm_fit}} or \code{\link{rls_fit}}.
#' @param ... Not used.
#' @export
residuals.list <- function(object, y, ...){
# Each element is a forecast matrix, so do it on each
return(lapply(object, residuals, y=y))
}
#' @rdname residuals
#' @export
residuals.forecastmodel_fit <- function(object, ...){
fit <- object
residuals(fit$Yhat, fit$data[[fit$model$output]])
return(residuals(fit$Yhat, fit$data[[fit$model$output]]))
}
......@@ -96,7 +96,7 @@
#' # See rmse as a function of horizon
#' fit <- rls_fit(val$par, model, D, scorefun = rmse)
#' plot(fit$scoreval, xlab="Horizon k", ylab="RMSE")
#' # See ?score_fit for a little more consistent way of calculating this
#' # See ?score for a little more consistent way of calculating this
#'
#'
#' # Try adding a low-pass filter to Ta
......
......@@ -17,10 +17,11 @@
#'
#' @title Optimize parameters for onlineforecast model fitted with RLS
#' @param model The onlineforecast model, including inputs, output, kseq, p
#' @param data The data.list including the variables used in the model.
#' @param data The data.list which holds the data on which the model is fitted.
#' @param kseq The horizons to fit for (if not set, then model$kseq is used)
#' @param scorefun The function to be score used for calculating the score to be optimized.
#' @param cachedir A character specifying the path (and prefix) of the cache file name. If set to \code{""}, then no cache will be loaded or written. See \url{https://onlineforecasting.org/vignettes/nice-tricks.html} for examples.
#' @param cacheload A logical controlling whether to load the cache if it exists.
#' @param printout A logical determining if the score function is printed out in each iteration of the optimization.
#' @param method The method argument for \code{\link{optim}}.
#' @param ... Additional parameters to \code{\link{optim}}
......@@ -60,7 +61,7 @@
#'
#'
#' @export
rls_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cachererun=FALSE, printout=TRUE, method="L-BFGS-B", ...){
rls_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cacheload=FALSE, printout=TRUE, method="L-BFGS-B", ...){
# Take the parameters bounds from the parameter bounds set in the model
init <- model$get_prmbounds("init")
lower <- model$get_prmbounds("lower")
......@@ -85,7 +86,7 @@ rls_optim <- function(model, data, kseq = NA, scorefun = rmse, cachedir="", cach
# This is maybe smarter, don't have to calculate the transformation of the data: cnm <- cache_name(m$regprm, getse(m$inputs, nms="expr"), m$output, m$prmbounds, m$kseq, data, objfun, init, lower, upper, cachedir = cachedir)
cnm <- cache_name(rls_fit, rls_optim, m$outputrange, m$regprm, m$transform_data(data), data[[m$output]], scorefun, init, lower, upper, kseq, cachedir = cachedir)
# Load the cached result if it exists
if(file.exists(cnm) & !cachererun){
if(file.exists(cnm) & !cacheload){
res <- readRDS(cnm)
# Set the optimized parameters into the model
model$insert_prm(res$par)
......
## #' @importFrom parallel mclapply
## rls_reduce <- function(model, data, prmreduce=list(NA), kseq = NA, scorefun = rmse){
## ## prm test
## ##prmreduce <- list(I__degree = c(min=1, init=7), mu_tday__nharmonics = c(min=1, init=7))
## prmin <- unlist(getse(prmreduce, 1))
## pr <- unlist(getse(prmreduce, 2))
## #
## m <- model$clone_deep()
## # Insert the starting p reduction values
## if(!is.na(prmreduce[1])){
## m$insert_prm(pr)
## }
## #
## valref <- rls_optim(m, data, kseq, printout=FALSE)$value
## #
## while(TRUE){
## #
## message("------------------------------------")
## message("Reference score value",valref)
## # --------
## # Remove inputs one by one
## message("\nRemoving inputs one by one")
## valsrm <- mclapply(1:length(model$inputs), function(i){
## mr <- m$clone_deep()
## mr$inputs[[i]] <- NULL
## rls_optim(mr, data, kseq, printout=FALSE)$value
## })
## valsrm <- unlist(valsrm)
## names(valsrm) <- names(m$inputs)
## message("Scores")
## print(valsrm)
## # --------
## # Reduce parameter values if specified
## if(!is.na(pr[1])){
## message("\nReducing prm with -1 one by one")
## valspr <- mclapply(1:length(pr), function(i){
## mr <- m$clone_deep()
## p <- pr
## # Only count down if above minimum
## if( p[i] >= prmin[i] ){
## p[i] <- p[i] - 1
## }
## mr$insert_prm(p)
## val <- rls_optim(mr, data, kseq, printout=FALSE)$value
## #
## return(val)
## })
## valspr <- unlist(valspr)
## names(valspr) <- names(pr)
## message("Scores")
## print(valspr)
## }
## # Is one the reduced smaller than the current ref?
## if( min(c(valsrm,valspr)) < valref ){
## if(which.min(c(min(valsrm),min(valspr))) == 1){
## # One of the models with one of the inputs removed is best
## imin <- which.min(valsrm)
## message("Removing input",names(m$inputs)[imin])
## m$inputs[[imin]] <- NULL
## }else{
## # One of the models with reduced parameter values is best
## imin <- which.min(valspr)
## pr[imin] <- pr[imin] - 1
## m$insert_prm(pr)
## message("Reduced parameter",names(pr)[imin],"to:",pr[imin])
## }
## valref <- min(c(valsrm,valspr))
## }else{
## # No improvement obtained from reduction, so return the current model
## message("------------------------------------\n\nDone")
## return(m)
## }
## }
## }
......@@ -27,7 +27,6 @@
#' @param object of class \code{rls_fit}, so a fit calculated by \code{\link{rls_fit}}.
#' @param scoreperiod logical (or index). If this scoreperiod is given, then it will be used over the one in the fit.
#' @param scorefun The score function to be applied on each horizon.
#' @param usecomplete Use on the set of observations which is complete on all horizons.
#' @param printit Print the result.
#' @param ... Not used.
#' @return A list of:
......@@ -67,15 +66,18 @@
#'
#' @importFrom stats sd
#' @export
rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete = TRUE, printit = TRUE, ...){
rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, printit = TRUE, ...){
fit <- object
#
scipen <- options(scipen=10)$scipen
if(is.na(scoreperiod[1])){
scoreperiod <- fit$data$scoreperiod
}
#
tmp <- score_fit(fit, scoreperiod, usecomplete, scorefun)
scoreval <- tmp$scoreval
scoreperiodused <- tmp$scoreperiod
retval <- list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiodused)
scipen <- options(scipen=10)$scipen
# Calculate the score for each horizon
scoreval <- score(residuals(fit), scoreperiod, scorefun=scorefun)
# The result to return
retval <- list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiod)
# Return the result before print?
if(!printit){
return(retval)
......@@ -91,12 +93,12 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete =
cat(" ",names(m$regprm)[i],"=",unlist(m$regprm[i]),"\n")
}
#
cat("\nScoreperiod:",sum(scoreperiodused),"observations are included.\n")
cat("\nScoreperiod:",sum(scoreperiod),"observations are included.\n")
#
cat("\nRLS coeffients summary stats (cannot be used for significance tests):\n")
coef <- t(sapply(1:length(fit$Lfitval[[1]]), function(i){
val <- sapply(fit$Lfitval, function(Theta){
Theta[scoreperiodused,i]
Theta[scoreperiod,i]
})
#
m <- mean(val,na.rm=TRUE)
......@@ -126,7 +128,7 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete =
cat(pst("\n",toupper(scorename),":\n"))
print(tmp)
cat("\n")
invisible(list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiodused))
invisible(list(scorefun = scorefun, scoreval = scoreval, scoreperiod = scoreperiod))
}
#' @importFrom stats sd
......
......@@ -13,7 +13,7 @@
#' @title Computes the RMSE score.
#' @param x a numerical vector of residuals.
#' @return The RMSE score.
#' @seealso \code{\link{score}()} for calculation of a score for the k'th horizon, and \code{\link{score_fit}()} which takes a forecastmodel fit and returns score taking scoreperiod etc. into account.
#' @seealso \code{\link{score}()} for calculation of a score for the k'th horizon
#' @name rmse
#' @examples
#'
......
......@@ -9,10 +9,11 @@
#' Applies the \code{scorefun} on all horizons (each column) of the residuals matrix. See the description of each parameter for more details.
#'
#' @title Calculate the score for each horizon.
#' @param Residuals A matrix with residuals (columns named \code{hxx}) for which to calculate the score for each horizon.
#' @param scoreperiod as a logical vector controlling which points to be included in the score calculation. If NA then all values are included.
#' @param usecomplete if TRUE then only the values available for all horizons are included (i.e. if at one time point there is a missing value, then values for this time point is removed for all horizons in the calculation).
#' @param object ??list or A matrix with residuals (columns named \code{hxx}) for which to calculate the score for each horizon.
#' @param scoreperiod as a logical vector controlling which points to be included in the score calculation. If NA then all values are included (depeding on 'complete').
#' @param usecomplete TRUE then only the values available for all horizons are included (i.e. if at one time point there is a missing value, then values for this time point is removed for all horizons in the calculation).
#' @param scorefun The score function.
#' @param ... is passed on to the score function.
#' @return A list with the a numeric vector with the score value for each horizon and the applied \code{scoreperiod} (note can be different from the given scoreperiod, if only complete observations are used (as per default)).
#' @examples
#'
......@@ -24,36 +25,70 @@
#' Resid <- residuals(Yhat, y)
#'
#' # Calculate the score for the k1 horizon
#' score(Resid)$scoreval
#' score(Resid)
#'
#' # The first values were excluded, since there are NAs
#' # In the beginning the horizons have NAs
#' head(Resid)
#' score(Resid)$scoreperiod
#' # Default is that only complete cases over all horizons are included
#' score(Resid)
#' # So including all cases for each horizon will give different values
#' score(Resid, usecomplete=FALSE)
#'
#' @importFrom stats complete.cases
#' # Given a list
#' # The residuals for each horizon
#' Resid2 <- residuals(persistence(y,kseq=1:4)+rnorm(100), y)
#'
#' score(list(Resid,Resid2))
#' @rdname score
#' @export
score <- function(Residuals, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse){
# If no scoreperiod is given, then use all
if(is.na(scoreperiod[1])){
scoreperiod <- rep(TRUE,nrow(Residuals))
score <- function(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...){
UseMethod("score")
}
#' @rdname score
#' @export
score.list <- function(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...){
# Use only complete cases
if(usecomplete){
tmp <- complete_cases(object)
}else{
# Do checking of scoreperiod
txt <- "It must be set to an index (int or logical) defining which points to be evaluated in the scorefun()."
if( length(scoreperiod) != nrow(Residuals) ){
stop("scoreperiod is not same length as nrow(Residuals): ",txt)
tmp <- rep(TRUE,nrow(object))
}
if(!is.na(scoreperiod[1])){
scoreperiod <- tmp & scoreperiod
}else{
if( all(is.na(scoreperiod)) ){ stop("scoreperiod is all NA: ",txt) }
scoreperiod <- tmp
}
# Run on each element, usecomplete has been dealt with
sapply(object, score, usecomplete=FALSE, scoreperiod=scoreperiod, scorefun=scorefun, ...=...)
}
#' @rdname score
#' @export
score.data.frame <- function(object, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse, ...){
if(is.na(scoreperiod[1])){
scoreperiod <- rep(TRUE,nrow(object))
}
# Take only the rows which have a value for each horizon?
if(usecomplete){
scoreperiod <- scoreperiod & complete.cases(Residuals)
scoreperiod <- complete_cases(object) & scoreperiod
}
# If no scoreperiod is given, then use all
# Do checking of scoreperiod
txt <- "It must be set to an index (int or logical) defining which points to be evaluated in the scorefun()."
if( length(scoreperiod) != nrow(object) ){
stop("scoreperiod is not same length as nrow(object): ",txt)
}else{
if( all(is.na(scoreperiod)) ){ stop("scoreperiod is all NA: ",txt) }
}
# Calculate the objective function for each horizon
scoreval <- sapply(1:ncol(Residuals), function(i){
scorefun(Residuals[scoreperiod,i])
scoreval <- sapply(1:ncol(object), function(i){
scorefun(object[scoreperiod,i], ...)
})
nams(scoreval) <- gsub("h","k",nams(Residuals))
nams(scoreval) <- gsub("h","k",nams(object))
#
return(list(scoreval=scoreval,scoreperiod=scoreperiod))
return(scoreval)
}
#' Calculate the score for each horizon for a forecast model fit.
#'
#' For evaluation of the score on each horizon, as specified in the fit. Use it for a consistent evaluation.
#'
#' @title Calculates scores for a forecast model fit.
#' @param fit A model fit
#' @param scoreperiod as an index (logical or integer) defining which points to inlude in the score calculation. If NA, the \code{scoreperiod} from the \code{fit$model} object is used.
#' @param usecomplete Only use points where
#' @param scorefun The score function applied, per default \code{\link{rmse}}.
#' @seealso \code{\link{rmse}} and \code{\link{score}} which are used in this function.
#' @return A list with:
#' - \code{scoreval} is the score value
#' - \code{scoreperiod} is the score period used (can be different that the one in arguments \code{fit} or \code{scoreperiod})
#' - \code{scorename} is the name of the score function applied
#' @export
score_fit <- function(fit, scoreperiod = NA, usecomplete = TRUE, scorefun = rmse){
# Calculate the score for each horizon
if("scorefun" %in% names(as.list(match.call()))){
scorename <- as.list(match.call())$scorefun
}else{
scorename = "rmse"
}
# Check score period
txt <- "It must be set to an index (int or logical) defining which points to be evaluated in the scorefun()."
if(is.na(scoreperiod[1])){
if("scoreperiod" %in% nams(fit$data)){
scoreperiod <- fit$data$scoreperiod
}else{
stop("scoreperiod is not set. Set it in the data used in the fit function or as argument in the present call: ",txt)
}
}
# Calculate the Residuals if they were not in fit
if( !"Residuals" %in% names(fit) ){
# Calculate the residuals
Residuals <- residuals(fit$Yhat, fit$data[fit$model$output])
}else{
Residuals <- fit$Residuals
}
# Calculate the score
tmp <- score(Residuals, scoreperiod, usecomplete)
scoreval <- tmp$scoreval
scoreperiod <- tmp$scoreperiod
# Give a warning if the score
if(!is.na(fit$scoreval[1])){
if(length(fit$scoreval) != length(scoreval)){
warning("fit contains 'scoreval', which is different in length than the scoreval calculated here (probably for different horizons (kseq))")
}else if(!all(fit$scoreval == scoreval)){
warning("fit contains 'scoreval' which is different from the",scorename,"score calculated now (can also be because of 'usecomplete = TRUE', such that only points which have forecasts for all horizons are included.")
}
}
#
list(scoreval=scoreval, scoreperiod=scoreperiod, scorename=scorename)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment