diff --git a/DESCRIPTION b/DESCRIPTION index f985e655383cf0af9099486fca1f4c1e131e1164..4e0d43ed97bc301118c8e12b94d063abc904a15e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: onlineforecast Type: Package Title: Forecast Modelling for Online Applications -Version: 1.0.0 +Version: 1.0.1 Description: A framework for fitting adaptive forecasting models. Provides a way to use forecasts as input to models, e.g. weather forecasts for energy related forecasting. The models can be fitted recursively and can easily be setup for updating parameters when new data arrives. See the included vignettes, the website <https://onlineforecasting.org> and the pre-print paper "onlineforecast: An R package for adaptive and recursive forecasting" <arXiv:2109.12915>. License: GPL-3 Encoding: UTF-8 @@ -25,7 +25,7 @@ Suggests: data.table, plotly VignetteBuilder: knitr -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 URL: https://onlineforecasting.org BugReports: https://lab.compute.dtu.dk/packages/onlineforecast/-/issues Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 8cdc590471f423d4a90d08079eff3579ffabce87..4ec65ced8ff446d4da4d7cb20b10d5ee66600ed4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,6 @@ S3method(aslt,POSIXct) S3method(aslt,POSIXlt) S3method(aslt,character) S3method(aslt,numeric) -S3method(check,data.list) S3method(complete_cases,data.frame) S3method(complete_cases,list) S3method(ct,POSIXct) @@ -36,6 +35,7 @@ S3method(residuals,matrix) S3method(score,data.frame) S3method(score,list) S3method(subset,data.list) +S3method(summary,data.list) S3method(summary,rls_fit) export("%**%") export("nams<-") @@ -45,7 +45,6 @@ export(aslt) export(bspline) export(cache_name) export(cache_save) -export(check) export(complete_cases) export(ct) export(data.list) diff --git a/R/aslt.R b/R/aslt.R index 9704860c0e7801a56cccc3643343fbadb24e8c52..20141930b04a6b6884c8e775fe60231581d8a34a 100644 --- a/R/aslt.R +++ b/R/aslt.R @@ -15,7 +15,7 @@ #' @param ... Arguments to be passed to methods. #' @return An object of class POSIXlt #' @section Methods: -#' #' @examples +#' @examples #' #' # Create a POSIXlt with tz="GMT" #' aslt("2019-01-01") diff --git a/R/bspline.R b/R/bspline.R index 56ae11f859ce024c8859212e5f6cedc448ffa8ca..c7f473fe93052278200ebddedf600f53bb76ebfc 100644 --- a/R/bspline.R +++ b/R/bspline.R @@ -28,30 +28,32 @@ #' @examples #' #' # How to make a diurnal curve using splines -#' # Select first 54 hours from the load data -#' D <- subset(Dbuilding, 1:76, kseq=1:4) -#' # Make the hour of the day as a forecast input -#' D$tday <- make_tday(D$t, kseq=1:4) -#' D$tday +#' # Select first 54 hours from the load data +#' D <- subset(Dbuilding, 1:76, kseq=1:4) +#' # Make the hour of the day as a forecast input +#' D$tday <- make_tday(D$t, kseq=1:4) +#' D$tday #' -#' # Calculate the base splines for each column in tday -#' L <- bspline(D$tday) +#' # Calculate the base splines for each column in tday +#' L <- bspline(D$tday) #' -#' # Now L holds a data.frame for each base spline -#' str(L) -#' # Hence this will result in four inputs for the regression model +#' # Now L holds a data.frame for each base spline +#' str(L) +#' # Hence this will result in four inputs for the regression model #' -#' # Plot (note that the splines period starts at tday=0) -#' plot(D$t, L$bs1$k1, type="s") -#' for(i in 2:length(L)){ -#' lines(D$t, L[[i]]$k1, col=i, type="s") -#' } +#' # Plot (note that the splines period starts at tday=0) +#' plot(D$t, L$bs1$k1, type="s") +#' for(i in 2:length(L)){ +#' lines(D$t, L[[i]]$k1, col=i, type="s") +#' } #' -#' # In a model formulation it will be: -#' model <- forecastmodel$new() -#' model$add_inputs(mutday = "bspline(tday)") -#' # Such that at the transform stage will give the same as above -#' model$transform_data(D) +#' # In a model formulation it will be: +#' model <- forecastmodel$new() +#' model$add_inputs(mutday = "bspline(tday)") +#' # We set the horizons (actually not needed for the transform, only required for data checks) +#' model$kseq <- 1:4 +#' # Such that at the transform stage will give the same as above +#' model$transform_data(D) #' #' # Periodic splines are useful for modelling a diurnal harmonical functions #' L <- bspline(D$tday, bknots=c(0,24), df=4, periodic=TRUE) @@ -62,7 +64,7 @@ #' # Plot #' plot(D$t, L$bs1$k1, type="s") #' for(i in 2:length(L)){ -#' lines(D$t, L[[i]]$k1, col=i, type="s") +#' lines(D$t, L[[i]]$k1, col=i, type="s") #' } #' #' @export @@ -72,7 +74,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots Boundary.knots <- bknots } # If a list, then call on each element - if (class(X) == "list") { + if (inherits(X,"list")) { # Call again for each element val <- lapply(1:length(X), function(i) { bspline(X[[i]], df = df, knots = knots, degree = degree, intercept = intercept, @@ -82,7 +84,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots return(val) } # X must be a data.frame or matrix - if(!(class(X) %in% c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") } + if(!inherits(X,c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") } # First find the horizons, they are used in the end nms <- nams(X) # All columns must be like "k12" diff --git a/R/data.list.R b/R/data.list.R index c1155730f7d90142669ec2059e06391bc9348417..e71083c417c91b5bbd1da0ebcbd7f4e8638e6da0 100644 --- a/R/data.list.R +++ b/R/data.list.R @@ -12,8 +12,11 @@ #' See the vignette 'setup-data' on how a data.list must be setup. #' #' It's simply a list of class \code{data.list} holding: +#' #' - vector \code{t} +#' #' - vector(s) of observations +#' #' - data.frames (or matrices) of forecast inputs #' #' @@ -26,14 +29,14 @@ #' time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600) #' # Observations time series (as vector) #' xobs <- rnorm(length(time)) -#' # Forecast input as data.frame +#' # Forecast input as a data.frame with columns names 'kxx', where 'xx' is the horizon #' X <- data.frame(matrix(rnorm(length(time)*3), ncol=3)) #' names(X) <- pst("k",1:3) #' #' D <- data.list(t=time, xobs=xobs, X=X) #' -#' # Check it -#' check(D) +#' # Check it (see \code{?\link{summary.data.list}}) +#' summary(D) #' #' @export data.list <- function(...) { @@ -90,7 +93,7 @@ data.list <- function(...) { #' plot(X$Ta$k10, X$Taobs) #' #' # Fit a model for the 10-step horizon -#' abline(lm(Taobs ~ Ta.k10, X), col=2) +#' abline(lm(Taobs ~ Ta.k10, as.data.frame(X)), col=2) #' #' @export subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = FALSE, pattern = NA, ...) { @@ -135,20 +138,20 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = subset <- 1:dim(D[[1]])[1] } }else if(length(subset) == 2){ - if(any(class(subset) %in% c("character","POSIXlt","POSIXct","POSIXt"))){ + if(inherits(subset,c("character","POSIXlt","POSIXct","POSIXt"))){ # Start and end of a period is given subset <- in_range(subset[1], D$t, subset[2]) } }else{ # Check if a non-meaningful subset is given - if(any(class(subset) == "character")){ + if(inherits(subset,"character")){ stop("subset cannot be a character, except if it is of length 2 and can be converted in a POSIX, e.g. subset=c('2020-01-01','2020-01-10'. ") } } # Take all horizons k? if(is.na(kseq[1])){ val <- lapply(D[nms], function(X) { - if (any(class(X) == "data.frame")) { + if (inherits(X,"data.frame")) { return(X[subset, , drop=FALSE]) # drop = FALSE needed in case data frame only has 1 column, otherwise this does not return a data frame } else { return(X[subset]) @@ -158,7 +161,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = # Multiple horizons (hence length(kseq) > 1) # Take the specified horizons val <- lapply(D[nms], function(X) { - if (any(class(X) == "data.frame")) { + if (inherits(X,"data.frame")) { # Check if holds forecasts by checking if any name is "kxx" if(length(grep("k[[:digit:]]+$", names(X))) > 0){ return(X[subset,pst("k",kseq), drop=FALSE]) @@ -173,7 +176,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = # Lag the forecasts k if specified if(lagforecasts){ val <- lapply(val, function(X){ - if(any(class(X) == "data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) { + if(inherits(X,"data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) { return(lagdf.data.frame(X, lagseq="+k")) }else{ return(X) @@ -209,11 +212,11 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = as.data.frame.data.list <- function(x, row.names=NULL, optional=FALSE, ...){ # Then convert into a data.frame val <- do.call("cbind", x) - if(class(val) == "matrix"){ + if(inherits(val,"matrix")){ val <- as.data.frame(val) } - # Fix names of data.frames (i.e. forecasts, their names are now "kxx", but should be X.kxx) - i <- grep("k[[:digit:]]+$", names(val)) + # Fix names of data.frames (i.e. forecasts, if their names are now "kxx", but should be X.kxx) + i <- grep("^k[[:digit:]]+$", names(val)) if(length(i) > 0){ names(val)[i] <- pst(names(x)[i],".",names(val)[i]) } @@ -262,81 +265,105 @@ pairs.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = } - -#' Checking the object for appropriate form. +#' Summary including checks of the data.list for appropriate form. #' -#' Prints on table form the result of the check. +#' Prints on table form the result of the checks. #' -#' @title Checking the object for appropriate form. -#' @param object The object to be checked. +#' @title Summary with checks of the data.list for appropriate form. +#' @param object The object to be summarized and checked +#' @param printit A boolean deciding if check results tables are printed +#' @param stopit A boolean deciding if the function stop with an error if the check is not ok +#' @param nms A character vector. If given specifies the variables (vectors or matrices) in object to check +#' @param msgextra A character which is added in the printout of an (potential) error message +#' @param ... Not used #' @return The tables generated. #' -#' # Check a data.list (see \code{?\link{check.data.list}}) -#' check(Dbuilding) -#' -#' @export -check <- function(object){ - UseMethod("check") -} - -#' Checking the data.list for appropriate form. +#' Checking the data.list for appropriate form: #' -#' Prints a check of the time vector t, which must have equidistant time points and no NAs. +#' A check of the time vector t, which must have equidistant time points and no NAs. #' -#' Then the results of checking vectors (observations): -#' - ok: A 'V' indicates a successful check -#' - maxNAs: Proportion of NAs -#' - length: printed if not the same as the 't' vector -#' - class: the class +#' Then the results of checks of vectors (observations): +#' +#' - NAs: Proportion of NAs +#' +#' - length: Same length as the 't' vector? +#' +#' - class: The class of the vector #' #' Then the results of checking data.frames and matrices (forecasts): -#' - ok: a 'V' indicates a successful check -#' - maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs -#' - meanNAs: the proportion of NAs of the entire data.frame -#' - nrow: printed if not the same as the 't' vector length -#' - colnames: columns must be names 'kxx', where 'xx' is the horizon -#' - sameclass: 'X' if not all columns are the same class -#' - class: prints the class of the columns if they are all the same -#' -#' @title Checking the data.list for appropriate form. -#' @param object The object to be checked. -#' @return The tables generated. -#' -#' # Check a data.list (see \code{?\link{check.data.list}}) -#' check(Dbuilding) -#' -#' # Vector with observations not same length as t -#' D <- Dbuilding -#' D$heatload <- D$heatload[1:10] -#' check(D) -#' +#' +#' - maxHorizonNAs: The proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs +#' +#' - meanNAs: The proportion of NAs of the entire matrix +#' +#' - nrow: Same length as the 't' vector? +#' +#' - colnames: Columns must be names 'kx', where 'x' is the horizon (e.g. k12 is 12-step horizon) +#' +#' - sameclass: Error if not all columns are the same class +#' +#' - class: Prints the class of the columns if they are all the same +#' +#' @examples +#' +#' summary(Dbuilding) +#' #' # Some NAs in k1 forecast #' D <- Dbuilding #' D$Ta$k1[1:1500] <- NA -#' check(D) +#' summary(D) #' +#' # Vector with observations not same length as t throws error +#' D <- Dbuilding +#' D$heatload <- D$heatload[1:10] +#' try(summary(D)) +#' +#' # Forecasts wrong count +#' D <- Dbuilding +#' D$Ta <- D$Ta[1:10, ] +#' try(summary(D)) +#' #' # Wrong column names -#' names(D$Ta) +#' D <- Dbuilding +#' names(D$Ta)[4] <- "xk" +#' names(D$Ta)[2] <- "x2" +#' try(summary(D)) +#' +#' # No column names +#' D <- Dbuilding +#' names(D$Ta) <- NULL +#' try(summary(D)) +#' +#' # Don't stop or only print if stopped +#' onlineforecast:::summary.data.list(D, stopit=FALSE) +#' try(onlineforecast:::summary.data.list(D, printit=FALSE)) #' +#' # Only check for specified variables +#' # (e.g. do like this in model functions to check only variables used in model) +#' onlineforecast:::summary.data.list(D, nms=c("heatload","I")) +#' #' @export -check.data.list <- function(object){ - # Check if how the data.list is setup and report potential issues +summary.data.list <- function(object, printit=TRUE, stopit=TRUE, nms=names(object), msgextra="", ...){ D <- object - if(!"t" %in% names(D)){ stop("'t' is missing in the data.list: It must be a vector of equidistant time points (can be an integer, but preferably POSIXct class with tz 'GMT' or 'UTC'.)") } - if(length(unique(diff(D$t))) != 1){ stop("'t' is not equidistant and have no NA values")} - message("\nTime t is fine: Length ",length(D$t),"\n") + # The final message + msg <- NULL + + # Check the time vector + if(!"t" %in% names(D)){ msg <- c(msg,"'t' is missing in the data.list: It must be a vector of equidistant time points (can be an integer, but preferably POSIXct class with tz 'GMT' or 'UTC'.)")} + if(length(D$t) > 1){ + if(length(unique(diff(D$t))) != 1){ msg <- c(msg,"'t' is not equidistant or have NA values.") } + } - # Which is data.frame or matrix? - dfOrMat <- sapply(D, function(x){ (class(x) %in% c("matrix","data.frame"))[1] }) + # Which elements are data.frame or matrix? + isMatrix <- sapply(D, function(x){ inherits(x,c("matrix","data.frame")) }) + # Vectors check - vecseq <- which(!dfOrMat & names(dfOrMat) != "t") + vecseq <- which(!isMatrix & names(isMatrix) != "t" & names(isMatrix) %in% nms) Observations <- NA if(length(vecseq) > 0){ - cat("Observation vectors:\n") - vecchecks <- c("ok","NAs","length","class") - Observations <- data.frame(matrix("", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(names(vecseq),vecchecks)), stringsAsFactors=FALSE) - Observations$ok <- "V" + vecchecks <- c("NAs","length","class") + Observations <- data.frame(matrix("ok", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(pst("$",names(vecseq)),vecchecks)), stringsAsFactors=FALSE) # for(i in 1:length(vecseq)){ # @@ -346,58 +373,83 @@ check.data.list <- function(object){ Observations$NAs[i] <- pst(NAs,"%") # Check the length if(length(D[[nm]]) != length(D$t)){ - Observations$length[i] <- length(D[[nm]]) + Observations$length[i] <- "ERROR" + msg <- c(msg,pst(rownames(Observations)[i]," (length ",length(D[[nm]]),"), not same length as t (length ",length(D$t),")")) } # Its class Observations$class[i] <- class(D[[nm]]) - # Not ok? - if(sum(Observations[i, 3] == "") < 1){ - Observations$ok[i] <- "" - } } - print(Observations) } - # - # For forecasts - dfseq <- which(dfOrMat) + + # Forecasts check + dfseq <- which(isMatrix & names(isMatrix) %in% nms) Forecasts <- NA if(length(dfseq) > 0){ - cat("\nForecast data.frames or matrices:\n") - dfchecks <- c("ok","maxNAs","meanNAs","nrow","colnames","sameclass","class") - Forecasts <- data.frame(matrix("", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(names(dfseq),dfchecks)), stringsAsFactors=FALSE) - Forecasts$ok <- "V" + dfchecks <- c("maxHorizonNAs","NAs","nrow","colnames","sameclass","class") + Forecasts <- data.frame(matrix("ok", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(pst("$",names(dfseq)),dfchecks)), stringsAsFactors=FALSE) # for(i in 1:length(dfseq)){ # nm <- names(dfseq)[i] colnms <- nams(D[[nm]]) - # max NAs - maxNAs <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) - Forecasts$maxNAs[i] <- pst(maxNAs,"%") - # Mean NAs - meanNAs <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) - Forecasts$meanNAs[i] <- pst(meanNAs,"%") - # Check the number of rows - if(nrow(D[[nm]]) != length(D$t)){ - Forecasts$nrow[i] <- nrow(D[[nm]]) - } - # Check the colnames, are they unique and all k+integer? - if(!length(unique(grep("k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){ - Forecasts$colnames[i] <- "X" - } - if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){ - Forecasts$sameclass[i] <- "X" + if(is.null(colnms)){ + msg <- c(msg, pst("'",nm,"' has no column names! Columns in forecast matrices must be named 'kx', where x is the horizon (e.g. 'k12' is the column with the 12 step forecast)")) + Forecasts[i, ] <- rep(NA,ncol(Forecasts)) }else{ - Forecasts$class[i] <- class(D[[nm]][ ,1]) - } - # Not ok? - if(sum(Forecasts[i, ] == "") < (length(dfchecks)-4)){ - Forecasts$ok[i] <- "" + # max NAs + tmp <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) + Forecasts$maxHorizonNAs[i] <- pst(tmp,"%") + # Mean NAs + tmp <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) }))) + Forecasts$NAs[i] <- pst(tmp,"%") + # Check the number of rows + if(nrow(D[[nm]]) != length(D$t)){ + Forecasts$nrow[i] <- "ERROR" + msg <- c(msg, pst(nm," has ",nrow(D[[nm]])," rows, must be equal to length of t (n=",length(D$t),")")) + } + # Check the colnames, are they unique and all k+integer? + tmp <- unique(grep("k[[:digit:]]+$",colnms,value=TRUE)) + if(!length(tmp) == length(colnms)){ + Forecasts$colnames[i] <- "ERROR" + msg <- c(msg, pst(nm," has columns named: '",pst(colnms[!(colnms %in% tmp)],collapse="','"),"'. Columns in forecast matrices must be named 'kx', where x is the horizon (e.g. 'k12' is the column with the 12 step forecast)")) + } + if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){ + Forecasts$sameclass[i] <- "ERROR" + msg <- c(msg, pst(nm," doesn't have same class for all columns")) + }else{ + Forecasts$class[i] <- class(D[[nm]][ ,1]) + } } } - print(Forecasts) } + # Print the results + if(printit){ + cat("\nLength of time vector 't': ",length(D$t),"\n\n", sep="") + if(length(vecseq) > 0){ + # cat("\n- Observation vectors:\n") + print(Observations) + } + if(length(dfseq) > 0){ + # cat("\n- Forecast data.frames or matrices:\n") + cat("\n") + print(Forecasts) + } + } + + # Error message to print? + if(length(msg) > 0){ + cat("\n") + msg <- c(msg,"\nSee '?summary.data.list' for more information") + # Stop or just print + if(stopit){ + stop(pst(msg,collapse="\n")) + }else{ + cat("ERRORS: \n",pst(msg,collapse="\n"),"\n") + } + } + + # Return invisible(list(Observations=Observations, Forecasts=Forecasts)) } diff --git a/R/forecastmodel.R b/R/forecastmodel.R index 1de0860a1e9aed3662ab4556a1fdd52f47e86635..738060375322cda5b8e34b2945bd6388668f556c 100644 --- a/R/forecastmodel.R +++ b/R/forecastmodel.R @@ -213,6 +213,9 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- # Function for transforming the input data to the regression data transform_data = function(data){ + # Do a check of the data + self$check(data, checkoutput=FALSE) + # Evaluate for each input the expresssion to generate the model input data L <- lapply(self$inputs, function(input){ # Evaluate the expression (input$expr) @@ -248,53 +251,58 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list( #---------------------------------------------------------------- # Check if the model and data is setup correctly - check = function(data = NA){ - # some checks are done here, maybe more should be added (??also when transforming inputs, if something goes wrong its caught and message is printed) - # - # ################################ - # First check if the output is set correctly - if( is.na(self$output) ){ - stop("Model output is NA, it must be set to the name of a variable in the data.list used.") - } - if( !(self$output %in% names(data)) ){ - stop("Model output '",self$output,"' is not in the data provided: It must be set to the name of a variable in the data.list used.") - } - if( !(is.numeric(data[[self$output]])) ){ - stop("The model output '",self$output,"' is not a numeric. It has to be a vector of numbers.") - } - if( length(data[[self$output]]) != length(data$t) ){ - stop("The length of the model output '",self$output,"' is ",length(data[[self$output]]),", which is not equal to the length of the time vector (t), which is ",length(data$t)) - } + check = function(data = NA, checkoutput = TRUE, checkinputs = TRUE){ + # some checks are done here, this one is called in transform_data() # ################################ - # Check that the kseq is set in the model - if( !is.numeric(self$kseq) ){ - stop("'model$kseq' is not set. Must be an integer (or numeric) vector.") + if(checkoutput){ + # Check if the output is set correctly + if( is.na(self$output) ){ + stop("Model output is NA, it must be set to the name of a variable in the data.list used.") + } + if( !(self$output %in% names(data)) ){ + stop("Model output '",self$output,"' is not in the data provided: It must be set to the name of a variable in the data.list used.") + } + if( !(is.numeric(data[[self$output]])) ){ + stop("The model output '",self$output,"' is not a numeric. It has to be a vector of numbers.") + } + if( length(data[[self$output]]) != length(data$t) ){ + stop("The length of the model output '",self$output,"' is ",length(data[[self$output]]),", which is not equal to the length of the time vector (t), which is ",length(data$t)) + } } # ################################ - # Check all input variables are correctly set data - for(i in 1:length(self$inputs)){ - # Find all the variables in the expression - nms <- all.vars(parse(text=self$inputs[[i]]$expr[[1]])) + if(checkinputs){ + # Check that the kseq is set in the model + if( !is.numeric(self$kseq) ){ + stop("'model$kseq' is not set. Must be an integer (or numeric) vector.") + } + # ################################ + # Check all input variables are correctly set in data + # Find all the variable names used in the expressions + tmp <- lapply(self$inputs, function(input){ + all.vars(parse(text=input$expr[[1]])) + }) + nms <- unique(unlist(tmp)) + + # Do the default test of the data.list, only for the variables used in the expressions + summary.data.list(data, printit=FALSE, nms=nms) + + # Do a bit of extra check + # Are all variables used available in data? + notindata <- nms[!(nms %in% names(data)) & nms != "pi"] + if(length(notindata) > 0){ + stop("Variables ",pst("'",notindata,"'",collapse="','")," are used in input expressions, but are not in data") + } + + # Check each variable for(nm in nms){ - if(class(data[[nm]]) %in% c("data.frame","matrix")){ - # It's a forecast input, hence must have the k columns in kseq - if(!all(self$kseq %in% as.integer(gsub("k","",names(data[[nm]]))))){ - missingk <- which(!self$kseq %in% as.integer(gsub("k","",names(data[[nm]])))) - stop("The input variable '",nm,"' doesn't have all needed horizons.\nIt has ",pst(names(data[[nm]]),collapse=","),"\nIt is missing ",pst("k",self$kseq[missingk],collapse=",")) - } - # Check if the number of observations match - if( nrow(data[[nm]]) != length(data$t) ){ - stop(pst("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. It has ",nrow(data[[nm]]),", but 't' has ",length(data$t))) - } - }else if(class(data[[nm]]) == "numeric"){ - # Observation input, check the length - if( length(data[[nm]]) != length(data$t) ){ - stop("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. It has ",length(data[[nm]]),", but 't' has ",length(data$t)) - } - }else{ - if(!nm == "pi"){ - stop("The variable '",nm,"' is missing in data, or it has the wrong class.\nIt must be class: data.frame, matrix or vector.\nIt is needed for the input expression '",self$inputs[[i]]$expr[[1]],"'") - } + # Are the inputs forecast matrices? + if(!inherits(data[[nm]],c("data.frame","matrix"))){ + stop(nm," must be a forecast matrix (in 'data' as a data.frame or matrix with columns named 'kxx', see ?data.list), since it is used as a variable in an input expression") + } + # It's a forecast input, hence must have the k columns in kseq + if(!all(self$kseq %in% as.integer(gsub("k","",names(data[[nm]]))))){ + missingk <- which(!self$kseq %in% as.integer(gsub("k","",names(data[[nm]])))) + stop("The input variable '",nm,"' doesn't have all needed horizons.\nIt has ",pst(names(data[[nm]]),collapse=","),"\nIt is missing ",pst("k",self$kseq[missingk],collapse=",")) } } } diff --git a/R/lm_fit.R b/R/lm_fit.R index 4c3db31f5539b288fb5f21498d159268d89db4ad..26ff23742c4ea14351d343d4688b1a6d5eff46e1 100644 --- a/R/lm_fit.R +++ b/R/lm_fit.R @@ -74,13 +74,13 @@ #' @importFrom stats lm residuals #' @export lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, printout = TRUE){ - # Check that the model is setup correctly, it will stop and print a message if not - model$check(data) - # Function for initializing an lm fit: # - it will change the "model" input (since it an R6 class and thus passed by reference # - If scorefun is given, e.g. rmse() then the value of this is returned + # Check the model output data (input is check in the transform function) + model$check(data, checkinputs=FALSE) + if(printout){ # Should here actually only print the one that were found and changed? message("----------------") @@ -101,7 +101,7 @@ lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, pr # ################################ # Init the inputs states (and some more is reset) model$reset_state() - # Generate the 2nd stage inputs (i.e. the transformed data) + # Generate the 2nd stage inputs (i.e. the transformed data). Input data is checked in the transform function. datatr <- model$transform_data(data) # diff --git a/R/rls_fit.R b/R/rls_fit.R index 794f3271be992c23142c8e2a26032c97b56119b3..d017a0d9eaff18e4c7045861b17c98f4dc7e115f 100644 --- a/R/rls_fit.R +++ b/R/rls_fit.R @@ -118,19 +118,19 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, runcpp = TRUE, printout = TRUE){ - # Check that the model is setup correctly, it will stop and print a message if not - model$check(data) - # Function for initializing an rls fit: # - it will change the "model" input (since it an R6 class and thus passed by reference # - If scorefun is given, e.g. rmse() then the value of this is returned # + # Check the model output data (input is check in the transform function) + model$check(data, checkinputs=FALSE) + if(printout){ # Should here actually only print the ones that were found and changed? message("----------------") if(is.na(prm[1])){ - message("prm=NA, so current parameters are used.") + message("Argument 'prm' is NA, so parameters in 'model$prm' are used.") }else{ print_to_message(prm) } @@ -145,7 +145,7 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, # Reset the model state (e.g. inputs state, stored iterative data, ...) model$reset_state() - # Generate the 2nd stage inputs (i.e. the transformed data) + # Generate the 2nd stage inputs (i.e. the transformed data). Input data is checked in the transform function. datatr <- model$transform_data(data) # Initialize the fit for each horizon diff --git a/R/rls_summary.R b/R/rls_summary.R index 15e1610442543b6c21f489bfc61ffe6fdc34af17..1c28e3623bd3a8bfc785b43ab5db43e49eac796a 100644 --- a/R/rls_summary.R +++ b/R/rls_summary.R @@ -70,7 +70,12 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, printit = TRU fit <- object # if(is.na(scoreperiod[1])){ - scoreperiod <- fit$data$scoreperiod + if(is.null(fit$data$scoreperiod)){ + warning("No scoreperiod set, so using all data for score calculation.") + scoreperiod <- rep(TRUE, length(residuals(fit))) + }else{ + scoreperiod <- fit$data$scoreperiod + } } # scipen <- options(scipen=10)$scipen diff --git a/R/step_optim.R b/R/step_optim.R index 3c84ec3b2bfc017adf59dea9716ee0b69bf0f905..381dbfbc81d52f19e5fe95831ae84188175fe219 100644 --- a/R/step_optim.R +++ b/R/step_optim.R @@ -197,11 +197,12 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back # For keeping all the results L <- list() # The first value in direction is default - if(length(direction) > 1){ direction <- direction[1] } + direction <- match.arg(direction) # Init istep <- 1 # Different start up, if a start model is given - if( class(modelstart)[1] == "forecastmodel" ){ + # Note the use of inherits() instead of: class(modelstart)[1] == "forecastmodel". See https://developer.r-project.org/Blog/public/2019/11/09/when-you-think-class.-think-again/ + if( inherits(modelstart, "forecastmodel") ){ # The full model will not be changed from here, so no need to clone it mfull <- modelfull m <- modelstart$clone() @@ -239,12 +240,14 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back } } # Find the inputs to keep, if any - if(class(keepinputs) == "logical"){ + if(inherits(keepinputs,"logical")){ if(keepinputs){ keepinputs <- nams(mfull$inputs) }else{ keepinputs <- "" } + }else{ + if(!inherits(keepinputs,"character")){ stop("keepinputs must be a logical or a character with the names of the inputs to keep in the model.") } } # Helper c_mStep <- function(l, nms){ @@ -263,7 +266,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back # Optimize res <- optimfun(m, data, printout=printout, scorefun=scorefun, ...) # Should we forecast only on the complete cases? - if(class(fitfun) == "function"){ + if(inherits(fitfun,"function")){ # Forecast to get the complete cases mtmp <- m$clone_deep() # If kseqopt is set, then make sure that it is used when fitting here @@ -283,7 +286,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back } message("Current score: ",format(scoreCurrent,digits=7)) - if(class(fitfun) == "function"){ + if(inherits(fitfun,"function")){ message("Current complete cases: ",sum(casesCurrent)," (Diff in score from optim:",L[[istep]]$optimresult$value-scoreCurrent,")") } # Next step @@ -379,7 +382,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back names(Lstep) <- names(mStep) # Complete cases considered: Should we forecast and recalculate the score on complete cases from all models? - if(class(fitfun) == "function"){ + if(inherits(fitfun,"function")){ LYhat <- mclapply(1:length(mStep), function(i){ mtmp <- mStep[[i]]$clone_deep() # If kseqopt is set, then make sure that it is used when fitting here @@ -406,7 +409,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back tmp[ ,1] <- pst(format(100 * (scoreCurrent - tmp) / scoreCurrent, digits=2),"%") nams(tmp) <- "Improvement" } - if(class(fitfun) == "function"){ + if(inherits(fitfun,"function")){ tmp <- cbind(tmp, apply(casesStep != casesCurrent, 2, sum)) nams(tmp)[2] <- "CasesDiff" } diff --git a/cran-comments.md b/cran-comments.md index b5cc3648ff9a2a5ddcbd175eafd60459fcf2b34f..913b2324288362f7541e833ef794edc0683b172e 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,10 @@ +#---------------------------------------------------------------- +# v1.0.1 + +We have changed a few minor things: +- Added better checks and error messages for input data +- Small bug fixes + #---------------------------------------------------------------- # v1.0.0 diff --git a/inst/CITATION b/inst/CITATION index ccd75abe09ae9f87213aba168e52054769763aad..df780c262033dbba3ca00f55949ff847b28b5696 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -16,7 +16,7 @@ citEntry( title = "{{onlineforecast}: Forecast Modelling for Online Applications}", author = "Peder Bacher and Hjörleifur G. Bergsteinsson", year = "2021", - note = "R package version 1.0.0", + note = "R package version 1.0.1", url = "https://onlineforecasting.org", textVersion = "" ) diff --git a/make.R b/make.R index 2c38cc63f845bc9f9d060b5d79c84664c551a20d..2a7e3c551de192b1af6a0a280377d6af312e86bb 100644 --- a/make.R +++ b/make.R @@ -14,7 +14,7 @@ ## install.packages("R.rsp") ## install.packages("data.table") ## install.packages("plotly") - +## install.packages("pbs") #---------------------------------------------------------------- # Go @@ -46,12 +46,12 @@ library(roxygen2) #use_test("newtest") # # Run all tests -#document() -#test() +document() +test() # # Run the examples -#load_all(as.package("../onlineforecast")) -#run_examples() +load_all(as.package("../onlineforecast")) +run_examples() # # Run tests in a single file # test_file("tests/testthat/test-rls-heat-load.R") @@ -69,6 +69,7 @@ write.table(txt2, "inst/CITATION", row.names=FALSE, col.names=FALSE, quote=FALSE # ---------------------------------------------------------------- # Build the package document() +# Run the "vignettes/make.R" to build the cache build(".", vignettes=TRUE) # Install it diff --git a/man/aslt.Rd b/man/aslt.Rd index aa86128e6723d279ce67a15b92c992a767d573a9..43bc4c17a1022c5e2b84fbb9226bba267cb3f908 100644 --- a/man/aslt.Rd +++ b/man/aslt.Rd @@ -33,19 +33,6 @@ The argument is converted into POSIXlt with tz="GMT". } \section{Methods}{ -#' @examples - -# Create a POSIXlt with tz="GMT" -aslt("2019-01-01") -class(aslt("2019-01-01")) -aslt("2019-01-01 01:00:05") - -# Convert between time zones -x <- aslt("2019-01-01", tz="CET") -aslt(x,tz="GMT") - -# To seconds and back again -aslt(as.numeric(x, units="sec")) - aslt.character: Simply a wrapper for \code{as.POSIXlt} @@ -60,3 +47,18 @@ aslt(as.numeric(x, units="sec")) - aslt.numeric: Converts from UNIX time in seconds to POSIXlt. } +\examples{ + +# Create a POSIXlt with tz="GMT" +aslt("2019-01-01") +class(aslt("2019-01-01")) +aslt("2019-01-01 01:00:05") + +# Convert between time zones +x <- aslt("2019-01-01", tz="CET") +aslt(x,tz="GMT") + +# To seconds and back again +aslt(as.numeric(x, units="sec")) + +} diff --git a/man/bs.Rd b/man/bs.Rd index 1356ccfede1417905e4bf8a09d3d00165205e543..348a401d9517136072520a447c976d1845b62c2a 100644 --- a/man/bs.Rd +++ b/man/bs.Rd @@ -46,30 +46,32 @@ See the example \url{https://onlineforecasting.org/examples/solar-power-forecast \examples{ # How to make a diurnal curve using splines - # Select first 54 hours from the load data - D <- subset(Dbuilding, 1:76, kseq=1:4) - # Make the hour of the day as a forecast input - D$tday <- make_tday(D$t, kseq=1:4) - D$tday - - # Calculate the base splines for each column in tday - L <- bspline(D$tday) - - # Now L holds a data.frame for each base spline - str(L) - # Hence this will result in four inputs for the regression model - - # Plot (note that the splines period starts at tday=0) - plot(D$t, L$bs1$k1, type="s") - for(i in 2:length(L)){ - lines(D$t, L[[i]]$k1, col=i, type="s") - } +# Select first 54 hours from the load data +D <- subset(Dbuilding, 1:76, kseq=1:4) +# Make the hour of the day as a forecast input +D$tday <- make_tday(D$t, kseq=1:4) +D$tday + +# Calculate the base splines for each column in tday +L <- bspline(D$tday) + +# Now L holds a data.frame for each base spline +str(L) +# Hence this will result in four inputs for the regression model + +# Plot (note that the splines period starts at tday=0) +plot(D$t, L$bs1$k1, type="s") +for(i in 2:length(L)){ + lines(D$t, L[[i]]$k1, col=i, type="s") +} - # In a model formulation it will be: - model <- forecastmodel$new() - model$add_inputs(mutday = "bspline(tday)") - # Such that at the transform stage will give the same as above - model$transform_data(D) +# In a model formulation it will be: +model <- forecastmodel$new() +model$add_inputs(mutday = "bspline(tday)") +# We set the horizons (actually not needed for the transform, only required for data checks) +model$kseq <- 1:4 +# Such that at the transform stage will give the same as above +model$transform_data(D) # Periodic splines are useful for modelling a diurnal harmonical functions L <- bspline(D$tday, bknots=c(0,24), df=4, periodic=TRUE) @@ -80,7 +82,7 @@ L <- pbspline(D$tday, bknots=c(0,24), df=4) # Plot plot(D$t, L$bs1$k1, type="s") for(i in 2:length(L)){ - lines(D$t, L[[i]]$k1, col=i, type="s") + lines(D$t, L[[i]]$k1, col=i, type="s") } } diff --git a/man/check.Rd b/man/check.Rd deleted file mode 100644 index 50f6de5cbd6f72bd3f448fa524346f86ea9d6083..0000000000000000000000000000000000000000 --- a/man/check.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.list.R -\name{check} -\alias{check} -\title{Checking the object for appropriate form.} -\usage{ -check(object) -} -\arguments{ -\item{object}{The object to be checked.} -} -\value{ -The tables generated. - -# Check a data.list (see \code{?\link{check.data.list}}) -check(Dbuilding) -} -\description{ -Checking the object for appropriate form. -} -\details{ -Prints on table form the result of the check. -} diff --git a/man/check.data.list.Rd b/man/check.data.list.Rd deleted file mode 100644 index 765165402045babc53bdf564b2b1cabfd3319f98..0000000000000000000000000000000000000000 --- a/man/check.data.list.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.list.R -\name{check.data.list} -\alias{check.data.list} -\title{Checking the data.list for appropriate form.} -\usage{ -\method{check}{data.list}(object) -} -\arguments{ -\item{object}{The object to be checked.} -} -\value{ -The tables generated. - -# Check a data.list (see \code{?\link{check.data.list}}) -check(Dbuilding) - -# Vector with observations not same length as t -D <- Dbuilding -D$heatload <- D$heatload[1:10] -check(D) - -# Some NAs in k1 forecast -D <- Dbuilding -D$Ta$k1[1:1500] <- NA -check(D) - -# Wrong column names -names(D$Ta) -} -\description{ -Checking the data.list for appropriate form. -} -\details{ -Prints a check of the time vector t, which must have equidistant time points and no NAs. - -Then the results of checking vectors (observations): - - ok: A 'V' indicates a successful check - - maxNAs: Proportion of NAs - - length: printed if not the same as the 't' vector - - class: the class - -Then the results of checking data.frames and matrices (forecasts): - - ok: a 'V' indicates a successful check - - maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs - - meanNAs: the proportion of NAs of the entire data.frame - - nrow: printed if not the same as the 't' vector length - - colnames: columns must be names 'kxx', where 'xx' is the horizon - - sameclass: 'X' if not all columns are the same class - - class: prints the class of the columns if they are all the same -} diff --git a/man/data.list.Rd b/man/data.list.Rd index 81840c491b023e2b9e48c02be8fcae6162d67802..a71e1e0854a2695c8f333ec2b1c5b06a7eb9a82d 100644 --- a/man/data.list.Rd +++ b/man/data.list.Rd @@ -19,8 +19,11 @@ Make a data.list of the vectors and data.frames given. See the vignette 'setup-data' on how a data.list must be setup. It's simply a list of class \code{data.list} holding: + - vector \code{t} + - vector(s) of observations + - data.frames (or matrices) of forecast inputs } \examples{ @@ -29,13 +32,13 @@ It's simply a list of class \code{data.list} holding: time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600) # Observations time series (as vector) xobs <- rnorm(length(time)) -# Forecast input as data.frame +# Forecast input as a data.frame with columns names 'kxx', where 'xx' is the horizon X <- data.frame(matrix(rnorm(length(time)*3), ncol=3)) names(X) <- pst("k",1:3) D <- data.list(t=time, xobs=xobs, X=X) -# Check it -check(D) +# Check it (see \code{?\link{summary.data.list}}) +summary(D) } diff --git a/man/subset.data.list.Rd b/man/subset.data.list.Rd index 9d7be6629a20a10ca7ffad68641ee7ac41fa0d6a..ebc5b457214b5254dd225d7191496cc313d37446 100644 --- a/man/subset.data.list.Rd +++ b/man/subset.data.list.Rd @@ -74,6 +74,6 @@ X <- subset(Dbuilding, 1:1000, pattern="^Ta", kseq = 10, lagforecasts = TRUE) plot(X$Ta$k10, X$Taobs) # Fit a model for the 10-step horizon -abline(lm(Taobs ~ Ta.k10, X), col=2) +abline(lm(Taobs ~ Ta.k10, as.data.frame(X)), col=2) } diff --git a/man/summary.data.list.Rd b/man/summary.data.list.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c1e48960013a689a154c6b8165c859984f1255dc --- /dev/null +++ b/man/summary.data.list.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.list.R +\name{summary.data.list} +\alias{summary.data.list} +\title{Summary with checks of the data.list for appropriate form.} +\usage{ +\method{summary}{data.list}( + object, + printit = TRUE, + stopit = TRUE, + nms = names(object), + msgextra = "", + ... +) +} +\arguments{ +\item{object}{The object to be summarized and checked} + +\item{printit}{A boolean deciding if check results tables are printed} + +\item{stopit}{A boolean deciding if the function stop with an error if the check is not ok} + +\item{nms}{A character vector. If given specifies the variables (vectors or matrices) in object to check} + +\item{msgextra}{A character which is added in the printout of an (potential) error message} + +\item{...}{Not used} +} +\value{ +The tables generated. + +Checking the data.list for appropriate form: + +A check of the time vector t, which must have equidistant time points and no NAs. + +Then the results of checks of vectors (observations): + + - NAs: Proportion of NAs + + - length: Same length as the 't' vector? + + - class: The class of the vector + +Then the results of checking data.frames and matrices (forecasts): + + - maxHorizonNAs: The proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs + + - meanNAs: The proportion of NAs of the entire matrix + + - nrow: Same length as the 't' vector? + + - colnames: Columns must be names 'kx', where 'x' is the horizon (e.g. k12 is 12-step horizon) + + - sameclass: Error if not all columns are the same class + + - class: Prints the class of the columns if they are all the same +} +\description{ +Summary including checks of the data.list for appropriate form. +} +\details{ +Prints on table form the result of the checks. +} +\examples{ + +summary(Dbuilding) + +# Some NAs in k1 forecast +D <- Dbuilding +D$Ta$k1[1:1500] <- NA +summary(D) + +# Vector with observations not same length as t throws error +D <- Dbuilding +D$heatload <- D$heatload[1:10] +try(summary(D)) + +# Forecasts wrong count +D <- Dbuilding +D$Ta <- D$Ta[1:10, ] +try(summary(D)) + +# Wrong column names +D <- Dbuilding +names(D$Ta)[4] <- "xk" +names(D$Ta)[2] <- "x2" +try(summary(D)) + +# No column names +D <- Dbuilding +names(D$Ta) <- NULL +try(summary(D)) + +# Don't stop or only print if stopped +onlineforecast:::summary.data.list(D, stopit=FALSE) +try(onlineforecast:::summary.data.list(D, printit=FALSE)) + +# Only check for specified variables +# (e.g. do like this in model functions to check only variables used in model) +onlineforecast:::summary.data.list(D, nms=c("heatload","I")) + +} diff --git a/vignettes/make.R b/vignettes/make.R index e7a457777369b4d6a8ccfc5139b5724a32622313..388cb353a3189840a43f4e2f6671a904aa1c1544 100644 --- a/vignettes/make.R +++ b/vignettes/make.R @@ -33,5 +33,5 @@ unlink(paste0(dirnam,"tmp-model-selection/"), recursive=TRUE) makeit("model-selection", openit=FALSE) # -unlink(paste0(dirnam,"tmp-output/tmp-online-updating/"), recursive=TRUE) +unlink(paste0(dirnam,"tmp-online-updating/"), recursive=TRUE) makeit("online-updating", openit=FALSE) diff --git a/vignettes/setup-and-use-model.Rmd b/vignettes/setup-and-use-model.Rmd index da8044e0981e0283952086089f30a87554d46524..2018be0ea7cb76f41a36505dea286eec0e09cf57 100644 --- a/vignettes/setup-and-use-model.Rmd +++ b/vignettes/setup-and-use-model.Rmd @@ -299,14 +299,14 @@ however we can see the result of the evaluation by: # Evaluate input expressions datatr <- model$transform_data(D) # See what came out -summary(datatr) +summary.default(datatr) # In particular for the mu = "one()" head(datatr$mu) ``` If we wanted to debug we could: ```{r, eval=FALSE} -# Set to debug +# Set the function to debug (uncomment the line) #debug(one) # Run the input transformation now and it will stop in one() datatr <- model$transform_data(D) diff --git a/vignettes/setup-data.Rmd b/vignettes/setup-data.Rmd index 10af6910d92fb3999b05625a1227f885ae0ceb45..51f78fe8e2f7e66d81ceddf1568597ff04be0688 100644 --- a/vignettes/setup-data.Rmd +++ b/vignettes/setup-data.Rmd @@ -140,16 +140,17 @@ names(D) An overview of the content can be generated by: ```{r} -summary(D) +summary.default(D) ``` where it can be seen that `t` is a time vector, `heatload` is a vector, and `Ta` and `I` are data.frames. -A function for carrying out a check of the format of the 'data.list' is: +A function giving a summary, including checks of the format of the 'data.list' is: ```{r} -check(D) +summary(D) ``` -Basically, if there is a `V` in the `ok` column, then the format of this -variable in `D` is correct. See the help with `?check.data.list` to learn what the printed output means. +The 'NA' columns indicate the proportion of NAs. If there is a `ok` in a column, +then the check of the variables format is passed. See the help with +`?summary.data.list` to learn which checks are performed. ### Time @@ -184,7 +185,7 @@ operations can be done with: ``` -A helper function is provided with the `asp` function which can be called using `?`, or `?asp`. See example below: +A helper function is provided with the `ct` function which can be called using `?`, or `?ct`. See example below: ```{r} ## Convert from a time stamp (tz="GMT" per default) @@ -202,13 +203,13 @@ be very helpful. Note the rules for observations: -- In a `data.list` observations must be numeric vectors. +- In a `data.list` observations must be vectors. - The vectors must have the same length as the time `t` vector. -- Observation as vectors can be used directly as model output (if observations - are to used as model inputs, they must be setup in a data.frame as explained - below in Section [Forecasts]). +- Observation as numerical vectors can be used directly as model output (if + observations are to used as model inputs, they must be setup in a data.frame + as explained below in Section [Forecasts]). In the current data, a time series of hourly heat load observations is included: @@ -327,7 +328,7 @@ plotly_ts(D, patterns=c("heatload$","^I"), c("2010-12-15","2010-12-18"), kseq=c( ``` ```{r, warning=FALSE, message=FALSE, echo=FALSE, purl=FALSE, output="hide", eval=FALSE} L <- plotly_ts(D, patterns=c("heatload$","^I"), c("2010-12-15","2010-12-18"), kseq=c(1,8,24,36), plotit=FALSE) -subplot(L, shareX=TRUE, nrows=length(L), titleY = TRUE) +plotly::subplot(L, shareX=TRUE, nrows=length(L), titleY = TRUE) ``` Note that the `patterns` argument is a vector of regular expressions, which determines which variables from `D` to plot.