checking data and more

f15fbbcd · pbac · 45d7e384 · f15fbbcd · f15fbbcd · f15fbbcd
Commit f15fbbcd authored 3 years ago by pbac
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: onlineforecast
 Type: Package
 Title: Forecast Modelling for Online Applications
-Version: 1.0.0
+Version: 1.0.1
 Description: A framework for fitting adaptive forecasting models. Provides a way to use forecasts as input to models, e.g. weather forecasts for energy related forecasting. The models can be fitted recursively and can easily be setup for updating parameters when new data arrives. See the included vignettes, the website <https://onlineforecasting.org> and the pre-print paper "onlineforecast: An R package for adaptive and recursive forecasting" <arXiv:2109.12915>.
 License: GPL-3
 Encoding: UTF-8
@@ -25,7 +25,7 @@ Suggests:
    data.table,
    plotly
 VignetteBuilder: knitr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 URL: https://onlineforecasting.org
 BugReports: https://lab.compute.dtu.dk/packages/onlineforecast/-/issues
 Config/testthat/edition: 3
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,7 +7,6 @@ S3method(aslt,POSIXct)
 S3method(aslt,POSIXlt)
 S3method(aslt,character)
 S3method(aslt,numeric)
-S3method(check,data.list)
 S3method(complete_cases,data.frame)
 S3method(complete_cases,list)
 S3method(ct,POSIXct)
@@ -36,6 +35,7 @@ S3method(residuals,matrix)
 S3method(score,data.frame)
 S3method(score,list)
 S3method(subset,data.list)
+S3method(summary,data.list)
 S3method(summary,rls_fit)
 export("%**%")
 export("nams<-")
@@ -45,7 +45,6 @@ export(aslt)
 export(bspline)
 export(cache_name)
 export(cache_save)
-export(check)
 export(complete_cases)
 export(ct)
 export(data.list)

--- a/R/aslt.R
+++ b/R/aslt.R
@@ -15,7 +15,7 @@
 #' @param ... Arguments to be passed to methods.
 #' @return An object of class POSIXlt
 #' @section Methods:
-#' #' @examples
+#' @examples
 #' 
 #' # Create a POSIXlt with tz="GMT"
 #' aslt("2019-01-01")

--- a/R/bspline.R
+++ b/R/bspline.R
@@ -50,6 +50,8 @@
 #' # In a model formulation it will be:
 #' model <- forecastmodel$new()
 #' model$add_inputs(mutday = "bspline(tday)")
+#' # We set the horizons (actually not needed for the transform, only required for data checks)
+#' model$kseq <- 1:4
 #' # Such that at the transform stage will give the same as above
 #' model$transform_data(D)
 #'
@@ -72,7 +74,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots
        Boundary.knots <- bknots
    }
    # If a list, then call on each element
-    if (class(X) == "list") {
+    if (inherits(X,"list")) {
        # Call again for each element
        val <- lapply(1:length(X), function(i) {
            bspline(X[[i]], df = df, knots = knots, degree = degree, intercept = intercept, 
@@ -82,7 +84,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots
        return(val)
    }
    # X must be a data.frame or matrix
-    if(!(class(X) %in% c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") }
+    if(!inherits(X,c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") }
    # First find the horizons, they are used in the end
    nms <- nams(X)
    # All columns must be like "k12"

--- a/R/data.list.R
+++ b/R/data.list.R
@@ -12,8 +12,11 @@
 #' See the vignette 'setup-data' on how a data.list must be setup.
 #' 
 #' It's simply a list of class \code{data.list} holding:
+#' 
 #'   - vector \code{t}
+#' 
 #'   - vector(s) of observations
+#' 
 #'   - data.frames (or matrices) of forecast inputs
 #' 
 #' 
@@ -26,14 +29,14 @@
 #' time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600)
 #' # Observations time series (as vector)
 #' xobs <- rnorm(length(time))
-#' # Forecast input as data.frame
+#' # Forecast input as a data.frame with columns names 'kxx', where 'xx' is the horizon
 #' X <- data.frame(matrix(rnorm(length(time)*3), ncol=3))
 #' names(X) <- pst("k",1:3)
 #' 
 #' D <- data.list(t=time, xobs=xobs, X=X)
 #'
-#' # Check it
-#' check(D)
+#' # Check it (see \code{?\link{summary.data.list}})
+#' summary(D)
 #' 
 #' @export
 data.list <- function(...) {
@@ -90,7 +93,7 @@ data.list <- function(...) {
 #' plot(X$Ta$k10, X$Taobs)
 #'
 #' # Fit a model for the 10-step horizon
-#' abline(lm(Taobs ~ Ta.k10, X), col=2)
+#' abline(lm(Taobs ~ Ta.k10, as.data.frame(X)), col=2)
 #'
 #' @export
 subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = FALSE, pattern = NA, ...) {
@@ -135,20 +138,20 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
            subset <- 1:dim(D[[1]])[1]
        }
    }else if(length(subset) == 2){
-        if(any(class(subset) %in% c("character","POSIXlt","POSIXct","POSIXt"))){
+        if(inherits(subset,c("character","POSIXlt","POSIXct","POSIXt"))){
            # Start and end of a period is given
            subset <- in_range(subset[1], D$t, subset[2])
        }
    }else{
        # Check if a non-meaningful subset is given
-        if(any(class(subset) == "character")){
+        if(inherits(subset,"character")){
            stop("subset cannot be a character, except if it is of length 2 and can be converted in a POSIX, e.g. subset=c('2020-01-01','2020-01-10'. ")
        }
    }
    # Take all horizons k?
    if(is.na(kseq[1])){
        val <- lapply(D[nms], function(X) {
-            if (any(class(X) == "data.frame")) {
+            if (inherits(X,"data.frame")) {
                return(X[subset, , drop=FALSE]) # drop = FALSE needed in case data frame only has 1 column, otherwise this does not return a data frame
            } else {
                return(X[subset])
@@ -158,7 +161,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
        # Multiple horizons (hence length(kseq) > 1)
        # Take the specified horizons
        val <- lapply(D[nms], function(X) {
-            if (any(class(X) == "data.frame")) {
+            if (inherits(X,"data.frame")) {
                # Check if holds forecasts by checking if any name is "kxx"
                if(length(grep("k[[:digit:]]+$", names(X))) > 0){
                    return(X[subset,pst("k",kseq), drop=FALSE])
@@ -173,7 +176,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
    # Lag the forecasts k if specified
    if(lagforecasts){
        val <- lapply(val, function(X){
-            if(any(class(X) == "data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) {
+            if(inherits(X,"data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) {
                return(lagdf.data.frame(X, lagseq="+k"))
            }else{
                return(X)
@@ -209,11 +212,11 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
 as.data.frame.data.list <- function(x, row.names=NULL, optional=FALSE, ...){
    # Then convert into a data.frame
    val <- do.call("cbind", x)
-    if(class(val) == "matrix"){
+    if(inherits(val,"matrix")){
        val <- as.data.frame(val)
    }
-    # Fix names of data.frames (i.e. forecasts, their names are now "kxx", but should be X.kxx)
-    i <- grep("k[[:digit:]]+$", names(val))
+    # Fix names of data.frames (i.e. forecasts, if their names are now "kxx", but should be X.kxx)
+    i <- grep("^k[[:digit:]]+$", names(val))
    if(length(i) > 0){
        names(val)[i] <- pst(names(x)[i],".",names(val)[i])
    }
@@ -262,81 +265,105 @@ pairs.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
 }


-
-#' Checking the object for appropriate form. 
+#' Summary including checks of the data.list for appropriate form. 
 #'
-#' Prints on table form the result of the check.
+#' Prints on table form the result of the checks.
 #' 
-#' @title Checking the object for appropriate form. 
-#' @param object The object to be checked.
+#' @title Summary with checks of the data.list for appropriate form. 
+#' @param object The object to be summarized and checked
+#' @param printit A boolean deciding if check results tables are printed
+#' @param stopit A boolean deciding if the function stop with an error if the check is not ok
+#' @param nms A character vector. If given specifies the variables (vectors or matrices) in object to check
+#' @param msgextra A character which is added in the printout of an (potential) error message
+#' @param ... Not used
 #' @return The tables generated.
 #'
-#' # Check a data.list (see \code{?\link{check.data.list}})
-#' check(Dbuilding)
+#' Checking the data.list for appropriate form:
 #'
-#' @export
-check <- function(object){
-    UseMethod("check")
-}
-
-#' Checking the data.list for appropriate form. 
+#' A check of the time vector t, which must have equidistant time points and no NAs.
+#'
+#' Then the results of checks of vectors (observations):
+#' 
+#'   - NAs: Proportion of NAs
 #' 
-#' Prints a check of the time vector t, which must have equidistant time points and no NAs.
+#'   - length: Same length as the 't' vector?
 #' 
-#' Then the results of checking vectors (observations):
-#'   - ok: A 'V' indicates a successful check
-#'   - maxNAs: Proportion of NAs
-#'   - length: printed if not the same as the 't' vector
-#'   - class: the class
+#'   - class: The class of the vector
 #' 
 #' Then the results of checking data.frames and matrices (forecasts):
-#'   - ok: a 'V' indicates a successful check
-#'   - maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
-#'   - meanNAs: the proportion of NAs of the entire data.frame
-#'   - nrow: printed if not the same as the 't' vector length
-#'   - colnames: columns must be names 'kxx', where 'xx' is the horizon
-#'   - sameclass: 'X' if not all columns are the same class
-#'   - class: prints the class of the columns if they are all the same
-#' 
-#' @title Checking the data.list for appropriate form. 
-#' @param object The object to be checked.
-#' @return The tables generated.
 #' 
-#' # Check a data.list (see \code{?\link{check.data.list}})
-#' check(Dbuilding)
+#'   - maxHorizonNAs: The proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
 #' 
-#' # Vector with observations not same length as t
-#' D <- Dbuilding
-#' D$heatload <- D$heatload[1:10]
-#' check(D)
+#'   - meanNAs: The proportion of NAs of the entire matrix
+#' 
+#'   - nrow: Same length as the 't' vector?
+#' 
+#'   - colnames: Columns must be names 'kx', where 'x' is the horizon (e.g. k12 is 12-step horizon)
+#' 
+#'   - sameclass: Error if not all columns are the same class
+#' 
+#'   - class: Prints the class of the columns if they are all the same
+#' 
+#' @examples
+#' 
+#' summary(Dbuilding)
 #' 
 #' # Some NAs in k1 forecast
 #' D <- Dbuilding
 #' D$Ta$k1[1:1500] <- NA
-#' check(D)
+#' summary(D)
+#'
+#' # Vector with observations not same length as t throws error
+#' D <- Dbuilding
+#' D$heatload <- D$heatload[1:10]
+#' try(summary(D))
+#' 
+#' # Forecasts wrong count
+#' D <- Dbuilding
+#' D$Ta <- D$Ta[1:10, ]
+#' try(summary(D))
 #' 
 #' # Wrong column names
-#' names(D$Ta)
+#' D <- Dbuilding
+#' names(D$Ta)[4] <- "xk"
+#' names(D$Ta)[2] <- "x2"
+#' try(summary(D))
+#' 
+#' # No column names
+#' D <- Dbuilding
+#' names(D$Ta) <- NULL
+#' try(summary(D))
+#' 
+#' # Don't stop or only print if stopped 
+#' onlineforecast:::summary.data.list(D, stopit=FALSE)
+#' try(onlineforecast:::summary.data.list(D, printit=FALSE))
+#'
+#' # Only check for specified variables
+#' # (e.g. do like this in model functions to check only variables used in model)
+#' onlineforecast:::summary.data.list(D, nms=c("heatload","I"))
 #' 
 #' @export
-check.data.list <- function(object){
-    # Check if how the data.list is setup and report potential issues
+summary.data.list <- function(object, printit=TRUE, stopit=TRUE, nms=names(object), msgextra="", ...){
    D <- object
-    if(!"t" %in% names(D)){ stop("'t' is missing in the data.list: It must be a vector of equidistant time points (can be an integer, but preferably POSIXct class with tz 'GMT' or 'UTC'.)") }

-    if(length(unique(diff(D$t))) != 1){ stop("'t' is not equidistant and have no NA values")}
-    message("\nTime t is fine: Length ",length(D$t),"\n")
+    # The final message
+    msg <- NULL
+
+    # Check the time vector
+    if(!"t" %in% names(D)){ msg <- c(msg,"'t' is missing in the data.list: It must be a vector of equidistant time points (can be an integer, but preferably POSIXct class with tz 'GMT' or 'UTC'.)")}
+    if(length(D$t) > 1){
+        if(length(unique(diff(D$t))) != 1){ msg <- c(msg,"'t' is not equidistant or have NA values.") }
+    }
+
+    # Which elements are data.frame or matrix?
+    isMatrix <- sapply(D, function(x){ inherits(x,c("matrix","data.frame")) })
    
-    # Which is data.frame or matrix?
-    dfOrMat <- sapply(D, function(x){ (class(x) %in% c("matrix","data.frame"))[1] })
    # Vectors check
-    vecseq <- which(!dfOrMat & names(dfOrMat) != "t")
+    vecseq <- which(!isMatrix  &  names(isMatrix) != "t"  & names(isMatrix) %in% nms)
    Observations <- NA
    if(length(vecseq) > 0){
-        cat("Observation vectors:\n")
-        vecchecks <- c("ok","NAs","length","class")
-        Observations <- data.frame(matrix("", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(names(vecseq),vecchecks)), stringsAsFactors=FALSE)
-        Observations$ok <- "V"
+        vecchecks <- c("NAs","length","class")
+        Observations <- data.frame(matrix("ok", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(pst("$",names(vecseq)),vecchecks)), stringsAsFactors=FALSE)
        #
        for(i in 1:length(vecseq)){
            #
@@ -346,58 +373,83 @@ check.data.list <- function(object){
            Observations$NAs[i] <- pst(NAs,"%")
            # Check the length
            if(length(D[[nm]]) != length(D$t)){
-                Observations$length[i] <- length(D[[nm]])
+                Observations$length[i] <- "ERROR"
+                msg <- c(msg,pst(rownames(Observations)[i]," (length ",length(D[[nm]]),"), not same length as t (length ",length(D$t),")"))
            }
            # Its class
            Observations$class[i] <- class(D[[nm]])
-            # Not ok?
-            if(sum(Observations[i, 3] == "") < 1){
-                Observations$ok[i] <- ""
-            }
        }
-        print(Observations)
    }
-    #
-    # For forecasts
-    dfseq <- which(dfOrMat)
+
+    # Forecasts check
+    dfseq <- which(isMatrix  &  names(isMatrix) %in% nms)
    Forecasts <- NA
    if(length(dfseq) > 0){
-        cat("\nForecast data.frames or matrices:\n")
-        dfchecks <- c("ok","maxNAs","meanNAs","nrow","colnames","sameclass","class")
-        Forecasts <- data.frame(matrix("", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(names(dfseq),dfchecks)), stringsAsFactors=FALSE)
-        Forecasts$ok <- "V"
+        dfchecks <- c("maxHorizonNAs","NAs","nrow","colnames","sameclass","class")
+        Forecasts <- data.frame(matrix("ok", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(pst("$",names(dfseq)),dfchecks)), stringsAsFactors=FALSE)
        #
        for(i in 1:length(dfseq)){
            #
            nm <- names(dfseq)[i]
            colnms <- nams(D[[nm]])
+            if(is.null(colnms)){
+                msg <- c(msg, pst("'",nm,"' has no column names! Columns in forecast matrices must be named 'kx', where x is the horizon (e.g. 'k12' is the column with the 12 step forecast)"))
+                Forecasts[i, ] <- rep(NA,ncol(Forecasts))
+            }else{
                # max NAs
-            maxNAs <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
-            Forecasts$maxNAs[i] <- pst(maxNAs,"%")
+                tmp <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
+                Forecasts$maxHorizonNAs[i] <- pst(tmp,"%")
                # Mean NAs
-            meanNAs <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
-            Forecasts$meanNAs[i] <- pst(meanNAs,"%")
+                tmp <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
+                Forecasts$NAs[i] <- pst(tmp,"%")
                # Check the number of rows
                if(nrow(D[[nm]]) != length(D$t)){
-                Forecasts$nrow[i] <- nrow(D[[nm]])
+                    Forecasts$nrow[i] <- "ERROR"
+                    msg <- c(msg, pst(nm," has ",nrow(D[[nm]])," rows, must be equal to length of t (n=",length(D$t),")"))
                }
                # Check the colnames, are they unique and all k+integer?
-            if(!length(unique(grep("k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){
-                Forecasts$colnames[i] <- "X"
+                tmp <- unique(grep("k[[:digit:]]+$",colnms,value=TRUE))
+                if(!length(tmp) == length(colnms)){
+                    Forecasts$colnames[i] <- "ERROR"
+                    msg <- c(msg, pst(nm," has columns named: '",pst(colnms[!(colnms %in% tmp)],collapse="','"),"'. Columns in forecast matrices must be named 'kx', where x is the horizon (e.g. 'k12' is the column with the 12 step forecast)"))
                }
                if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){
-                Forecasts$sameclass[i] <- "X"
+                    Forecasts$sameclass[i] <- "ERROR"
+                    msg <- c(msg, pst(nm," doesn't have same class for all columns"))
                }else{
                    Forecasts$class[i] <- class(D[[nm]][ ,1])
                }
-            # Not ok?
-            if(sum(Forecasts[i, ] == "") < (length(dfchecks)-4)){
-                Forecasts$ok[i] <- ""
            }
        }
+    }
+
+    # Print the results
+    if(printit){
+        cat("\nLength of time vector 't': ",length(D$t),"\n\n", sep="")
+        if(length(vecseq) > 0){
+        #    cat("\n- Observation vectors:\n")
+            print(Observations)
+        }
+        if(length(dfseq) > 0){
+            #   cat("\n- Forecast data.frames or matrices:\n")
+            cat("\n")
            print(Forecasts)
        }
+    }
+
+    # Error message to print?
+    if(length(msg) > 0){
+        cat("\n")
+        msg <- c(msg,"\nSee '?summary.data.list' for more information")
+        # Stop or just print
+        if(stopit){
+            stop(pst(msg,collapse="\n"))
+        }else{
+            cat("ERRORS: \n",pst(msg,collapse="\n"),"\n")
+        }
+    }

+    # Return
    invisible(list(Observations=Observations, Forecasts=Forecasts))
 }


--- a/R/forecastmodel.R
+++ b/R/forecastmodel.R
@@ -213,6 +213,9 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list(
    #----------------------------------------------------------------
    # Function for transforming the input data to the regression data
    transform_data = function(data){
+        # Do a check of the data
+        self$check(data, checkoutput=FALSE)
+        
        # Evaluate for each input the expresssion to generate the model input data
        L <- lapply(self$inputs, function(input){
            # Evaluate the expression (input$expr)
@@ -248,11 +251,11 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list(

    #----------------------------------------------------------------
    # Check if the model and data is setup correctly
-    check = function(data = NA){
-        # some checks are done here, maybe more should be added (??also when transforming inputs, if something goes wrong its caught and message is printed)
-        #
+    check = function(data = NA, checkoutput = TRUE, checkinputs = TRUE){
+        # some checks are done here, this one is called in transform_data()
        # ################################
-        # First check if the output is set correctly
+        if(checkoutput){
+            # Check if the output is set correctly
            if( is.na(self$output) ){
                stop("Model output is NA, it must be set to the name of a variable in the data.list used.")
            }
@@ -265,37 +268,42 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list(
            if( length(data[[self$output]]) != length(data$t) ){
                stop("The length of the model output '",self$output,"' is ",length(data[[self$output]]),", which is not equal to the length of the time vector (t), which is ",length(data$t))
            }
+        }
        # ################################
+        if(checkinputs){
            # Check that the kseq is set in the model
            if( !is.numeric(self$kseq) ){
                stop("'model$kseq' is not set. Must be an integer (or numeric) vector.")
            }
            # ################################
-        # Check all input variables are correctly set data
-        for(i in 1:length(self$inputs)){
-            # Find all the variables in the expression
-            nms <- all.vars(parse(text=self$inputs[[i]]$expr[[1]]))
+            # Check all input variables are correctly set in data
+            # Find all the variable names used in the expressions
+            tmp <- lapply(self$inputs, function(input){
+                all.vars(parse(text=input$expr[[1]]))
+            })
+            nms <- unique(unlist(tmp))
+            
+            # Do the default test of the data.list, only for the variables used in the expressions
+            summary.data.list(data, printit=FALSE, nms=nms)
+            
+            # Do a bit of extra check
+            # Are all variables used available in data?
+            notindata <- nms[!(nms %in% names(data)) & nms != "pi"]
+            if(length(notindata) > 0){
+                stop("Variables ",pst("'",notindata,"'",collapse="','")," are used in input expressions, but are not in data")
+            }
+            
+            # Check each variable
            for(nm in nms){
-                if(class(data[[nm]]) %in% c("data.frame","matrix")){
+                # Are the inputs forecast matrices?
+                if(!inherits(data[[nm]],c("data.frame","matrix"))){
+                    stop(nm," must be a forecast matrix (in 'data' as a data.frame or matrix with columns named 'kxx', see ?data.list), since it is used as a variable in an input expression")
+                }
                # It's a forecast input, hence must have the k columns in kseq
                if(!all(self$kseq %in% as.integer(gsub("k","",names(data[[nm]]))))){
                    missingk <- which(!self$kseq %in% as.integer(gsub("k","",names(data[[nm]]))))
                    stop("The input variable '",nm,"' doesn't have all needed horizons.\nIt has ",pst(names(data[[nm]]),collapse=","),"\nIt is missing ",pst("k",self$kseq[missingk],collapse=","))
                }
-                    # Check if the number of observations match
-                    if( nrow(data[[nm]]) != length(data$t) ){
-                        stop(pst("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. It has ",nrow(data[[nm]]),", but 't' has ",length(data$t)))
-                    }
-                }else if(class(data[[nm]]) == "numeric"){
-                    # Observation input, check the length
-                    if( length(data[[nm]]) != length(data$t) ){
-                        stop("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. It has ",length(data[[nm]]),", but 't' has ",length(data$t))
-                    }
-                }else{
-                    if(!nm == "pi"){
-                        stop("The variable '",nm,"' is missing in data, or it has the wrong class.\nIt must be class: data.frame, matrix or vector.\nIt is needed for the input expression '",self$inputs[[i]]$expr[[1]],"'")
-                    }
-                }
            }
        }
    },

--- a/R/lm_fit.R
+++ b/R/lm_fit.R
@@ -74,13 +74,13 @@
 #' @importFrom stats lm residuals
 #' @export
 lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, printout = TRUE){
-    # Check that the model is setup correctly, it will stop and print a message if not
-    model$check(data)
-    
    # Function for initializing an lm fit:
    # - it will change the "model" input (since it an R6 class and thus passed by reference
    # - If scorefun is given, e.g. rmse() then the value of this is returned

+    # Check the model output data (input is check in the transform function)
+    model$check(data, checkinputs=FALSE)
+
    if(printout){
        # Should here actually only print the one that were found and changed?
        message("----------------")
@@ -101,7 +101,7 @@ lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, pr
    # ################################ 
    # Init the inputs states (and some more is reset)
    model$reset_state()
-    # Generate the 2nd stage inputs (i.e. the transformed data)
+    # Generate the 2nd stage inputs (i.e. the transformed data). Input data is checked in the transform function.
    datatr <- model$transform_data(data)

    #

--- a/R/rls_fit.R
+++ b/R/rls_fit.R
@@ -118,19 +118,19 @@

 rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE,
                    runcpp = TRUE, printout = TRUE){
-    # Check that the model is setup correctly, it will stop and print a message if not
-    model$check(data)
-    
    # Function for initializing an rls fit:
    # - it will change the "model" input (since it an R6 class and thus passed by reference
    # - If scorefun is given, e.g. rmse() then the value of this is returned
    #

+    # Check the model output data (input is check in the transform function)
+    model$check(data, checkinputs=FALSE)
+    
    if(printout){
        # Should here actually only print the ones that were found and changed?
        message("----------------")
        if(is.na(prm[1])){
-            message("prm=NA, so current parameters are used.")
+            message("Argument 'prm' is NA, so parameters in 'model$prm' are used.")
        }else{
            print_to_message(prm)
        }
@@ -145,7 +145,7 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE,

    # Reset the model state (e.g. inputs state, stored iterative data, ...)
    model$reset_state()
-    # Generate the 2nd stage inputs (i.e. the transformed data)
+    # Generate the 2nd stage inputs (i.e. the transformed data). Input data is checked in the transform function.
    datatr <- model$transform_data(data)

    # Initialize the fit for each horizon

--- a/R/rls_summary.R
+++ b/R/rls_summary.R
@@ -70,8 +70,13 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, printit = TRU
    fit <- object
    #
    if(is.na(scoreperiod[1])){
+        if(is.null(fit$data$scoreperiod)){
+            warning("No scoreperiod set, so using all data for score calculation.")
+            scoreperiod <- rep(TRUE, length(residuals(fit)))
+        }else{
            scoreperiod <- fit$data$scoreperiod
        }
+    }
    #
    scipen <- options(scipen=10)$scipen
    # Calculate the score for each horizon

--- a/R/step_optim.R
+++ b/R/step_optim.R
@@ -197,11 +197,12 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
    # For keeping all the results
    L <- list()
    # The first value in direction is default
-    if(length(direction) > 1){ direction <- direction[1] }
+    direction <- match.arg(direction)
    # Init
    istep <- 1
    # Different start up, if a start model is given
-    if( class(modelstart)[1] == "forecastmodel" ){
+    # Note the use of inherits() instead of: class(modelstart)[1] == "forecastmodel". See https://developer.r-project.org/Blog/public/2019/11/09/when-you-think-class.-think-again/
+    if( inherits(modelstart, "forecastmodel") ){
        # The full model will not be changed from here, so no need to clone it
        mfull <- modelfull
        m <- modelstart$clone()
@@ -239,12 +240,14 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
        }
    }
    # Find the inputs to keep, if any
-    if(class(keepinputs) == "logical"){
+    if(inherits(keepinputs,"logical")){
        if(keepinputs){
            keepinputs <- nams(mfull$inputs)
        }else{
            keepinputs <- ""
        }
+    }else{
+        if(!inherits(keepinputs,"character")){ stop("keepinputs must be a logical or a character with the names of the inputs to keep in the model.") }
    }
    # Helper
    c_mStep <- function(l, nms){
@@ -263,7 +266,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
            # Optimize
            res <- optimfun(m, data, printout=printout, scorefun=scorefun, ...)
            # Should we forecast only on the complete cases?
-            if(class(fitfun) == "function"){
+            if(inherits(fitfun,"function")){
                # Forecast to get the complete cases
                mtmp <- m$clone_deep()
                # If kseqopt is set, then make sure that it is used when fitting here
@@ -283,7 +286,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
        }
        
        message("Current score: ",format(scoreCurrent,digits=7))
-        if(class(fitfun) == "function"){
+        if(inherits(fitfun,"function")){
            message("Current complete cases: ",sum(casesCurrent)," (Diff in score from optim:",L[[istep]]$optimresult$value-scoreCurrent,")")
        }
        # Next step
@@ -379,7 +382,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
            names(Lstep) <- names(mStep)

            # Complete cases considered: Should we forecast and recalculate the score on complete cases from all models?
-            if(class(fitfun) == "function"){
+            if(inherits(fitfun,"function")){
                LYhat <- mclapply(1:length(mStep), function(i){
                    mtmp <- mStep[[i]]$clone_deep()
                    # If kseqopt is set, then make sure that it is used when fitting here
@@ -406,7 +409,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
                tmp[ ,1] <- pst(format(100 * (scoreCurrent - tmp) / scoreCurrent, digits=2),"%")
                nams(tmp) <- "Improvement"
            }
-            if(class(fitfun) == "function"){
+            if(inherits(fitfun,"function")){
                tmp <- cbind(tmp, apply(casesStep != casesCurrent, 2, sum))
                nams(tmp)[2] <- "CasesDiff"
            }

--- a/cran-comments.md
+++ b/cran-comments.md
+#----------------------------------------------------------------
+# v1.0.1
+
+We have changed a few minor things:
+- Added better checks and error messages for input data
+- Small bug fixes 
+
 #----------------------------------------------------------------
 # v1.0.0


--- a/inst/CITATION
+++ b/inst/CITATION
@@ -16,7 +16,7 @@ citEntry(
  title    = "{{onlineforecast}: Forecast Modelling for Online Applications}",
  author   = "Peder Bacher and Hjörleifur G. Bergsteinsson",
  year     = "2021",
-  note     = "R package version 1.0.0",
+  note     = "R package version 1.0.1",
  url      = "https://onlineforecasting.org",
  textVersion = ""
 )
--- a/make.R
+++ b/make.R
@@ -14,7 +14,7 @@
 ## install.packages("R.rsp")
 ## install.packages("data.table")
 ## install.packages("plotly")
-
+## install.packages("pbs")

 #----------------------------------------------------------------
 # Go
@@ -46,12 +46,12 @@ library(roxygen2)
 #use_test("newtest")

 # # Run all tests
-#document()
-#test()
+document()
+test()

 # # Run the examples
-#load_all(as.package("../onlineforecast"))
-#run_examples()
+load_all(as.package("../onlineforecast"))
+run_examples()

 # # Run tests in a single file
 # test_file("tests/testthat/test-rls-heat-load.R")
@@ -69,6 +69,7 @@ write.table(txt2, "inst/CITATION", row.names=FALSE, col.names=FALSE, quote=FALSE
 # ----------------------------------------------------------------
 # Build the package
 document()
+# Run the "vignettes/make.R" to build the cache
 build(".", vignettes=TRUE)

 # Install it

--- a/man/aslt.Rd
+++ b/man/aslt.Rd
@@ -33,19 +33,6 @@ The argument is converted into POSIXlt with tz="GMT".
 }
 \section{Methods}{

-#' @examples
-
-# Create a POSIXlt with tz="GMT"
-aslt("2019-01-01")
-class(aslt("2019-01-01"))
-aslt("2019-01-01 01:00:05")
-
-# Convert between time zones
-x <- aslt("2019-01-01", tz="CET")
-aslt(x,tz="GMT")
-
-# To seconds and back again
-aslt(as.numeric(x, units="sec"))


    - aslt.character: Simply a wrapper for \code{as.POSIXlt}
@@ -60,3 +47,18 @@ aslt(as.numeric(x, units="sec"))
    - aslt.numeric: Converts from UNIX time in seconds to POSIXlt.
 }

+\examples{
+
+# Create a POSIXlt with tz="GMT"
+aslt("2019-01-01")
+class(aslt("2019-01-01"))
+aslt("2019-01-01 01:00:05")
+
+# Convert between time zones
+x <- aslt("2019-01-01", tz="CET")
+aslt(x,tz="GMT")
+
+# To seconds and back again
+aslt(as.numeric(x, units="sec"))
+
+}
--- a/man/bs.Rd
+++ b/man/bs.Rd
@@ -68,6 +68,8 @@ See the example \url{https://onlineforecasting.org/examples/solar-power-forecast
 # In a model formulation it will be:
 model <- forecastmodel$new()
 model$add_inputs(mutday = "bspline(tday)")
+# We set the horizons (actually not needed for the transform, only required for data checks)
+model$kseq <- 1:4
 # Such that at the transform stage will give the same as above
 model$transform_data(D)


--- a/man/check.Rd
+++ b/man/check.Rd
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.list.R
-\name{check}
-\alias{check}
-\title{Checking the object for appropriate form.}
-\usage{
-check(object)
-}
-\arguments{
-\item{object}{The object to be checked.}
-}
-\value{
-The tables generated.
-
-# Check a data.list (see \code{?\link{check.data.list}})
-check(Dbuilding)
-}
-\description{
-Checking the object for appropriate form.
-}
-\details{
-Prints on table form the result of the check.
-}
--- a/man/check.data.list.Rd
+++ b/man/check.data.list.Rd
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.list.R
-\name{check.data.list}
-\alias{check.data.list}
-\title{Checking the data.list for appropriate form.}
-\usage{
-\method{check}{data.list}(object)
-}
-\arguments{
-\item{object}{The object to be checked.}
-}
-\value{
-The tables generated.
-
-# Check a data.list (see \code{?\link{check.data.list}})
-check(Dbuilding)
-
-# Vector with observations not same length as t
-D <- Dbuilding
-D$heatload <- D$heatload[1:10]
-check(D)
-
-# Some NAs in k1 forecast
-D <- Dbuilding
-D$Ta$k1[1:1500] <- NA
-check(D)
-
-# Wrong column names
-names(D$Ta)
-}
-\description{
-Checking the data.list for appropriate form.
-}
-\details{
-Prints a check of the time vector t, which must have equidistant time points and no NAs.
-
-Then the results of checking vectors (observations):
-  - ok: A 'V' indicates a successful check
-  - maxNAs: Proportion of NAs
-  - length: printed if not the same as the 't' vector
-  - class: the class
-
-Then the results of checking data.frames and matrices (forecasts):
-  - ok: a 'V' indicates a successful check
-  - maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
-  - meanNAs: the proportion of NAs of the entire data.frame
-  - nrow: printed if not the same as the 't' vector length
-  - colnames: columns must be names 'kxx', where 'xx' is the horizon
-  - sameclass: 'X' if not all columns are the same class
-  - class: prints the class of the columns if they are all the same
-}
--- a/man/data.list.Rd
+++ b/man/data.list.Rd
@@ -19,8 +19,11 @@ Make a data.list of the vectors and data.frames given.
 See the vignette 'setup-data' on how a data.list must be setup.

 It's simply a list of class \code{data.list} holding:
+
  - vector \code{t}
+
  - vector(s) of observations
+
  - data.frames (or matrices) of forecast inputs
 }
 \examples{
@@ -29,13 +32,13 @@ It's simply a list of class \code{data.list} holding:
 time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600)
 # Observations time series (as vector)
 xobs <- rnorm(length(time))
-# Forecast input as data.frame
+# Forecast input as a data.frame with columns names 'kxx', where 'xx' is the horizon
 X <- data.frame(matrix(rnorm(length(time)*3), ncol=3))
 names(X) <- pst("k",1:3)

 D <- data.list(t=time, xobs=xobs, X=X)

-# Check it
-check(D)
+# Check it (see \code{?\link{summary.data.list}})
+summary(D)

 }
--- a/man/subset.data.list.Rd
+++ b/man/subset.data.list.Rd
@@ -74,6 +74,6 @@ X <- subset(Dbuilding, 1:1000, pattern="^Ta", kseq = 10, lagforecasts = TRUE)
 plot(X$Ta$k10, X$Taobs)

 # Fit a model for the 10-step horizon
-abline(lm(Taobs ~ Ta.k10, X), col=2)
+abline(lm(Taobs ~ Ta.k10, as.data.frame(X)), col=2)

 }
--- a/man/summary.data.list.Rd
+++ b/man/summary.data.list.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.list.R
+\name{summary.data.list}
+\alias{summary.data.list}
+\title{Summary with checks of the data.list for appropriate form.}
+\usage{
+\method{summary}{data.list}(
+  object,
+  printit = TRUE,
+  stopit = TRUE,
+  nms = names(object),
+  msgextra = "",
+  ...
+)
+}
+\arguments{
+\item{object}{The object to be summarized and checked}
+
+\item{printit}{A boolean deciding if check results tables are printed}
+
+\item{stopit}{A boolean deciding if the function stop with an error if the check is not ok}
+
+\item{nms}{A character vector. If given specifies the variables (vectors or matrices) in object to check}
+
+\item{msgextra}{A character which is added in the printout of an (potential) error message}
+
+\item{...}{Not used}
+}
+\value{
+The tables generated.
+
+Checking the data.list for appropriate form:
+
+A check of the time vector t, which must have equidistant time points and no NAs.
+
+Then the results of checks of vectors (observations):
+
+  - NAs: Proportion of NAs
+
+  - length: Same length as the 't' vector?
+
+  - class: The class of the vector
+
+Then the results of checking data.frames and matrices (forecasts):
+
+  - maxHorizonNAs: The proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
+
+  - meanNAs: The proportion of NAs of the entire matrix
+
+  - nrow: Same length as the 't' vector?
+
+  - colnames: Columns must be names 'kx', where 'x' is the horizon (e.g. k12 is 12-step horizon)
+
+  - sameclass: Error if not all columns are the same class
+
+  - class: Prints the class of the columns if they are all the same
+}
+\description{
+Summary including checks of the data.list for appropriate form.
+}
+\details{
+Prints on table form the result of the checks.
+}
+\examples{
+
+summary(Dbuilding)
+
+# Some NAs in k1 forecast
+D <- Dbuilding
+D$Ta$k1[1:1500] <- NA
+summary(D)
+
+# Vector with observations not same length as t throws error
+D <- Dbuilding
+D$heatload <- D$heatload[1:10]
+try(summary(D))
+
+# Forecasts wrong count
+D <- Dbuilding
+D$Ta <- D$Ta[1:10, ]
+try(summary(D))
+
+# Wrong column names
+D <- Dbuilding
+names(D$Ta)[4] <- "xk"
+names(D$Ta)[2] <- "x2"
+try(summary(D))
+
+# No column names
+D <- Dbuilding
+names(D$Ta) <- NULL
+try(summary(D))
+
+# Don't stop or only print if stopped 
+onlineforecast:::summary.data.list(D, stopit=FALSE)
+try(onlineforecast:::summary.data.list(D, printit=FALSE))
+
+# Only check for specified variables
+# (e.g. do like this in model functions to check only variables used in model)
+onlineforecast:::summary.data.list(D, nms=c("heatload","I"))
+
+}