Skip to content
Snippets Groups Projects
Commit f15fbbcd authored by pbac's avatar pbac
Browse files

checking data and more

parent 45d7e384
Branches
Tags
No related merge requests found
Package: onlineforecast
Type: Package
Title: Forecast Modelling for Online Applications
Version: 1.0.0
Version: 1.0.1
Description: A framework for fitting adaptive forecasting models. Provides a way to use forecasts as input to models, e.g. weather forecasts for energy related forecasting. The models can be fitted recursively and can easily be setup for updating parameters when new data arrives. See the included vignettes, the website <https://onlineforecasting.org> and the pre-print paper "onlineforecast: An R package for adaptive and recursive forecasting" <arXiv:2109.12915>.
License: GPL-3
Encoding: UTF-8
......@@ -25,7 +25,7 @@ Suggests:
data.table,
plotly
VignetteBuilder: knitr
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
URL: https://onlineforecasting.org
BugReports: https://lab.compute.dtu.dk/packages/onlineforecast/-/issues
Config/testthat/edition: 3
......@@ -7,7 +7,6 @@ S3method(aslt,POSIXct)
S3method(aslt,POSIXlt)
S3method(aslt,character)
S3method(aslt,numeric)
S3method(check,data.list)
S3method(complete_cases,data.frame)
S3method(complete_cases,list)
S3method(ct,POSIXct)
......@@ -36,6 +35,7 @@ S3method(residuals,matrix)
S3method(score,data.frame)
S3method(score,list)
S3method(subset,data.list)
S3method(summary,data.list)
S3method(summary,rls_fit)
export("%**%")
export("nams<-")
......@@ -45,7 +45,6 @@ export(aslt)
export(bspline)
export(cache_name)
export(cache_save)
export(check)
export(complete_cases)
export(ct)
export(data.list)
......
......@@ -15,7 +15,7 @@
#' @param ... Arguments to be passed to methods.
#' @return An object of class POSIXlt
#' @section Methods:
#' #' @examples
#' @examples
#'
#' # Create a POSIXlt with tz="GMT"
#' aslt("2019-01-01")
......
......@@ -50,6 +50,8 @@
#' # In a model formulation it will be:
#' model <- forecastmodel$new()
#' model$add_inputs(mutday = "bspline(tday)")
#' # We set the horizons (actually not needed for the transform, only required for data checks)
#' model$kseq <- 1:4
#' # Such that at the transform stage will give the same as above
#' model$transform_data(D)
#'
......@@ -72,7 +74,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots
Boundary.knots <- bknots
}
# If a list, then call on each element
if (class(X) == "list") {
if (inherits(X,"list")) {
# Call again for each element
val <- lapply(1:length(X), function(i) {
bspline(X[[i]], df = df, knots = knots, degree = degree, intercept = intercept,
......@@ -82,7 +84,7 @@ bspline <- function(X, Boundary.knots = NA, intercept = FALSE, df = NULL, knots
return(val)
}
# X must be a data.frame or matrix
if(!(class(X) %in% c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") }
if(!inherits(X,c("data.frame","matrix"))){ stop("X must be a data.frame or matrix") }
# First find the horizons, they are used in the end
nms <- nams(X)
# All columns must be like "k12"
......
......@@ -12,8 +12,11 @@
#' See the vignette 'setup-data' on how a data.list must be setup.
#'
#' It's simply a list of class \code{data.list} holding:
#'
#' - vector \code{t}
#'
#' - vector(s) of observations
#'
#' - data.frames (or matrices) of forecast inputs
#'
#'
......@@ -26,14 +29,14 @@
#' time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600)
#' # Observations time series (as vector)
#' xobs <- rnorm(length(time))
#' # Forecast input as data.frame
#' # Forecast input as a data.frame with columns names 'kxx', where 'xx' is the horizon
#' X <- data.frame(matrix(rnorm(length(time)*3), ncol=3))
#' names(X) <- pst("k",1:3)
#'
#' D <- data.list(t=time, xobs=xobs, X=X)
#'
#' # Check it
#' check(D)
#' # Check it (see \code{?\link{summary.data.list}})
#' summary(D)
#'
#' @export
data.list <- function(...) {
......@@ -90,7 +93,7 @@ data.list <- function(...) {
#' plot(X$Ta$k10, X$Taobs)
#'
#' # Fit a model for the 10-step horizon
#' abline(lm(Taobs ~ Ta.k10, X), col=2)
#' abline(lm(Taobs ~ Ta.k10, as.data.frame(X)), col=2)
#'
#' @export
subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = FALSE, pattern = NA, ...) {
......@@ -135,20 +138,20 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
subset <- 1:dim(D[[1]])[1]
}
}else if(length(subset) == 2){
if(any(class(subset) %in% c("character","POSIXlt","POSIXct","POSIXt"))){
if(inherits(subset,c("character","POSIXlt","POSIXct","POSIXt"))){
# Start and end of a period is given
subset <- in_range(subset[1], D$t, subset[2])
}
}else{
# Check if a non-meaningful subset is given
if(any(class(subset) == "character")){
if(inherits(subset,"character")){
stop("subset cannot be a character, except if it is of length 2 and can be converted in a POSIX, e.g. subset=c('2020-01-01','2020-01-10'. ")
}
}
# Take all horizons k?
if(is.na(kseq[1])){
val <- lapply(D[nms], function(X) {
if (any(class(X) == "data.frame")) {
if (inherits(X,"data.frame")) {
return(X[subset, , drop=FALSE]) # drop = FALSE needed in case data frame only has 1 column, otherwise this does not return a data frame
} else {
return(X[subset])
......@@ -158,7 +161,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
# Multiple horizons (hence length(kseq) > 1)
# Take the specified horizons
val <- lapply(D[nms], function(X) {
if (any(class(X) == "data.frame")) {
if (inherits(X,"data.frame")) {
# Check if holds forecasts by checking if any name is "kxx"
if(length(grep("k[[:digit:]]+$", names(X))) > 0){
return(X[subset,pst("k",kseq), drop=FALSE])
......@@ -173,7 +176,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
# Lag the forecasts k if specified
if(lagforecasts){
val <- lapply(val, function(X){
if(any(class(X) == "data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) {
if(inherits(X,"data.frame") & length(grep("k[[:digit:]]+$",names(X))) > 0) {
return(lagdf.data.frame(X, lagseq="+k"))
}else{
return(X)
......@@ -209,11 +212,11 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
as.data.frame.data.list <- function(x, row.names=NULL, optional=FALSE, ...){
# Then convert into a data.frame
val <- do.call("cbind", x)
if(class(val) == "matrix"){
if(inherits(val,"matrix")){
val <- as.data.frame(val)
}
# Fix names of data.frames (i.e. forecasts, their names are now "kxx", but should be X.kxx)
i <- grep("k[[:digit:]]+$", names(val))
# Fix names of data.frames (i.e. forecasts, if their names are now "kxx", but should be X.kxx)
i <- grep("^k[[:digit:]]+$", names(val))
if(length(i) > 0){
names(val)[i] <- pst(names(x)[i],".",names(val)[i])
}
......@@ -262,81 +265,105 @@ pairs.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts =
}
#' Checking the object for appropriate form.
#' Summary including checks of the data.list for appropriate form.
#'
#' Prints on table form the result of the check.
#' Prints on table form the result of the checks.
#'
#' @title Checking the object for appropriate form.
#' @param object The object to be checked.
#' @title Summary with checks of the data.list for appropriate form.
#' @param object The object to be summarized and checked
#' @param printit A boolean deciding if check results tables are printed
#' @param stopit A boolean deciding if the function stop with an error if the check is not ok
#' @param nms A character vector. If given specifies the variables (vectors or matrices) in object to check
#' @param msgextra A character which is added in the printout of an (potential) error message
#' @param ... Not used
#' @return The tables generated.
#'
#' # Check a data.list (see \code{?\link{check.data.list}})
#' check(Dbuilding)
#' Checking the data.list for appropriate form:
#'
#' @export
check <- function(object){
UseMethod("check")
}
#' Checking the data.list for appropriate form.
#' A check of the time vector t, which must have equidistant time points and no NAs.
#'
#' Then the results of checks of vectors (observations):
#'
#' - NAs: Proportion of NAs
#'
#' Prints a check of the time vector t, which must have equidistant time points and no NAs.
#' - length: Same length as the 't' vector?
#'
#' Then the results of checking vectors (observations):
#' - ok: A 'V' indicates a successful check
#' - maxNAs: Proportion of NAs
#' - length: printed if not the same as the 't' vector
#' - class: the class
#' - class: The class of the vector
#'
#' Then the results of checking data.frames and matrices (forecasts):
#' - ok: a 'V' indicates a successful check
#' - maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
#' - meanNAs: the proportion of NAs of the entire data.frame
#' - nrow: printed if not the same as the 't' vector length
#' - colnames: columns must be names 'kxx', where 'xx' is the horizon
#' - sameclass: 'X' if not all columns are the same class
#' - class: prints the class of the columns if they are all the same
#'
#' @title Checking the data.list for appropriate form.
#' @param object The object to be checked.
#' @return The tables generated.
#'
#' # Check a data.list (see \code{?\link{check.data.list}})
#' check(Dbuilding)
#' - maxHorizonNAs: The proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
#'
#' # Vector with observations not same length as t
#' D <- Dbuilding
#' D$heatload <- D$heatload[1:10]
#' check(D)
#' - meanNAs: The proportion of NAs of the entire matrix
#'
#' - nrow: Same length as the 't' vector?
#'
#' - colnames: Columns must be names 'kx', where 'x' is the horizon (e.g. k12 is 12-step horizon)
#'
#' - sameclass: Error if not all columns are the same class
#'
#' - class: Prints the class of the columns if they are all the same
#'
#' @examples
#'
#' summary(Dbuilding)
#'
#' # Some NAs in k1 forecast
#' D <- Dbuilding
#' D$Ta$k1[1:1500] <- NA
#' check(D)
#' summary(D)
#'
#' # Vector with observations not same length as t throws error
#' D <- Dbuilding
#' D$heatload <- D$heatload[1:10]
#' try(summary(D))
#'
#' # Forecasts wrong count
#' D <- Dbuilding
#' D$Ta <- D$Ta[1:10, ]
#' try(summary(D))
#'
#' # Wrong column names
#' names(D$Ta)
#' D <- Dbuilding
#' names(D$Ta)[4] <- "xk"
#' names(D$Ta)[2] <- "x2"
#' try(summary(D))
#'
#' # No column names
#' D <- Dbuilding
#' names(D$Ta) <- NULL
#' try(summary(D))
#'
#' # Don't stop or only print if stopped
#' onlineforecast:::summary.data.list(D, stopit=FALSE)
#' try(onlineforecast:::summary.data.list(D, printit=FALSE))
#'
#' # Only check for specified variables
#' # (e.g. do like this in model functions to check only variables used in model)
#' onlineforecast:::summary.data.list(D, nms=c("heatload","I"))
#'
#' @export
check.data.list <- function(object){
# Check if how the data.list is setup and report potential issues
summary.data.list <- function(object, printit=TRUE, stopit=TRUE, nms=names(object), msgextra="", ...){
D <- object
if(!"t" %in% names(D)){ stop("'t' is missing in the data.list: It must be a vector of equidistant time points (can be an integer, but preferably POSIXct class with tz 'GMT' or 'UTC'.)") }
if(length(unique(diff(D$t))) != 1){ stop("'t' is not equidistant and have no NA values")}
message("\nTime t is fine: Length ",length(D$t),"\n")
# The final message
msg <- NULL
# Check the time vector
if(!"t" %in% names(D)){ msg <- c(msg,"'t' is missing in the data.list: It must be a vector of equidistant time points (can be an integer, but preferably POSIXct class with tz 'GMT' or 'UTC'.)")}
if(length(D$t) > 1){
if(length(unique(diff(D$t))) != 1){ msg <- c(msg,"'t' is not equidistant or have NA values.") }
}
# Which elements are data.frame or matrix?
isMatrix <- sapply(D, function(x){ inherits(x,c("matrix","data.frame")) })
# Which is data.frame or matrix?
dfOrMat <- sapply(D, function(x){ (class(x) %in% c("matrix","data.frame"))[1] })
# Vectors check
vecseq <- which(!dfOrMat & names(dfOrMat) != "t")
vecseq <- which(!isMatrix & names(isMatrix) != "t" & names(isMatrix) %in% nms)
Observations <- NA
if(length(vecseq) > 0){
cat("Observation vectors:\n")
vecchecks <- c("ok","NAs","length","class")
Observations <- data.frame(matrix("", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(names(vecseq),vecchecks)), stringsAsFactors=FALSE)
Observations$ok <- "V"
vecchecks <- c("NAs","length","class")
Observations <- data.frame(matrix("ok", nrow=length(vecseq), ncol=length(vecchecks), dimnames=list(pst("$",names(vecseq)),vecchecks)), stringsAsFactors=FALSE)
#
for(i in 1:length(vecseq)){
#
......@@ -346,58 +373,83 @@ check.data.list <- function(object){
Observations$NAs[i] <- pst(NAs,"%")
# Check the length
if(length(D[[nm]]) != length(D$t)){
Observations$length[i] <- length(D[[nm]])
Observations$length[i] <- "ERROR"
msg <- c(msg,pst(rownames(Observations)[i]," (length ",length(D[[nm]]),"), not same length as t (length ",length(D$t),")"))
}
# Its class
Observations$class[i] <- class(D[[nm]])
# Not ok?
if(sum(Observations[i, 3] == "") < 1){
Observations$ok[i] <- ""
}
}
print(Observations)
}
#
# For forecasts
dfseq <- which(dfOrMat)
# Forecasts check
dfseq <- which(isMatrix & names(isMatrix) %in% nms)
Forecasts <- NA
if(length(dfseq) > 0){
cat("\nForecast data.frames or matrices:\n")
dfchecks <- c("ok","maxNAs","meanNAs","nrow","colnames","sameclass","class")
Forecasts <- data.frame(matrix("", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(names(dfseq),dfchecks)), stringsAsFactors=FALSE)
Forecasts$ok <- "V"
dfchecks <- c("maxHorizonNAs","NAs","nrow","colnames","sameclass","class")
Forecasts <- data.frame(matrix("ok", nrow=length(dfseq), ncol=length(dfchecks), dimnames=list(pst("$",names(dfseq)),dfchecks)), stringsAsFactors=FALSE)
#
for(i in 1:length(dfseq)){
#
nm <- names(dfseq)[i]
colnms <- nams(D[[nm]])
if(is.null(colnms)){
msg <- c(msg, pst("'",nm,"' has no column names! Columns in forecast matrices must be named 'kx', where x is the horizon (e.g. 'k12' is the column with the 12 step forecast)"))
Forecasts[i, ] <- rep(NA,ncol(Forecasts))
}else{
# max NAs
maxNAs <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
Forecasts$maxNAs[i] <- pst(maxNAs,"%")
tmp <- round(max(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
Forecasts$maxHorizonNAs[i] <- pst(tmp,"%")
# Mean NAs
meanNAs <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
Forecasts$meanNAs[i] <- pst(meanNAs,"%")
tmp <- round(mean(sapply(colnms, function(colnm){ 100*sum(is.na(D[[nm]][ ,colnm])) / nrow(D[[nm]]) })))
Forecasts$NAs[i] <- pst(tmp,"%")
# Check the number of rows
if(nrow(D[[nm]]) != length(D$t)){
Forecasts$nrow[i] <- nrow(D[[nm]])
Forecasts$nrow[i] <- "ERROR"
msg <- c(msg, pst(nm," has ",nrow(D[[nm]])," rows, must be equal to length of t (n=",length(D$t),")"))
}
# Check the colnames, are they unique and all k+integer?
if(!length(unique(grep("k[[:digit:]]+$",colnms,value=TRUE))) == length(colnms)){
Forecasts$colnames[i] <- "X"
tmp <- unique(grep("k[[:digit:]]+$",colnms,value=TRUE))
if(!length(tmp) == length(colnms)){
Forecasts$colnames[i] <- "ERROR"
msg <- c(msg, pst(nm," has columns named: '",pst(colnms[!(colnms %in% tmp)],collapse="','"),"'. Columns in forecast matrices must be named 'kx', where x is the horizon (e.g. 'k12' is the column with the 12 step forecast)"))
}
if(!length(unique(sapply(colnms, function(colnm){ class(D[[nm]][ ,colnm]) }))) == 1){
Forecasts$sameclass[i] <- "X"
Forecasts$sameclass[i] <- "ERROR"
msg <- c(msg, pst(nm," doesn't have same class for all columns"))
}else{
Forecasts$class[i] <- class(D[[nm]][ ,1])
}
# Not ok?
if(sum(Forecasts[i, ] == "") < (length(dfchecks)-4)){
Forecasts$ok[i] <- ""
}
}
}
# Print the results
if(printit){
cat("\nLength of time vector 't': ",length(D$t),"\n\n", sep="")
if(length(vecseq) > 0){
# cat("\n- Observation vectors:\n")
print(Observations)
}
if(length(dfseq) > 0){
# cat("\n- Forecast data.frames or matrices:\n")
cat("\n")
print(Forecasts)
}
}
# Error message to print?
if(length(msg) > 0){
cat("\n")
msg <- c(msg,"\nSee '?summary.data.list' for more information")
# Stop or just print
if(stopit){
stop(pst(msg,collapse="\n"))
}else{
cat("ERRORS: \n",pst(msg,collapse="\n"),"\n")
}
}
# Return
invisible(list(Observations=Observations, Forecasts=Forecasts))
}
......
......@@ -213,6 +213,9 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list(
#----------------------------------------------------------------
# Function for transforming the input data to the regression data
transform_data = function(data){
# Do a check of the data
self$check(data, checkoutput=FALSE)
# Evaluate for each input the expresssion to generate the model input data
L <- lapply(self$inputs, function(input){
# Evaluate the expression (input$expr)
......@@ -248,11 +251,11 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list(
#----------------------------------------------------------------
# Check if the model and data is setup correctly
check = function(data = NA){
# some checks are done here, maybe more should be added (??also when transforming inputs, if something goes wrong its caught and message is printed)
#
check = function(data = NA, checkoutput = TRUE, checkinputs = TRUE){
# some checks are done here, this one is called in transform_data()
# ################################
# First check if the output is set correctly
if(checkoutput){
# Check if the output is set correctly
if( is.na(self$output) ){
stop("Model output is NA, it must be set to the name of a variable in the data.list used.")
}
......@@ -265,37 +268,42 @@ forecastmodel <- R6::R6Class("forecastmodel", public = list(
if( length(data[[self$output]]) != length(data$t) ){
stop("The length of the model output '",self$output,"' is ",length(data[[self$output]]),", which is not equal to the length of the time vector (t), which is ",length(data$t))
}
}
# ################################
if(checkinputs){
# Check that the kseq is set in the model
if( !is.numeric(self$kseq) ){
stop("'model$kseq' is not set. Must be an integer (or numeric) vector.")
}
# ################################
# Check all input variables are correctly set data
for(i in 1:length(self$inputs)){
# Find all the variables in the expression
nms <- all.vars(parse(text=self$inputs[[i]]$expr[[1]]))
# Check all input variables are correctly set in data
# Find all the variable names used in the expressions
tmp <- lapply(self$inputs, function(input){
all.vars(parse(text=input$expr[[1]]))
})
nms <- unique(unlist(tmp))
# Do the default test of the data.list, only for the variables used in the expressions
summary.data.list(data, printit=FALSE, nms=nms)
# Do a bit of extra check
# Are all variables used available in data?
notindata <- nms[!(nms %in% names(data)) & nms != "pi"]
if(length(notindata) > 0){
stop("Variables ",pst("'",notindata,"'",collapse="','")," are used in input expressions, but are not in data")
}
# Check each variable
for(nm in nms){
if(class(data[[nm]]) %in% c("data.frame","matrix")){
# Are the inputs forecast matrices?
if(!inherits(data[[nm]],c("data.frame","matrix"))){
stop(nm," must be a forecast matrix (in 'data' as a data.frame or matrix with columns named 'kxx', see ?data.list), since it is used as a variable in an input expression")
}
# It's a forecast input, hence must have the k columns in kseq
if(!all(self$kseq %in% as.integer(gsub("k","",names(data[[nm]]))))){
missingk <- which(!self$kseq %in% as.integer(gsub("k","",names(data[[nm]]))))
stop("The input variable '",nm,"' doesn't have all needed horizons.\nIt has ",pst(names(data[[nm]]),collapse=","),"\nIt is missing ",pst("k",self$kseq[missingk],collapse=","))
}
# Check if the number of observations match
if( nrow(data[[nm]]) != length(data$t) ){
stop(pst("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. It has ",nrow(data[[nm]]),", but 't' has ",length(data$t)))
}
}else if(class(data[[nm]]) == "numeric"){
# Observation input, check the length
if( length(data[[nm]]) != length(data$t) ){
stop("The input variable '",nm,"' doesn't have the same number of observations as time vector 't' in the data. It has ",length(data[[nm]]),", but 't' has ",length(data$t))
}
}else{
if(!nm == "pi"){
stop("The variable '",nm,"' is missing in data, or it has the wrong class.\nIt must be class: data.frame, matrix or vector.\nIt is needed for the input expression '",self$inputs[[i]]$expr[[1]],"'")
}
}
}
}
},
......
......@@ -74,13 +74,13 @@
#' @importFrom stats lm residuals
#' @export
lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, printout = TRUE){
# Check that the model is setup correctly, it will stop and print a message if not
model$check(data)
# Function for initializing an lm fit:
# - it will change the "model" input (since it an R6 class and thus passed by reference
# - If scorefun is given, e.g. rmse() then the value of this is returned
# Check the model output data (input is check in the transform function)
model$check(data, checkinputs=FALSE)
if(printout){
# Should here actually only print the one that were found and changed?
message("----------------")
......@@ -101,7 +101,7 @@ lm_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE, pr
# ################################
# Init the inputs states (and some more is reset)
model$reset_state()
# Generate the 2nd stage inputs (i.e. the transformed data)
# Generate the 2nd stage inputs (i.e. the transformed data). Input data is checked in the transform function.
datatr <- model$transform_data(data)
#
......
......@@ -118,19 +118,19 @@
rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE,
runcpp = TRUE, printout = TRUE){
# Check that the model is setup correctly, it will stop and print a message if not
model$check(data)
# Function for initializing an rls fit:
# - it will change the "model" input (since it an R6 class and thus passed by reference
# - If scorefun is given, e.g. rmse() then the value of this is returned
#
# Check the model output data (input is check in the transform function)
model$check(data, checkinputs=FALSE)
if(printout){
# Should here actually only print the ones that were found and changed?
message("----------------")
if(is.na(prm[1])){
message("prm=NA, so current parameters are used.")
message("Argument 'prm' is NA, so parameters in 'model$prm' are used.")
}else{
print_to_message(prm)
}
......@@ -145,7 +145,7 @@ rls_fit <- function(prm=NA, model, data, scorefun = NA, returnanalysis = TRUE,
# Reset the model state (e.g. inputs state, stored iterative data, ...)
model$reset_state()
# Generate the 2nd stage inputs (i.e. the transformed data)
# Generate the 2nd stage inputs (i.e. the transformed data). Input data is checked in the transform function.
datatr <- model$transform_data(data)
# Initialize the fit for each horizon
......
......@@ -70,8 +70,13 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, printit = TRU
fit <- object
#
if(is.na(scoreperiod[1])){
if(is.null(fit$data$scoreperiod)){
warning("No scoreperiod set, so using all data for score calculation.")
scoreperiod <- rep(TRUE, length(residuals(fit)))
}else{
scoreperiod <- fit$data$scoreperiod
}
}
#
scipen <- options(scipen=10)$scipen
# Calculate the score for each horizon
......
......@@ -197,11 +197,12 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
# For keeping all the results
L <- list()
# The first value in direction is default
if(length(direction) > 1){ direction <- direction[1] }
direction <- match.arg(direction)
# Init
istep <- 1
# Different start up, if a start model is given
if( class(modelstart)[1] == "forecastmodel" ){
# Note the use of inherits() instead of: class(modelstart)[1] == "forecastmodel". See https://developer.r-project.org/Blog/public/2019/11/09/when-you-think-class.-think-again/
if( inherits(modelstart, "forecastmodel") ){
# The full model will not be changed from here, so no need to clone it
mfull <- modelfull
m <- modelstart$clone()
......@@ -239,12 +240,14 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
}
}
# Find the inputs to keep, if any
if(class(keepinputs) == "logical"){
if(inherits(keepinputs,"logical")){
if(keepinputs){
keepinputs <- nams(mfull$inputs)
}else{
keepinputs <- ""
}
}else{
if(!inherits(keepinputs,"character")){ stop("keepinputs must be a logical or a character with the names of the inputs to keep in the model.") }
}
# Helper
c_mStep <- function(l, nms){
......@@ -263,7 +266,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
# Optimize
res <- optimfun(m, data, printout=printout, scorefun=scorefun, ...)
# Should we forecast only on the complete cases?
if(class(fitfun) == "function"){
if(inherits(fitfun,"function")){
# Forecast to get the complete cases
mtmp <- m$clone_deep()
# If kseqopt is set, then make sure that it is used when fitting here
......@@ -283,7 +286,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
}
message("Current score: ",format(scoreCurrent,digits=7))
if(class(fitfun) == "function"){
if(inherits(fitfun,"function")){
message("Current complete cases: ",sum(casesCurrent)," (Diff in score from optim:",L[[istep]]$optimresult$value-scoreCurrent,")")
}
# Next step
......@@ -379,7 +382,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
names(Lstep) <- names(mStep)
# Complete cases considered: Should we forecast and recalculate the score on complete cases from all models?
if(class(fitfun) == "function"){
if(inherits(fitfun,"function")){
LYhat <- mclapply(1:length(mStep), function(i){
mtmp <- mStep[[i]]$clone_deep()
# If kseqopt is set, then make sure that it is used when fitting here
......@@ -406,7 +409,7 @@ step_optim <- function(modelfull, data, prm=list(NA), direction = c("both","back
tmp[ ,1] <- pst(format(100 * (scoreCurrent - tmp) / scoreCurrent, digits=2),"%")
nams(tmp) <- "Improvement"
}
if(class(fitfun) == "function"){
if(inherits(fitfun,"function")){
tmp <- cbind(tmp, apply(casesStep != casesCurrent, 2, sum))
nams(tmp)[2] <- "CasesDiff"
}
......
#----------------------------------------------------------------
# v1.0.1
We have changed a few minor things:
- Added better checks and error messages for input data
- Small bug fixes
#----------------------------------------------------------------
# v1.0.0
......
......@@ -16,7 +16,7 @@ citEntry(
title = "{{onlineforecast}: Forecast Modelling for Online Applications}",
author = "Peder Bacher and Hjörleifur G. Bergsteinsson",
year = "2021",
note = "R package version 1.0.0",
note = "R package version 1.0.1",
url = "https://onlineforecasting.org",
textVersion = ""
)
......@@ -14,7 +14,7 @@
## install.packages("R.rsp")
## install.packages("data.table")
## install.packages("plotly")
## install.packages("pbs")
#----------------------------------------------------------------
# Go
......@@ -46,12 +46,12 @@ library(roxygen2)
#use_test("newtest")
# # Run all tests
#document()
#test()
document()
test()
# # Run the examples
#load_all(as.package("../onlineforecast"))
#run_examples()
load_all(as.package("../onlineforecast"))
run_examples()
# # Run tests in a single file
# test_file("tests/testthat/test-rls-heat-load.R")
......@@ -69,6 +69,7 @@ write.table(txt2, "inst/CITATION", row.names=FALSE, col.names=FALSE, quote=FALSE
# ----------------------------------------------------------------
# Build the package
document()
# Run the "vignettes/make.R" to build the cache
build(".", vignettes=TRUE)
# Install it
......
......@@ -33,19 +33,6 @@ The argument is converted into POSIXlt with tz="GMT".
}
\section{Methods}{
#' @examples
# Create a POSIXlt with tz="GMT"
aslt("2019-01-01")
class(aslt("2019-01-01"))
aslt("2019-01-01 01:00:05")
# Convert between time zones
x <- aslt("2019-01-01", tz="CET")
aslt(x,tz="GMT")
# To seconds and back again
aslt(as.numeric(x, units="sec"))
- aslt.character: Simply a wrapper for \code{as.POSIXlt}
......@@ -60,3 +47,18 @@ aslt(as.numeric(x, units="sec"))
- aslt.numeric: Converts from UNIX time in seconds to POSIXlt.
}
\examples{
# Create a POSIXlt with tz="GMT"
aslt("2019-01-01")
class(aslt("2019-01-01"))
aslt("2019-01-01 01:00:05")
# Convert between time zones
x <- aslt("2019-01-01", tz="CET")
aslt(x,tz="GMT")
# To seconds and back again
aslt(as.numeric(x, units="sec"))
}
......@@ -68,6 +68,8 @@ See the example \url{https://onlineforecasting.org/examples/solar-power-forecast
# In a model formulation it will be:
model <- forecastmodel$new()
model$add_inputs(mutday = "bspline(tday)")
# We set the horizons (actually not needed for the transform, only required for data checks)
model$kseq <- 1:4
# Such that at the transform stage will give the same as above
model$transform_data(D)
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.list.R
\name{check}
\alias{check}
\title{Checking the object for appropriate form.}
\usage{
check(object)
}
\arguments{
\item{object}{The object to be checked.}
}
\value{
The tables generated.
# Check a data.list (see \code{?\link{check.data.list}})
check(Dbuilding)
}
\description{
Checking the object for appropriate form.
}
\details{
Prints on table form the result of the check.
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.list.R
\name{check.data.list}
\alias{check.data.list}
\title{Checking the data.list for appropriate form.}
\usage{
\method{check}{data.list}(object)
}
\arguments{
\item{object}{The object to be checked.}
}
\value{
The tables generated.
# Check a data.list (see \code{?\link{check.data.list}})
check(Dbuilding)
# Vector with observations not same length as t
D <- Dbuilding
D$heatload <- D$heatload[1:10]
check(D)
# Some NAs in k1 forecast
D <- Dbuilding
D$Ta$k1[1:1500] <- NA
check(D)
# Wrong column names
names(D$Ta)
}
\description{
Checking the data.list for appropriate form.
}
\details{
Prints a check of the time vector t, which must have equidistant time points and no NAs.
Then the results of checking vectors (observations):
- ok: A 'V' indicates a successful check
- maxNAs: Proportion of NAs
- length: printed if not the same as the 't' vector
- class: the class
Then the results of checking data.frames and matrices (forecasts):
- ok: a 'V' indicates a successful check
- maxNAs: the proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
- meanNAs: the proportion of NAs of the entire data.frame
- nrow: printed if not the same as the 't' vector length
- colnames: columns must be names 'kxx', where 'xx' is the horizon
- sameclass: 'X' if not all columns are the same class
- class: prints the class of the columns if they are all the same
}
......@@ -19,8 +19,11 @@ Make a data.list of the vectors and data.frames given.
See the vignette 'setup-data' on how a data.list must be setup.
It's simply a list of class \code{data.list} holding:
- vector \code{t}
- vector(s) of observations
- data.frames (or matrices) of forecast inputs
}
\examples{
......@@ -29,13 +32,13 @@ It's simply a list of class \code{data.list} holding:
time <- seq(ct("2019-01-01"),ct("2019-01-02"),by=3600)
# Observations time series (as vector)
xobs <- rnorm(length(time))
# Forecast input as data.frame
# Forecast input as a data.frame with columns names 'kxx', where 'xx' is the horizon
X <- data.frame(matrix(rnorm(length(time)*3), ncol=3))
names(X) <- pst("k",1:3)
D <- data.list(t=time, xobs=xobs, X=X)
# Check it
check(D)
# Check it (see \code{?\link{summary.data.list}})
summary(D)
}
......@@ -74,6 +74,6 @@ X <- subset(Dbuilding, 1:1000, pattern="^Ta", kseq = 10, lagforecasts = TRUE)
plot(X$Ta$k10, X$Taobs)
# Fit a model for the 10-step horizon
abline(lm(Taobs ~ Ta.k10, X), col=2)
abline(lm(Taobs ~ Ta.k10, as.data.frame(X)), col=2)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.list.R
\name{summary.data.list}
\alias{summary.data.list}
\title{Summary with checks of the data.list for appropriate form.}
\usage{
\method{summary}{data.list}(
object,
printit = TRUE,
stopit = TRUE,
nms = names(object),
msgextra = "",
...
)
}
\arguments{
\item{object}{The object to be summarized and checked}
\item{printit}{A boolean deciding if check results tables are printed}
\item{stopit}{A boolean deciding if the function stop with an error if the check is not ok}
\item{nms}{A character vector. If given specifies the variables (vectors or matrices) in object to check}
\item{msgextra}{A character which is added in the printout of an (potential) error message}
\item{...}{Not used}
}
\value{
The tables generated.
Checking the data.list for appropriate form:
A check of the time vector t, which must have equidistant time points and no NAs.
Then the results of checks of vectors (observations):
- NAs: Proportion of NAs
- length: Same length as the 't' vector?
- class: The class of the vector
Then the results of checking data.frames and matrices (forecasts):
- maxHorizonNAs: The proportion of NAs for the horizon (i.e. column) with the highest proportion of NAs
- meanNAs: The proportion of NAs of the entire matrix
- nrow: Same length as the 't' vector?
- colnames: Columns must be names 'kx', where 'x' is the horizon (e.g. k12 is 12-step horizon)
- sameclass: Error if not all columns are the same class
- class: Prints the class of the columns if they are all the same
}
\description{
Summary including checks of the data.list for appropriate form.
}
\details{
Prints on table form the result of the checks.
}
\examples{
summary(Dbuilding)
# Some NAs in k1 forecast
D <- Dbuilding
D$Ta$k1[1:1500] <- NA
summary(D)
# Vector with observations not same length as t throws error
D <- Dbuilding
D$heatload <- D$heatload[1:10]
try(summary(D))
# Forecasts wrong count
D <- Dbuilding
D$Ta <- D$Ta[1:10, ]
try(summary(D))
# Wrong column names
D <- Dbuilding
names(D$Ta)[4] <- "xk"
names(D$Ta)[2] <- "x2"
try(summary(D))
# No column names
D <- Dbuilding
names(D$Ta) <- NULL
try(summary(D))
# Don't stop or only print if stopped
onlineforecast:::summary.data.list(D, stopit=FALSE)
try(onlineforecast:::summary.data.list(D, printit=FALSE))
# Only check for specified variables
# (e.g. do like this in model functions to check only variables used in model)
onlineforecast:::summary.data.list(D, nms=c("heatload","I"))
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment