From 7cf139093197c28e050e110f64bbdd23baffebd3 Mon Sep 17 00:00:00 2001 From: Peder <pbac@dtu.dk> Date: Mon, 13 Jul 2020 00:29:11 +0200 Subject: [PATCH] Finally found better name for lag(), now lagdf() which always returns data.frame, and lagvec() which returns a vector --- R/AR.R | 2 +- R/data.list.R | 2 +- R/{lg.R => lagdf.R} | 120 +++++++++++++----------------- R/lagvec.R | 35 +++++++++ R/persistence.R | 2 +- R/plot_ts.R | 2 +- R/residuals.R | 2 +- R/rls_summary.R | 2 +- data/all/make.R | 2 +- make.R | 34 ++++----- vignettes/forecast-evaluation.Rmd | 4 +- vignettes/setup-data.Rmd | 6 +- 12 files changed, 115 insertions(+), 98 deletions(-) rename R/{lg.R => lagdf.R} (63%) create mode 100644 R/lagvec.R diff --git a/R/AR.R b/R/AR.R index 91e5190..bd96fda 100644 --- a/R/AR.R +++ b/R/AR.R @@ -79,7 +79,7 @@ AR <- function(lags){ # Check if saved output values for AR exists if(is.na(model$yAR[1])){ # First time its called, so just use output values from data - val <- matrix(lg(data[[model$output]], lag), nrow=length(data$t), ncol=length(model$kseq)) + val <- matrix(lagvec(data[[model$output]], lag), nrow=length(data$t), ncol=length(model$kseq)) }else{ y <- c(model$yAR, data$y) # Find the seq for the new y lagged vector diff --git a/R/data.list.R b/R/data.list.R index 1822f9c..46030c9 100644 --- a/R/data.list.R +++ b/R/data.list.R @@ -174,7 +174,7 @@ subset.data.list <- function(x, subset = NA, nms = NA, kseq = NA, lagforecasts = if(lagforecasts){ val <- lapply(val, function(X){ if(any(class(X) == "data.frame") & length(grep("^k[[:digit:]]+$",names(X))) > 0) { - return(lg.data.frame(X, lagseq="+k")) + return(lagdf.data.frame(X, lagseq="+k")) }else{ return(X) } diff --git a/R/lg.R b/R/lagdf.R similarity index 63% rename from R/lg.R rename to R/lagdf.R index c3087bc..4896fdb 100644 --- a/R/lg.R +++ b/R/lagdf.R @@ -2,88 +2,70 @@ #library(devtools) #document() #load_all(as.package("../../onlineforecast")) -#?lg +#?lagdf -lag_vector <- function(x, lag){ - if (lag > 0) { - ## Lag x, i.e. delay x lag steps - return(c(rep(NA, lag), x[1:(length(x) - lag)])) - }else if(lag < 0) { - ## Lag x, i.e. delay x lag steps - return(c(x[(abs(lag) + 1):length(x)], rep(NA, abs(lag)))) - }else{ - ## lag = 0, return x - return(x) - } -} - -#' Lagging of a vector by simply the values back or fourth. +#' Lagging by shifting the values back or fourth always returning a data.frame. #' -#' This function lags (shifts) the values of the vector. If \code{lagseq} is a single integer value, then a -#' vector is returned. If \code{lagseq} is an integer vector, then a data.frame is returned with the columns -#' as the vectors lagged with the values in lagseq. +#' This function lags (shifts) the values of the vector. A data.frame is always returned with the columns +#' as the vectors lagged with the values in lagseq. The column names are set to "kxx", where xx are the lag of the column. #' #' -#' @title Lagging of a vector +#' @title Lagging which returns a data.frame #' @param x The vector to be lagged. #' @param lagseq The integer(s) setting the lag steps. -#' @return A vector or a data.frame. -#' @rdname lg -#' @seealso \code{\link{lg.data.frame}} which is run when \code{x} is a data.frame. +#' @return A data.frame. +#' @rdname lagdf +#' @seealso \code{\link{lagdf.data.frame}} which is run when \code{x} is a data.frame. #' @examples #' # The values are simply shifted #' # Ahead in time -#' lg(1:10, 3) +#' lagdf(1:10, 3) #' # Back in time -#' lg(1:10, -3) +#' lagdf(1:10, -3) #' # Works but returns a numric -#' lg(as.factor(1:10), 3) +#' lagdf(as.factor(1:10), 3) #' # Works and returns a character -#' lg(as.character(1:10), 3) +#' lagdf(as.character(1:10), 3) #' # Giving several lag values -#' lg(1:10, c(1:3)) -#' lg(1:10, c(5,3,-1)) +#' lagdf(1:10, c(1:3)) +#' lagdf(1:10, c(5,3,-1)) #' #' # See also how to lag a forecast data.frame -#' ?lg.data.frame +#' ?lagdf.data.frame #' #' #'@export -lg <- function(x, lagseq){ - UseMethod("lg") +lagdf <- function(x, lagseq){ + UseMethod("lagdf") } #' @export -lg.numeric <- function(x, lagseq) { - if(length(lagseq) == 1){ - return(lag_vector(x, lagseq)) - }else{ - ## Return a data.frame - tmp <- lapply_cbind_df(lagseq, function(lag){ - return(lag_vector(x, lag)) - }) - names(tmp) <- pst("k",lagseq) - return(tmp) - } +lagdf.numeric <- function(x, lagseq) { + ## Return a data.frame + tmp <- lapply_cbind_df(lagseq, function(lag){ + return(lagvec(x, lag)) + }) + names(tmp) <- pst("k",lagseq) + return(tmp) } #' @export -lg.factor <- function(x, lagseq) { - lg.numeric(x, lagseq) +lagdf.factor <- function(x, lagseq) { + lagdf.numeric(x, lagseq) } #' @export -lg.character <- function(x, lagseq) { - lg.numeric(x, lagseq) +lagdf.character <- function(x, lagseq) { + lagdf.numeric(x, lagseq) } #' @export -lg.logical <- function(x, lagseq) { - lg.numeric(x, lagseq) +lagdf.logical <- function(x, lagseq) { + lagdf.numeric(x, lagseq) } @@ -95,7 +77,7 @@ lg.logical <- function(x, lagseq) { #' @param x The data.frame to have columns lagged #' @param lagseq The sequence of lags as an integer. Alternatively, as a character "+k", "-k", "+h" or "-h", e.g. "k12" will with "+k" be lagged 12. #' @return A data.frame with columns that are lagged -#' @rdname lg +#' @rdname lagdf #' @examples #' #' # dataframe of forecasts @@ -103,30 +85,30 @@ lg.logical <- function(x, lagseq) { #' X #' #' # Lag all columns -#' lg(X, 1) -#' \dontshow{if(!all(is.na(lg(X, 1)[1, ]))){stop("Lag all columns didn't work")}} +#' lagdf(X, 1) +#' \dontshow{if(!all(is.na(lagdf(X, 1)[1, ]))){stop("Lag all columns didn't work")}} #' #' # Lag each column different steps -#' lg(X, 1:3) +#' lagdf(X, 1:3) #' # Lag each columns with its k value from the column name -#' lg(X, "+k") +#' lagdf(X, "+k") #' \dontshow{ -#' if(any(lg(X, 1:3) != lg(X, "+k"),na.rm=TRUE)){stop("Couldn't lag +k")} +#' if(any(lagdf(X, 1:3) != lagdf(X, "+k"),na.rm=TRUE)){stop("Couldn't lag +k")} #' } #' # Also works for columns named hxx #' names(X) <- gsub("k", "h", names(X)) -#' lg(X, "-h") +#' lagdf(X, "-h") #' #' # If not same length as columns in X, then it doesn't know how to lag -#' \donttest{#lg(X, 1:2)} +#' \donttest{#lagdf(X, 1:2)} #' #' \dontshow{ -#' if(!class(lg(data.frame(k1=1:10), 2)) == "data.frame"){stop("Trying to lag data.frame with 1 column, but return is not class data.frame")} -#' if(!all(dim(lg(data.frame(k1=1:10), "+k")) == c(10,1))){stop("Trying to lag data.frame with 1 column, but return is not class data.frame")} +#' if(!class(lagdf(data.frame(k1=1:10), 2)) == "data.frame"){stop("Trying to lag data.frame with 1 column, but return is not class data.frame")} +#' if(!all(dim(lagdf(data.frame(k1=1:10), "+k")) == c(10,1))){stop("Trying to lag data.frame with 1 column, but return is not class data.frame")} #' } #' #' @export -lg.data.frame <- function(x, lagseq) { +lagdf.data.frame <- function(x, lagseq) { X <- x nms <- nams(X) if (length(lagseq) == 1) { @@ -148,7 +130,7 @@ lg.data.frame <- function(x, lagseq) { }else{ ## lagseq has length equal to the number of columns in X X <- as.data.frame(sapply(1:length(lagseq), function(i) { - lag_vector(X[, i], lagseq[i]) + lagvec(X[, i], lagseq[i]) })) nams(X) <- nms } @@ -157,7 +139,7 @@ lg.data.frame <- function(x, lagseq) { lag <- lagseq ## If only one row in X given, then X it is a not a data.frame anymore (code above has changed it) if(is.vector(X)){ - X <- as.data.frame(lag_vector(X, lag)) + X <- as.data.frame(lagvec(X, lag)) nams(X) <- nms } else { if (lag > 0) { @@ -174,22 +156,22 @@ lg.data.frame <- function(x, lagseq) { } #' @export -lg.matrix <- function(x, lagseq){ - lg.data.frame(x, lagseq) +lagdf.matrix <- function(x, lagseq){ + lagdf.data.frame(x, lagseq) } ## ## Test ## x <- data.frame(k1=1:5,k2=6:10) ## ## -## lg(x, lagseq=1) +## lagdf(x, lagseq=1) ## source("nams.R") -## lg(as.matrix(x), lagseq=c(1,2)) +## lagdf(as.matrix(x), lagseq=c(1,2)) ## ## -## lg(x, lagseq="+k") -## lg(x, "+k") -## lg(x, "-k") +## lagdf(x, lagseq="+k") +## lagdf(x, "+k") +## lagdf(x, "-k") -## lg.data.table <- function(x, nms, lagseq, per_reference = FALSE) { +## lagdf.data.table <- function(x, nms, lagseq, per_reference = FALSE) { ## DT <- x ## if (!per_reference) { ## ## Don't do it per reference diff --git a/R/lagvec.R b/R/lagvec.R new file mode 100644 index 0000000..8ca9086 --- /dev/null +++ b/R/lagvec.R @@ -0,0 +1,35 @@ +#' Lag by shifting the vecter +#' +#' A positive value of \code{lag} shifts the values to the right in the vector. +#' +#' @title Lag by shifting +#' @param x The vector to lag +#' @param lag (integer) The steps to lag. +#' @return The shifted vector +#' +#' @examples +#' +#' # The values are simply shifted +#' # Ahead in time +#' lagvec(1:10, 3) +#' # Back in time +#' lagvec(1:10, -3) +#' # Works but returns a numric +#' lagvec(as.factor(1:10), 3) +#' # Works and returns a character +#' lagvec(as.character(1:10), 3) +#' +#' @export + +lagvec <- function(x, lag){ + if (lag > 0) { + ## Lag x, i.e. delay x lag steps + return(c(rep(NA, lag), x[1:(length(x) - lag)])) + }else if(lag < 0) { + ## Lag x, i.e. delay x lag steps + return(c(x[(abs(lag) + 1):length(x)], rep(NA, abs(lag)))) + }else{ + ## lag = 0, return x + return(x) + } +} diff --git a/R/persistence.R b/R/persistence.R index 3b4ab05..ac21340 100644 --- a/R/persistence.R +++ b/R/persistence.R @@ -38,7 +38,7 @@ persistence <- function(y, kseq, perlen=NA){ }else{ # A periodic persistence Yhat <- as.data.frame(sapply(kseq, function(k){ - lg(y, (perlen-k)%%perlen) + lagdf(y, (perlen-k)%%perlen) })) } names(Yhat) <- pst("k",kseq) diff --git a/R/plot_ts.R b/R/plot_ts.R index 1ce50ab..332c0f9 100644 --- a/R/plot_ts.R +++ b/R/plot_ts.R @@ -136,7 +136,7 @@ plot_ts.data.list <- function(object, patterns=".*", xlim = NA, ylims = NA, xlab # Started with k, then it's forecasts and must be lagged to sync if( prefix == "k" ){ ks <- as.integer(gsub("k","",nams(DL[[nm]])[i])) - X <- lg(X, lagseq=ks) + X <- lagdf(X, lagseq=ks) } # Fix if it is a vector if(is.null(dim(X))) { diff --git a/R/residuals.R b/R/residuals.R index cd22ff7..a87a87b 100644 --- a/R/residuals.R +++ b/R/residuals.R @@ -43,7 +43,7 @@ residuals.data.frame <- function(object, y, ...){ Yhat <- object # Add some checking at some point - Residuals <- y - lg(Yhat, "+k") + Residuals <- y - lagdf(Yhat, "+k") # Named with hxx (it's not a forecast, but an observation available at t) names(Residuals) <- gsub("k","h",names(Residuals)) # diff --git a/R/rls_summary.R b/R/rls_summary.R index 414e4e1..c3659a1 100644 --- a/R/rls_summary.R +++ b/R/rls_summary.R @@ -104,7 +104,7 @@ rls_summary <- function(object, scoreperiod = NA, scorefun = rmse, usecomplete = #abscv <- abs(s/m) # # An AR1 coefficient can tell a bit about the behaviour of the coefficient # x <- c(val) - # xl1 <- lg(x,1) + # xl1 <- lagdf(x,1) # c(mean=m, sd=s, min=min(val,na.rm=TRUE), max=max(val,na.rm=TRUE)) #coefvar=abscv, skewness=skewness(val, na.rm=TRUE))#, ar1=unname(lm(x ~ xl1)$coefficients[2])) })) diff --git a/data/all/make.R b/data/all/make.R index 18be8d5..0fd5222 100644 --- a/data/all/make.R +++ b/data/all/make.R @@ -35,7 +35,7 @@ for (ii in 1:length(nms)) { i <- i[grep("k[[:digit:]]+$", names(data_or)[i])] # # - data[[nms[ii]]] <- lg(data_or[ ,i], -1:-length(i)) + data[[nms[ii]]] <- lagdf(data_or[ ,i], -1:-length(i)) names(data[[nms[ii]]]) <- pst("k", 1:length(i)) row.names(data[[nms[ii]]]) <- NULL data[[nms[ii]]] <- as.data.frame(data[[nms[ii]]]) diff --git a/make.R b/make.R index ebb44d3..a7e27cc 100644 --- a/make.R +++ b/make.R @@ -1,19 +1,19 @@ #---------------------------------------------------------------- -# These packages must be installed -install.packages("Rcpp") -install.packages("R6") -install.packages("splines") -install.packages("digest") -# cpp matrix library -install.packages("RcppArmadillo") -# For develop install -install.packages("devtools") -install.packages("roxygen2") -# For testing and building vignettes -install.packages("rmarkdown") -install.packages("R.rsp") -install.packages("data.table") -install.packages("plotly") +## # These packages must be installed +## install.packages("Rcpp") +## install.packages("R6") +## install.packages("splines") +## install.packages("digest") +## # cpp matrix library +## install.packages("RcppArmadillo") +## # For develop install +## install.packages("devtools") +## install.packages("roxygen2") +## # For testing and building vignettes +## install.packages("rmarkdown") +## install.packages("R.rsp") +## install.packages("data.table") +## install.packages("plotly") @@ -87,10 +87,10 @@ system("R CMD check ../onlineforecast_1.0.0.tar.gz") # Install rtools # Run in R: #writeLines('PATH="${RTOOLS40_HOME}\\usr\\bin;${PATH}"', con = "~/.Renviron") -# Check if rtools are found: +# Restart R and check if rtools are found: #Sys.which("make") -# Must have Makevars and Makevars.win +# Must have Makevars and Makevars.win in "src" # Make the two files, find them and copy into "src" #library("RcppArmadillo") #RcppArmadillo.package.skeleton("tmp-pkg") diff --git a/vignettes/forecast-evaluation.Rmd b/vignettes/forecast-evaluation.Rmd index 33bf041..516a1a1 100644 --- a/vignettes/forecast-evaluation.Rmd +++ b/vignettes/forecast-evaluation.Rmd @@ -308,7 +308,7 @@ for(nm in nms[-1]){ ok <- as.data.frame(ok) names(ok) <- pst("k",kseq) # Lag to match resiuduals in time -ok <- lg(ok, "+k") +ok <- lagdf(ok, "+k") # Only the score period ok <- ok & D$scoreperiod # Finally, the vector with TRUE for all points with no NAs for any forecast @@ -339,7 +339,7 @@ RMSE <- sapply(nms, function(nm){ ```{r, include=FALSE} # sapply(kseq, function(k){ -# rmse(y - lg(YhatDM[ ,pst("k",k)], k)) +# rmse(y - lagdf(YhatDM[ ,pst("k",k)], k)) # # hej det er vilfred jeg er peders søn og jeg elsker min far go jeg god til matematik og jeg elsker også min mor # }) ``` diff --git a/vignettes/setup-data.Rmd b/vignettes/setup-data.Rmd index 727d137..c53b3a5 100644 --- a/vignettes/setup-data.Rmd +++ b/vignettes/setup-data.Rmd @@ -291,7 +291,7 @@ legend("topright", c("8-step forecasts","Observations"), bg="white", lty=1, col= Notice how the are not aligned, since the forecasts are 8 hours ahead. To align them the forecasts must be lagged 8 steps by: ```{r} -plot(D$t[i], lg(D$I$k8[i], 8), type="l", col=2, xlab="Time", ylab="Global radiation (W/m²)") +plot(D$t[i], lagvec(D$I$k8[i], 8), type="l", col=2, xlab="Time", ylab="Global radiation (W/m²)") lines(D$t[i], D$I.obs[i]) legend("topright", c("8-step forecasts lagged","Observations"), bg="white", lty=1, col=2:1) ``` @@ -334,7 +334,7 @@ example the heatload vs. ambient temperature 8-step forecast: ```{r, fig.width=2*fhs, fig.height=fhs, out.width=ows2} par(mfrow=c(1,2)) plot(D$Ta$k8, D$heatload) -plot(lg(D$Ta$k8, 8), D$heatload) +plot(lagvec(D$Ta$k8, 8), D$heatload) ``` So lagging (thus aligning in time) makes less slightly less scatter. @@ -350,7 +350,7 @@ Just as a quick side note: This is the principle used for fitting onlineforecast models, simply shift forecasts to align with the observations: ```{r, fig.width=fhs, fig.height=fhs, out.width=ows} ## Lag the 8-step forecasts to be aligned with the observations -x <- lg(D$I$k8, 8) +x <- lagvec(D$I$k8, 8) ## Take a smaller range x <- x[i] ## Take the observations -- GitLab