#---------------------------------------------------------------- # Init by deleting all variables and functions rm(list=ls()) # Set the working directory setwd("/home/hgb/git/onlineforecast/misc-R/quantile/") #---------------------------------------------------------------- #---------------------------------------------------------------- # Exercise on RLS # Init rm(list = ls()) sapply(dir("functions",full.names=TRUE), source) # A function for fitting a recursive least squares estimation source("rls.R") rls #---------------------------------------------------------------- #---------------------------------------------------------------- # Generate some data from a linear model n <- 200 x <- runif(n) beta0 <- 2 beta1 <- -3 y <- beta0 + beta1 * x + rnorm(n, sd=0.1) # Estimate the coefficients fit <- lm(y ~ x) plot(x, y) abline(fit) fit # Generate again with other coefficient values x1 <- runif(n) beta0 <- -2 beta1 <- 3 y1 <- beta0 + beta1 * x1 + rnorm(n, sd=0.1) # Estimate again fit1 <- lm(y1 ~ x1) plot(x1, y1) abline(fit1) fit # Combine them as two "periods", # such that at the start of the the second part, at i=201, the # coefficients change and have different values X <- data.frame(y=c(y,y1), x=c(x,x1)) # See clearly there is two "regimes" plot(X$x, X$y) # As a time series you only see the change in mean plot(X$y) # Fit a linear regression on the combined data lm(y ~ x, X) #---------------------------------------------------------------- #---------------------------------------------------------------- # Fit a recursive least squares (linear regression) on the combined data val <- rls(y ~ x, lambda = 0.97, data = X, k = 1) # Plot the tracked coefficients colnames(val$Theta) <- c("beta0","beta1") plot.ts(val$Theta) plot.ts(val$y) lines(val$y - val$residuals, col ="red") #---------------------------------------------------------------- setwd("/home/hgb/git/onlineforecast/") library(devtools) library(splines) library(quantreg) load_all(".") # need to be under the package directory ## create an fcst matrix of the input data createFCSTmatrix <- function(vec, kHor) { # Create a matrix of the temperature measurements # vec: vector of temperature measurements # kHor: horizon of the forecast # Return: matrix of the temperature measurements N <- length(vec) M <- length(kHor) mat <- matrix(NA, nrow = N, ncol = M) for (i in 1:(N - M)) { mat[i, ] <- vec[i:(i + M - 1)] } colnames(mat) <- paste0("k", kHor) return(mat) } inputX <- createFCSTmatrix(X$x, kHor = 0:24) D <- list() D$t <- seq_along(X$x) D$x <- inputX D$y <- X$y ## subset the data, as the algorithm does not handle NA! I had forgotten that. ## it is due to the calculation for the simplex, which is not defined for NA #class(D) <- "data.list" idx <- in_range(1,D$t,350) Dtrain <- list() Dtrain$t <- D$t[idx] Dtrain$x <- inputX[idx,] Dtrain$y <- X$y[idx] ## needs to have scoreperiod, for computing the loss Dtrain$scoreperiod <- in_range(1,D$t,300) class(Dtrain) <- "data.list" str(Dtrain) ## Create the model, where N1 is the initial number of data points - for the cold start model <- qmodel$new(N1 = 30) ## The output is the y model$output = "y" ## The design matrix, intercept and the x model$add_inputs(Xinput = "x", mu = "one()") ## Add the regularization parameter, lambda, and the bounds model$add_regprm("rls_prm(lambda=0.9)") # model$add_prmbounds(lambda = c(0.99, 0.999, 0.9999)) ### Setup of the booking keeping matrix - ############################################################## ####### THIS SHOULD BE DONE IN quantile_fit.R ################ ############################################################## ## First 2 are the intercept and the x model$IX <- 0:1 ## The number of predictors, we could just use the length of the input matrix or the IX model$K <- 2 ## The output column is the 2nd one model$Iy <- 2 ## Select which quantiles we want to optimise and fit model$tau <- c(0.05,0.2, 0.5, 0.8,0.95) ## Select which prediction horizon model$kseq <- 0:24 #opt_model <- quantile_optim(model = model, data = Dtrain) PAR <- c("lambda" = 0.999) quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[1]) PAR <- c("lambda" = 0.999) quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[2]) PAR <- c("lambda" = 0.97) quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[3]) PAR <- c("lambda" = 0.999) quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[4]) PAR <- c("lambda" = 0.999) quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[5]) ## compute the predictions from the input data and the estimated coefficeints. Pred_model <- quantile_predict(model = model, datatr = model$datatr) par(mfrow = c(2,1)) plot(1:length(Dtrain$y), rep(NA, length(Dtrain$y)), type = "l", ylim = range(Dtrain$y)) polygon(c(1:length(Pred_model$q0.05[,"k1"]), rev(1:length(Pred_model$q0.95[,"k1"]))), c(Pred_model$q0.05[,"k1"], rev(Pred_model$q0.95[,"k1"])), col = "grey80", border = NA) lines(Dtrain$y, col = "black") lines(Pred_model$q0.5[,"k1"], type = "l", col = "red") lines(val$y - val$residuals, col ="blue") legend("bottomleft", legend = c("y", "RLS pred", "Quantile Pred"), col = c("black", "blue", "red"), lty = 1, ncol = 3, bty = "n", cex = 1.4) plot(val$Theta[,"beta0"], type = "l", col = "blue", ylim = range(val$Theta, na.rm = TRUE)) lines(model$beta$q0.5$k1[,2], lty = 1, col = "red") lines(val$Theta[,"beta1"], type = "l", lty = 2, col = "blue") lines(model$beta$q0.5$k1[,1], type = "l", lty = 2, col = "red") legend(x = 50, y = 0, legend = c("RLS Intercept", "RLS Slope", "Quantile Intercept", "Quantile Slope"), col = c("blue", "blue", "red", "red"), lty = c(1,2,1,2), ncol = 2, bty = "n", cex = 1.4)