adaptive_quantile.R

#----------------------------------------------------------------
# Init by deleting all variables and functions
rm(list=ls())
# Set the working directory
setwd("/home/hgb/git/onlineforecast/misc-R/quantile/")
#----------------------------------------------------------------


#----------------------------------------------------------------
# Exercise on RLS

# Init
rm(list = ls())
sapply(dir("functions",full.names=TRUE), source)


# A function for fitting a recursive least squares estimation
source("rls.R")
rls
#----------------------------------------------------------------


#----------------------------------------------------------------
# Generate some data from a linear model
n <- 200
x <- runif(n)
beta0 <- 2
beta1 <- -3
y <- beta0 + beta1 * x + rnorm(n, sd=0.1)

# Estimate the coefficients
fit <- lm(y ~ x)
plot(x, y)
abline(fit)
fit

# Generate again with other coefficient values
x1 <- runif(n)
beta0 <- -2
beta1 <- 3
y1 <- beta0 + beta1 * x1 + rnorm(n, sd=0.1)

# Estimate again
fit1 <- lm(y1 ~ x1)
plot(x1, y1)
abline(fit1)
fit

# Combine them as two "periods",
# such that at the start of the the second part, at i=201, the 
# coefficients change and have different values
X <- data.frame(y=c(y,y1), x=c(x,x1))

# See clearly there is two "regimes"
plot(X$x, X$y)

# As a time series you only see the change in mean
plot(X$y)

# Fit a linear regression on the combined data
lm(y ~ x, X)
#----------------------------------------------------------------


#----------------------------------------------------------------
# Fit a recursive least squares (linear regression) on the combined data
val <- rls(y ~ x, lambda = 0.97, data = X, k = 1)

# Plot the tracked coefficients
colnames(val$Theta) <- c("beta0","beta1")
plot.ts(val$Theta)


plot.ts(val$y)
lines(val$y - val$residuals, col ="red")
#----------------------------------------------------------------

setwd("/home/hgb/git/onlineforecast/")
library(devtools)
library(splines)
library(quantreg)
load_all(".") # need to be under the package directory

## create an fcst matrix of the input data
createFCSTmatrix <- function(vec, kHor) {
  # Create a matrix of the temperature measurements
  # vec: vector of temperature measurements
  # kHor: horizon of the forecast
  # Return: matrix of the temperature measurements
  N <- length(vec)
  M <- length(kHor)
  mat <- matrix(NA, nrow = N, ncol = M)
  for (i in 1:(N - M)) {
    mat[i, ] <- vec[i:(i + M - 1)]
  }
  colnames(mat) <- paste0("k", kHor)
  return(mat)
}

inputX <- createFCSTmatrix(X$x, kHor = 0:24)

D <- list()
D$t <- seq_along(X$x)
D$x <- inputX
D$y <- X$y

## subset the data, as the algorithm does not handle NA! I had forgotten that.
## it is due to the calculation for the simplex, which is not defined for NA
#class(D) <- "data.list"
idx <-  in_range(1,D$t,350)
Dtrain <- list()
Dtrain$t <- D$t[idx]
Dtrain$x <- inputX[idx,]
Dtrain$y <- X$y[idx]

## needs to have scoreperiod, for computing the loss
Dtrain$scoreperiod <- in_range(1,D$t,300)

class(Dtrain) <- "data.list"

str(Dtrain)

## Create the model, where N1 is the initial number of data points - for the cold start
model <- qmodel$new(N1 = 30)
## The output is the y
model$output = "y"
## The design matrix, intercept and the x
model$add_inputs(Xinput = "x",
                mu = "one()")

## Add the regularization parameter, lambda, and the bounds
model$add_regprm("rls_prm(lambda=0.9)")

# model$add_prmbounds(lambda = c(0.99, 0.999, 0.9999))

### Setup of the booking keeping matrix - 
##############################################################
####### THIS SHOULD BE DONE IN quantile_fit.R ################
##############################################################
## First 2 are the intercept and the x
model$IX <- 0:1
## The number of predictors, we could just use the length of the input matrix or the IX
model$K <- 2
## The output column is the 2nd one
model$Iy <- 2

## Select which quantiles we want to optimise and fit
model$tau <- c(0.05,0.2, 0.5, 0.8,0.95)
## Select which prediction horizon
model$kseq <- 0:24

#opt_model <- quantile_optim(model = model, data = Dtrain)

PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[1])
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[2])
PAR <- c("lambda" = 0.97)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[3])
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[4])
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[5])

## compute the predictions from the input data and the estimated coefficeints.
Pred_model <- quantile_predict(model = model, datatr = model$datatr)


par(mfrow = c(2,1))

plot(1:length(Dtrain$y), rep(NA, length(Dtrain$y)), type = "l", ylim = range(Dtrain$y))
polygon(c(1:length(Pred_model$q0.05[,"k1"]), rev(1:length(Pred_model$q0.95[,"k1"]))), c(Pred_model$q0.05[,"k1"], rev(Pred_model$q0.95[,"k1"])), col = "grey80", border = NA)


lines(Dtrain$y, col = "black")
lines(Pred_model$q0.5[,"k1"], type = "l", col = "red")

lines(val$y - val$residuals, col ="blue")
legend("bottomleft", legend = c("y", "RLS pred", "Quantile Pred"), col = c("black", "blue", "red"), lty = 1, ncol = 3, bty = "n", cex = 1.4)

plot(val$Theta[,"beta0"], type = "l", col = "blue", ylim = range(val$Theta, na.rm = TRUE))
lines(model$beta$q0.5$k1[,2], lty = 1, col = "red")
lines(val$Theta[,"beta1"], type = "l", lty = 2, col = "blue")
lines(model$beta$q0.5$k1[,1], type = "l", lty = 2, col = "red")
legend(x = 50, y = 0, legend = c("RLS Intercept", "RLS Slope", "Quantile Intercept", "Quantile Slope"), col = c("blue", "blue", "red", "red"), lty = c(1,2,1,2), ncol = 2, bty = "n", cex = 1.4)