Skip to content
Snippets Groups Projects
adaptive_quantile.R 5.63 KiB
Newer Older
  • Learn to ignore specific revisions
  • hgb's avatar
    hgb committed
    #----------------------------------------------------------------
    # Init by deleting all variables and functions
    rm(list=ls())
    # Set the working directory
    setwd("/home/hgb/git/onlineforecast/misc-R/quantile/")
    #----------------------------------------------------------------
    
    
    #----------------------------------------------------------------
    # Exercise on RLS
    
    # Init
    rm(list = ls())
    sapply(dir("functions",full.names=TRUE), source)
    
    
    # A function for fitting a recursive least squares estimation
    source("rls.R")
    rls
    #----------------------------------------------------------------
    
    
    #----------------------------------------------------------------
    # Generate some data from a linear model
    n <- 200
    x <- runif(n)
    beta0 <- 2
    beta1 <- -3
    y <- beta0 + beta1 * x + rnorm(n, sd=0.1)
    
    # Estimate the coefficients
    fit <- lm(y ~ x)
    plot(x, y)
    abline(fit)
    fit
    
    # Generate again with other coefficient values
    x1 <- runif(n)
    beta0 <- -2
    beta1 <- 3
    y1 <- beta0 + beta1 * x1 + rnorm(n, sd=0.1)
    
    # Estimate again
    fit1 <- lm(y1 ~ x1)
    plot(x1, y1)
    abline(fit1)
    fit
    
    # Combine them as two "periods",
    # such that at the start of the the second part, at i=201, the 
    # coefficients change and have different values
    X <- data.frame(y=c(y,y1), x=c(x,x1))
    
    # See clearly there is two "regimes"
    plot(X$x, X$y)
    
    # As a time series you only see the change in mean
    plot(X$y)
    
    # Fit a linear regression on the combined data
    lm(y ~ x, X)
    #----------------------------------------------------------------
    
    
    #----------------------------------------------------------------
    # Fit a recursive least squares (linear regression) on the combined data
    val <- rls(y ~ x, lambda = 0.97, data = X, k = 1)
    
    # Plot the tracked coefficients
    colnames(val$Theta) <- c("beta0","beta1")
    plot.ts(val$Theta)
    
    
    plot.ts(val$y)
    lines(val$y - val$residuals, col ="red")
    #----------------------------------------------------------------
    
    setwd("/home/hgb/git/onlineforecast/")
    library(devtools)
    library(splines)
    library(quantreg)
    load_all(".") # need to be under the package directory
    
    ## create an fcst matrix of the input data
    createFCSTmatrix <- function(vec, kHor) {
      # Create a matrix of the temperature measurements
      # vec: vector of temperature measurements
      # kHor: horizon of the forecast
      # Return: matrix of the temperature measurements
      N <- length(vec)
      M <- length(kHor)
      mat <- matrix(NA, nrow = N, ncol = M)
      for (i in 1:(N - M)) {
        mat[i, ] <- vec[i:(i + M - 1)]
      }
      colnames(mat) <- paste0("k", kHor)
      return(mat)
    }
    
    inputX <- createFCSTmatrix(X$x, kHor = 0:24)
    
    D <- list()
    D$t <- seq_along(X$x)
    D$x <- inputX
    D$y <- X$y
    
    ## subset the data, as the algorithm does not handle NA! I had forgotten that.
    ## it is due to the calculation for the simplex, which is not defined for NA
    #class(D) <- "data.list"
    idx <-  in_range(1,D$t,350)
    Dtrain <- list()
    Dtrain$t <- D$t[idx]
    Dtrain$x <- inputX[idx,]
    Dtrain$y <- X$y[idx]
    
    ## needs to have scoreperiod, for computing the loss
    Dtrain$scoreperiod <- in_range(1,D$t,300)
    
    class(Dtrain) <- "data.list"
    
    str(Dtrain)
    
    ## Create the model, where N1 is the initial number of data points - for the cold start
    model <- qmodel$new(N1 = 30)
    ## The output is the y
    model$output = "y"
    ## The design matrix, intercept and the x
    model$add_inputs(Xinput = "x",
                    mu = "one()")
    
    ## Add the regularization parameter, lambda, and the bounds
    model$add_regprm("rls_prm(lambda=0.9)")
    
    # model$add_prmbounds(lambda = c(0.99, 0.999, 0.9999))
    
    ### Setup of the booking keeping matrix - 
    ##############################################################
    ####### THIS SHOULD BE DONE IN quantile_fit.R ################
    ##############################################################
    ## First 2 are the intercept and the x
    model$IX <- 0:1
    ## The number of predictors, we could just use the length of the input matrix or the IX
    model$K <- 2
    ## The output column is the 2nd one
    model$Iy <- 2
    
    ## Select which quantiles we want to optimise and fit
    model$tau <- c(0.05,0.2, 0.5, 0.8,0.95)
    ## Select which prediction horizon
    model$kseq <- 0:24
    
    #opt_model <- quantile_optim(model = model, data = Dtrain)
    
    PAR <- c("lambda" = 0.999)
    quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[1])
    PAR <- c("lambda" = 0.999)
    quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[2])
    PAR <- c("lambda" = 0.97)
    quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[3])
    PAR <- c("lambda" = 0.999)
    quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[4])
    PAR <- c("lambda" = 0.999)
    quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[5])
    
    ## compute the predictions from the input data and the estimated coefficeints.
    Pred_model <- quantile_predict(model = model, datatr = model$datatr)
    
    
    par(mfrow = c(2,1))
    
    plot(1:length(Dtrain$y), rep(NA, length(Dtrain$y)), type = "l", ylim = range(Dtrain$y))
    polygon(c(1:length(Pred_model$q0.05[,"k1"]), rev(1:length(Pred_model$q0.95[,"k1"]))), c(Pred_model$q0.05[,"k1"], rev(Pred_model$q0.95[,"k1"])), col = "grey80", border = NA)
    
    
    lines(Dtrain$y, col = "black")
    lines(Pred_model$q0.5[,"k1"], type = "l", col = "red")
    
    lines(val$y - val$residuals, col ="blue")
    legend("bottomleft", legend = c("y", "RLS pred", "Quantile Pred"), col = c("black", "blue", "red"), lty = 1, ncol = 3, bty = "n", cex = 1.4)
    
    plot(val$Theta[,"beta0"], type = "l", col = "blue", ylim = range(val$Theta, na.rm = TRUE))
    lines(model$beta$q0.5$k1[,2], lty = 1, col = "red")
    lines(val$Theta[,"beta1"], type = "l", lty = 2, col = "blue")
    lines(model$beta$q0.5$k1[,1], type = "l", lty = 2, col = "red")
    legend(x = 50, y = 0, legend = c("RLS Intercept", "RLS Slope", "Quantile Intercept", "Quantile Slope"), col = c("blue", "blue", "red", "red"), lty = c(1,2,1,2), ncol = 2, bty = "n", cex = 1.4)