Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#----------------------------------------------------------------
# Init by deleting all variables and functions
rm(list=ls())
# Set the working directory
setwd("/home/hgb/git/onlineforecast/misc-R/quantile/")
#----------------------------------------------------------------
#----------------------------------------------------------------
# Exercise on RLS
# Init
rm(list = ls())
sapply(dir("functions",full.names=TRUE), source)
# A function for fitting a recursive least squares estimation
source("rls.R")
rls
#----------------------------------------------------------------
#----------------------------------------------------------------
# Generate some data from a linear model
n <- 200
x <- runif(n)
beta0 <- 2
beta1 <- -3
y <- beta0 + beta1 * x + rnorm(n, sd=0.1)
# Estimate the coefficients
fit <- lm(y ~ x)
plot(x, y)
abline(fit)
fit
# Generate again with other coefficient values
x1 <- runif(n)
beta0 <- -2
beta1 <- 3
y1 <- beta0 + beta1 * x1 + rnorm(n, sd=0.1)
# Estimate again
fit1 <- lm(y1 ~ x1)
plot(x1, y1)
abline(fit1)
fit
# Combine them as two "periods",
# such that at the start of the the second part, at i=201, the
# coefficients change and have different values
X <- data.frame(y=c(y,y1), x=c(x,x1))
# See clearly there is two "regimes"
plot(X$x, X$y)
# As a time series you only see the change in mean
plot(X$y)
# Fit a linear regression on the combined data
lm(y ~ x, X)
#----------------------------------------------------------------
#----------------------------------------------------------------
# Fit a recursive least squares (linear regression) on the combined data
val <- rls(y ~ x, lambda = 0.97, data = X, k = 1)
# Plot the tracked coefficients
colnames(val$Theta) <- c("beta0","beta1")
plot.ts(val$Theta)
plot.ts(val$y)
lines(val$y - val$residuals, col ="red")
#----------------------------------------------------------------
setwd("/home/hgb/git/onlineforecast/")
library(devtools)
library(splines)
library(quantreg)
load_all(".") # need to be under the package directory
## create an fcst matrix of the input data
createFCSTmatrix <- function(vec, kHor) {
# Create a matrix of the temperature measurements
# vec: vector of temperature measurements
# kHor: horizon of the forecast
# Return: matrix of the temperature measurements
N <- length(vec)
M <- length(kHor)
mat <- matrix(NA, nrow = N, ncol = M)
for (i in 1:(N - M)) {
mat[i, ] <- vec[i:(i + M - 1)]
}
colnames(mat) <- paste0("k", kHor)
return(mat)
}
inputX <- createFCSTmatrix(X$x, kHor = 0:24)
D <- list()
D$t <- seq_along(X$x)
D$x <- inputX
D$y <- X$y
## subset the data, as the algorithm does not handle NA! I had forgotten that.
## it is due to the calculation for the simplex, which is not defined for NA
#class(D) <- "data.list"
idx <- in_range(1,D$t,350)
Dtrain <- list()
Dtrain$t <- D$t[idx]
Dtrain$x <- inputX[idx,]
Dtrain$y <- X$y[idx]
## needs to have scoreperiod, for computing the loss
Dtrain$scoreperiod <- in_range(1,D$t,300)
class(Dtrain) <- "data.list"
str(Dtrain)
## Create the model, where N1 is the initial number of data points - for the cold start
model <- qmodel$new(N1 = 30)
## The output is the y
model$output = "y"
## The design matrix, intercept and the x
model$add_inputs(Xinput = "x",
mu = "one()")
## Add the regularization parameter, lambda, and the bounds
model$add_regprm("rls_prm(lambda=0.9)")
# model$add_prmbounds(lambda = c(0.99, 0.999, 0.9999))
### Setup of the booking keeping matrix -
##############################################################
####### THIS SHOULD BE DONE IN quantile_fit.R ################
##############################################################
## First 2 are the intercept and the x
model$IX <- 0:1
## The number of predictors, we could just use the length of the input matrix or the IX
model$K <- 2
## The output column is the 2nd one
model$Iy <- 2
## Select which quantiles we want to optimise and fit
model$tau <- c(0.05,0.2, 0.5, 0.8,0.95)
## Select which prediction horizon
model$kseq <- 0:24
#opt_model <- quantile_optim(model = model, data = Dtrain)
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[1])
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[2])
PAR <- c("lambda" = 0.97)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[3])
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[4])
PAR <- c("lambda" = 0.999)
quantile_fit(prm = PAR, model = model, data = Dtrain, q = model$tau[5])
## compute the predictions from the input data and the estimated coefficeints.
Pred_model <- quantile_predict(model = model, datatr = model$datatr)
par(mfrow = c(2,1))
plot(1:length(Dtrain$y), rep(NA, length(Dtrain$y)), type = "l", ylim = range(Dtrain$y))
polygon(c(1:length(Pred_model$q0.05[,"k1"]), rev(1:length(Pred_model$q0.95[,"k1"]))), c(Pred_model$q0.05[,"k1"], rev(Pred_model$q0.95[,"k1"])), col = "grey80", border = NA)
lines(Dtrain$y, col = "black")
lines(Pred_model$q0.5[,"k1"], type = "l", col = "red")
lines(val$y - val$residuals, col ="blue")
legend("bottomleft", legend = c("y", "RLS pred", "Quantile Pred"), col = c("black", "blue", "red"), lty = 1, ncol = 3, bty = "n", cex = 1.4)
plot(val$Theta[,"beta0"], type = "l", col = "blue", ylim = range(val$Theta, na.rm = TRUE))
lines(model$beta$q0.5$k1[,2], lty = 1, col = "red")
lines(val$Theta[,"beta1"], type = "l", lty = 2, col = "blue")
lines(model$beta$q0.5$k1[,1], type = "l", lty = 2, col = "red")
legend(x = 50, y = 0, legend = c("RLS Intercept", "RLS Slope", "Quantile Intercept", "Quantile Slope"), col = c("blue", "blue", "red", "red"), lty = c(1,2,1,2), ncol = 2, bty = "n", cex = 1.4)