## create list for matrices
<- function(Ft, Gt, Wt_star){
set_up_dlm_matrices_unknown_v if(!is.array(Gt)){
Stop("Gt and Ft should be array")
}if(missing(Wt_star)){
return(list(Ft=Ft, Gt=Gt))
else{
}return(list(Ft=Ft, Gt=Gt, Wt_star=Wt_star))
}
}
## create list for initial states
<- function(m0, C0_star, n0, S0){
set_up_initial_states_unknown_v return(list(m0=m0, C0_star=C0_star, n0=n0, S0=S0))
}
<- function(data, matrices,
forward_filter_unknown_v
initial_states, delta){## retrieve dataset
<- data$yt
yt <- length(yt)
T
## retrieve matrices
<- matrices$Ft
Ft <- matrices$Gt
Gt if(missing(delta)){
<- matrices$Wt_star
Wt_star
}
## retrieve initial state
<- initial_states$m0
m0 <- initial_states$C0_star
C0_star <- initial_states$n0
n0 <- initial_states$S0
S0 <- S0*C0_star
C0
## create placeholder for results
<- dim(Gt)[1]
d <- matrix(0, nrow=T, ncol=d)
at <- array(0, dim=c(d, d, T))
Rt <- numeric(T)
ft <- numeric(T)
Qt <- matrix(0, nrow=T, ncol=d)
mt <- array(0, dim=c(d, d, T))
Ct <- numeric(T)
et <- numeric(T)
nt <- numeric(T)
St <- numeric(T)
dt
# moments of priors at t
for(i in 1:T){
if(i == 1){
<- Gt[, , i] %*% m0
at[i, ] <- Gt[, , i] %*% C0 %*% t(Gt[, , i])
Pt <- 0.5*Pt + 0.5*t(Pt)
Pt if(missing(delta)){
<- Wt_star[, , i]*S0
Wt <- Pt + Wt
Rt[, , i] <- 0.5*Rt[,,i]+0.5*t(Rt[,,i])
Rt[,,i] else{
}<- Pt/delta
Rt[, , i] <- 0.5*Rt[,,i]+0.5*t(Rt[,,i])
Rt[,,i]
}
else{
}<- Gt[, , i] %*% t(mt[i-1, , drop=FALSE])
at[i, ] <- Gt[, , i] %*% Ct[, , i-1] %*% t(Gt[, , i])
Pt if(missing(delta)){
<- Wt_star[, , i] * St[i-1]
Wt <- Pt + Wt
Rt[, , i] =0.5*Rt[,,i]+0.5*t(Rt[,,i])
Rt[,,i]else{
}<- Pt/delta
Rt[, , i] <- 0.5*Rt[,,i]+0.5*t(Rt[,,i])
Rt[,,i]
}
}
# moments of one-step forecast:
<- t(Ft[, , i]) %*% t(at[i, , drop=FALSE])
ft[i] <- t(Ft[, , i]) %*% Rt[, , i] %*% Ft[, , i] +
Qt[i] ifelse(i==1, S0, St[i-1])
<- yt[i] - ft[i]
et[i]
<- ifelse(i==1, n0, nt[i-1]) + 1
nt[i] <- ifelse(i==1, S0,
St[i] -1])*(1 + 1/nt[i]*(et[i]^2/Qt[i]-1))
St[i
# moments of posterior at t:
<- Rt[, , i] %*% Ft[, , i] / Qt[i]
At
<- at[i, ] + t(At) * et[i]
mt[i, ] <- St[i]/ifelse(i==1, S0,
Ct[, , i] -1])*(Rt[, , i] - Qt[i] * At %*% t(At))
St[i<- 0.5*Ct[,,i]+0.5*t(Ct[,,i])
Ct[,,i]
}cat("Forward filtering is completed!\n")
return(list(mt = mt, Ct = Ct, at = at, Rt = Rt,
ft = ft, Qt = Qt, et = et,
nt = nt, St = St))
}
### smoothing function ###
<- function(data, matrices,
backward_smoothing_unknown_v
posterior_states,delta){## retrieve data
<- data$yt
yt <- length(yt)
T
## retrieve matrices
<- matrices$Ft
Ft <- matrices$Gt
Gt
## retrieve matrices
<- posterior_states$mt
mt <- posterior_states$Ct
Ct <- posterior_states$Rt
Rt <- posterior_states$nt
nt <- posterior_states$St
St <- posterior_states$at
at
## create placeholder for posterior moments
<- matrix(NA, nrow = dim(mt)[1], ncol = dim(mt)[2])
mnt <- array(NA, dim = dim(Ct))
Cnt <- numeric(T)
fnt <- numeric(T)
Qnt
for(i in T:1){
if(i == T){
<- mt[i, ]
mnt[i, ] <- Ct[, , i]
Cnt[, , i] else{
}if(missing(delta)){
<- chol2inv(chol(Rt[, , i+1]))
inv_Rtp1 <- Ct[, , i] %*% t(Gt[, , i+1]) %*% inv_Rtp1
Bt <- mt[i, ] + Bt %*% (mnt[i+1, ] - at[i+1, ])
mnt[i, ] <- Ct[, , i] + Bt %*% (Cnt[, , i+1] -
Cnt[, , i] +1]) %*% t(Bt)
Rt[, , i<- 0.5*Cnt[,,i]+0.5*t(Cnt[,,i])
Cnt[,,i] else{
}<- solve(Gt[, , i+1])
inv_Gt <- (1-delta)*mt[i, ] +
mnt[i, ] *inv_Gt %*% t(mnt[i+1, ,drop=FALSE])
delta<- (1-delta)*Ct[, , i] +
Cnt[, , i] ^2*inv_Gt %*% Cnt[, , i + 1] %*% t(inv_Gt)
delta<- 0.5*Cnt[,,i]+0.5*t(Cnt[,,i])
Cnt[,,i]
}
}<- t(Ft[, , i]) %*% t(mnt[i, , drop=FALSE])
fnt[i] <- t(Ft[, , i]) %*% t(Cnt[, , i]) %*% Ft[, , i]
Qnt[i]
}for(i in 1:T){
=St[T]*Cnt[,,i]/St[i]
Cnt[,,i]=St[T]*Qnt[i]/St[i]
Qnt[i]
}cat("Backward smoothing is completed!\n")
return(list(mnt = mnt, Cnt = Cnt, fnt=fnt, Qnt=Qnt))
}
## Forecast Distribution for k step
<- function(posterior_states, k,
forecast_function_unknown_v
matrices, delta){
## retrieve matrices
<- matrices$Ft
Ft <- matrices$Gt
Gt if(missing(delta)){
<- matrices$Wt_star
Wt_star
}
<- posterior_states$mt
mt <- posterior_states$Ct
Ct <- posterior_states$St
St <- posterior_states$at
at
## set up matrices
<- dim(mt)[1] # time points
T <- dim(mt)[2] # dimension of state parameter vector
d
## placeholder for results
<- matrix(NA, nrow = k, ncol = d)
at <- array(NA, dim=c(d, d, k))
Rt <- numeric(k)
ft <- numeric(k)
Qt
for(i in 1:k){
## moments of state distribution
if(i == 1){
<- Gt[, , T+i] %*% t(mt[T, , drop=FALSE])
at[i, ]
if(missing(delta)){
<- Gt[, , T+i] %*% Ct[, , T] %*%
Rt[, , i] t(Gt[, , T+i]) + St[T]*Wt_star[, , T+i]
else{
}<- Gt[, , T+i] %*% Ct[, , T] %*%
Rt[, , i] t(Gt[, , T+i])/delta
}<- 0.5*Rt[,,i]+0.5*t(Rt[,,i])
Rt[,,i]
else{
}<- Gt[, , T+i] %*% t(at[i-1, , drop=FALSE])
at[i, ] if(missing(delta)){
<- Gt[, , T+i] %*% Rt[, , i-1] %*%
Rt[, , i] t(Gt[, , T+i]) + St[T]*Wt_star[, , T + i]
else{
}<- Gt[, , T+i] %*% Rt[, , i-1] %*%
Rt[, , i] t(Gt[, , T+i])/delta
}<- 0.5*Rt[,,i]+0.5*t(Rt[,,i])
Rt[,,i]
}
## moments of forecast distribution
<- t(Ft[, , T+i]) %*% t(at[i, , drop=FALSE])
ft[i] <- t(Ft[, , T+i]) %*% Rt[, , i] %*% Ft[, , T+i] +
Qt[i]
St[T]
}cat("Forecasting is completed!\n") # indicator of completion
return(list(at=at, Rt=Rt, ft=ft, Qt=Qt))
}
## obtain 95% credible interval
<- function(ft, Qt, nt,
get_credible_interval_unknown_v quantile = c(0.025, 0.975)){
<- matrix(0, nrow=length(ft), ncol=2)
bound
if ((length(nt)==1)){
for (t in 1:length(ft)){
<- qt(quantile[1], df = nt)
t_quantile 1] <- ft[t] + t_quantile*sqrt(as.numeric(Qt[t]))
bound[t,
# upper bound of 95% credible interval
<- qt(quantile[2], df = nt)
t_quantile 2] <- ft[t] +
bound[t, *sqrt(as.numeric(Qt[t]))}
t_quantileelse{
}# lower bound of 95% credible interval
for (t in 1:length(ft)){
<- qt(quantile[1], df = nt[t])
t_quantile 1] <- ft[t] +
bound[t, *sqrt(as.numeric(Qt[t]))
t_quantile
# upper bound of 95% credible interval
<- qt(quantile[2], df = nt[t])
t_quantile 2] <- ft[t] +
bound[t, *sqrt(as.numeric(Qt[t]))}
t_quantile
}return(bound)
}
Is there a recommended way to recover the parameter vector from the model object?
the question about this dimension of Theta seems to be as follows for seasonal component p = 2m-1 so we get 2(3)-1=5. The problem is that it isn’t clear that p here is the number of frequencies/harmonics. The linear trend component has 1 parameter
101.1 Practice Graded Assignment: NDLM data analysis
This peer-reviewed activity is highly recommended. It does not figure into your grade for this course, but it does provide you with the opportunity to apply what you’ve learned in R and prepare you for your data analysis project in week 5.
The R code below fits a Normal Dynamic Linear Model to the monthly time series of Google trends “hits” for the term “time series”. The model has two components:
- a polynomial model of order 2 and
- a seasonal component with 4 frequencies:
- \omega_1=2\pi/12, (annual cycle)
- \omega_2=2\pi/6 (6 months cycle),
- \omega_3=2\pi/4 and
- \omega_4=2\pi/3.
The model assumes that the observational variance v is unknown and the system variance-covariance matrix W_t is specified using a single discount factor. The discount factor is chosen using an optimality criterion as explained in the course.
You will be asked to modify the code in order to consider a DLM with two components:
- a polynomial model of order 1 and
- a seasonal component that contains a fundamental period of p = 12 and 2 additional harmonics for a total of 3 frequencies: \omega1=2\pi/12, \omega2=2\pi/6 and \omega3=2\pi/4.
You will also need to optimize the choice of the discount factor for this model.
You will be asked to upload pictures summarizing your results.
R code to fit the model:
- requires R packages
gtrends
,anddlm
as well as the files “all_dlm_functions_unknown_v.R” and “discountfactor_selection_functions.R” also provided below.
101.1.1 All dlm functions unknown v
101.1.2 Discount factor selection functions
##################################################
##### using discount factor ##########
##################################################
## compute measures of forecasting accuracy
## MAD: mean absolute deviation
## MSE: mean square error
## MAPE: mean absolute percentage error
## Neg LL: Negative log-likelihood of disc,
## based on the one step ahead forecast distribution
<- function(et, yt, Qt=NA, nt=NA, type){
measure_forecast_accuracy if(type == "MAD"){
<- mean(abs(et))
measure else if(type == "MSE"){
}<- mean(et^2)
measure else if(type == "MAPE"){
}<- mean(abs(et)/yt)
measure else if(type == "NLL"){
}<- log_likelihood_one_step_ahead(et, Qt, nt)
measure else{
}stop("Wrong type!")
}return(measure)
}
## compute log likelihood of one step ahead forecast function
<- function(et, Qt, nt){
log_likelihood_one_step_ahead ## et:the one-step-ahead error
## Qt: variance of one-step-ahead forecast function
## nt: degrees freedom of t distribution
<- length(et)
T =0
auxfor (t in 1:T){
=et[t]/sqrt(Qt[t])
zt=(dt(zt,df=nt[t],log=TRUE)-log(sqrt(Qt[t]))) + aux
aux
} return(-aux)
}
## Maximize log density of one-step-ahead forecast function to select discount factor
<- function(data, matrices, initial_states, df_range, type,
adaptive_dlm forecast=TRUE){
<- NA
measure_best <- numeric(length(df_range))
measure <- data$valid_data
valid_data <- NA
df_opt <- 0
j ## find the optimal discount factor
for(i in df_range){
<- j + 1
j <- forward_filter_unknown_v(data, matrices, initial_states, i)
results_tmp
<- measure_forecast_accuracy(et=results_tmp$et, yt=data$yt,
measure[j] Qt=results_tmp$Qt,
nt=c(initial_states$n0,results_tmp$nt), type=type)
if(j == 1){
<- measure[j]
measure_best <- results_tmp
results_filtered <- i
df_opt else if(measure[j] < measure_best){
}<- measure[j]
measure_best <- results_tmp
results_filtered <- i
df_opt
}
}<- backward_smoothing_unknown_v(data, matrices, results_filtered, delta = df_opt)
results_smoothed if(forecast){
<- forecast_function(results_filtered, length(valid_data),
results_forecast
matrices, df_opt)return(list(results_filtered=results_filtered,
results_smoothed=results_smoothed,
results_forecast=results_forecast,
df_opt = df_opt, measure=measure))
else{
}return(list(results_filtered=results_filtered,
results_smoothed=results_smoothed,
df_opt = df_opt, measure=measure))
}
}
101.1.3 Grading Criteria
The assignment will be graded based on the uploaded pictures summarizing the results. Estimates of some of the model parameters and additional discussion will also be requested.
# download data
library(gtrendsR)
<-gtrends("time series",time="all",hl="en",geo="",onlyInterest=TRUE)
timeseries_data#timeseries_data <- gtrends(c("NHL", "NFL"), time = "today 1-m")
#timeseries_data <- gtrends(keyword = "obama",geo = "US-AL-630",time = "today 1-m")
plot(timeseries_data$interest_over_time)
names(timeseries_data)
[1] "interest_over_time"
=timeseries_data$interest_over_time
timeseries_data=list(yt=timeseries_data$hits)
dataplot(data$yt, type='l', main="Google Trends: time series",
xlab='time', ylab='Google hits', lty=3, ylim=c(0,100))
library(dlm)
=dlmModTrig(s=12,q=4,dV=0,dW=1)
model_seasonal=dlmModPoly(order=2,dV=10,dW=rep(1,2),m0=c(40,0))
model_trend=model_trend+model_seasonal
model$C0=10*diag(10)
model=1
n0=10
S0=length(model$m0)
k=length(data$yt)
T
=array(0,c(1,k,T))
Ft=array(0,c(k,k,T))
Gtfor(t in 1:T){
=model$FF
Ft[,,t]=model$GG
Gt[,,t]
}
source('all_dlm_functions_unknown_v.R')
source('discountfactor_selection_functions.R')
=set_up_dlm_matrices_unknown_v(Ft=Ft,Gt=Gt)
matrices=set_up_initial_states_unknown_v(model$m0,
initial_states$C0,n0,S0)
model
=seq(0.9,1,by=0.005)
df_range
## fit discount DLM
## MSE
<- adaptive_dlm(data, matrices,
results_MSE "MSE",forecast=FALSE) initial_states, df_range,
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Backward smoothing is completed!
## print selected discount factor
print(paste("The selected discount factor:",results_MSE$df_opt))
[1] "The selected discount factor: 0.935"
## retrieve filtered results
<- results_MSE$results_filtered
results_filtered <- get_credible_interval_unknown_v(
ci_filtered $ft,results_filtered$Qt,results_filtered$nt)
results_filtered
## retrieve smoothed results
<- results_MSE$results_smoothed
results_smoothed <- get_credible_interval_unknown_v(
ci_smoothed $fnt, results_smoothed$Qnt,
results_smoothed$nt[length(results_smoothed$fnt)])
results_filtered
## plot smoothing results
par(mfrow=c(1,1), mar = c(3, 4, 2, 1))
<- timeseries_data$date
index plot(index, data$yt, ylab='Google hits',
main = "Google Trends: time series", type = 'l',
xlab = 'time', lty=3,ylim=c(0,100))
lines(index, results_smoothed$fnt, type = 'l', col='blue',
lwd=2)
lines(index, ci_smoothed[, 1], type='l', col='blue', lty=2)
lines(index, ci_smoothed[, 2], type='l', col='blue', lty=2)
# Plot trend and rate of change
par(mfrow=c(2,1), mar = c(3, 4, 2, 1))
plot(index,data$yt,pch=19,cex=0.3,col='lightgray',xlab="time",
ylab="Google hits",main="trend")
lines(index,results_smoothed$mnt[,1],lwd=2,col='magenta')
plot(index,results_smoothed$mnt[,2],col='darkblue',lwd=2,
type='l', ylim=c(-0.6,0.6), xlab="time",
ylab="rate of change")
abline(h=0,col='red',lty=2)
# Plot seasonal components
par(mfrow=c(2,2), mar = c(3, 4, 2, 1))
plot(index,results_smoothed$mnt[,3],lwd=2,col="darkgreen",
type='l', xlab="time",ylab="",main="period=12",
ylim=c(-12,12))
plot(index,results_smoothed$mnt[,5],lwd=2,col="darkgreen",
type='l',xlab="time",ylab="",main="period=6",
ylim=c(-12,12))
plot(index,results_smoothed$mnt[,7],lwd=2,col="darkgreen",
type='l', xlab="time",ylab="",main="period=4",
ylim=c(-12,12))
plot(index,results_smoothed$mnt[,9],lwd=2,col="darkgreen",
type='l', xlab="time",ylab="",main="period=3",
ylim=c(-12,12))
#Estimate for the observational variance: St[T]
$St[T] results_filtered
[1] 18.06464
101.2 My Submission
What modifications to the lines of R-code below have to be made to consider a DLM with the following structure:
- a polynomial model of order 1 and
- a seasonal component that contains a fundamental period of p=12 and 2 additional harmonics for a total of 3 frequencies: \omega_1=2\pi/12, \omega_2=2\pi/6 and \omega_3=2\pi/4.?
Upload the revised lines of R code (in rich text format, not as an R file), that provide the structure of the new model.
Assume dV=0, dW=1 in the seasonal component, dV=10,dW=1,m_0=40,C_0=10I,n_0=1,S0=10 with I being identity matrix with the appropriate dimensions n0=1 and S0=10
library(dlm)
=dlmModTrig(s=12,q=4,dV=0,dW=1)
model_seasonal=dlmModPoly(order=2,dV=10,dW=rep(1,2),m0=c(40,0))
model_trend=model_trend+model_seasonal
model$C0=10*diag(10)
model=1
n0=10 S0
answer:
library(dlm)
=dlmModTrig(s=12,q=3,dV=0,dW=1)
model_seasonal=dlmModPoly(order=1,dV=10,dW=rep(1,1),m0=c(40))
model_trend=model_trend+model_seasonal # superposition
model$C0=10*diag(7)
model=1
n0=10 S0
The new DLM with has the following structure: (a) a polynomial model of order 1 and (b) a seasonal component that contains a fundamental period of p=12 and 2 additional harmonics for a total of 3 frequencies: \omega_1=2\pi/12, \omega_2=2\pi/6 and \omega_3=2\pi/4.
What is the dimension of the state parameter vector _t for this new model that accounts for both components (a) and (b)?
# Dimension of the state parameter vector _t
cat(model$C10) # 3 freqs * 2 states from seasonal + 1 from trend
=length(model$m0)
k=length(data$yt)
T
=array(0,c(1,k,T))
Ft=array(0,c(k,k,T))
Gtfor(t in 1:T){
=model$FF
Ft[,,t]=model$GG
Gt[,,t]
}
source('all_dlm_functions_unknown_v.R')
source('discountfactor_selection_functions.R')
=set_up_dlm_matrices_unknown_v(Ft=Ft,Gt=Gt)
matrices=set_up_initial_states_unknown_v(model$m0,
initial_states$C0,n0,S0)
model
=seq(0.9,1,by=0.005)
df_range
## fit discount DLM
## MSE
<- adaptive_dlm(data, matrices,
results_MSE "MSE",forecast=FALSE) initial_states, df_range,
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Forward filtering is completed!
Backward smoothing is completed!
## print selected discount factor
print(paste("The selected discount factor:",results_MSE$df_opt))
[1] "The selected discount factor: 0.9"
## retrieve filtered results
<- results_MSE$results_filtered
results_filtered <- get_credible_interval_unknown_v(
ci_filtered $ft,results_filtered$Qt,results_filtered$nt)
results_filtered
## retrieve smoothed results
<- results_MSE$results_smoothed
results_smoothed <- get_credible_interval_unknown_v(
ci_smoothed $fnt, results_smoothed$Qnt,
results_smoothed$nt[length(results_smoothed$fnt)]) results_filtered
## plot smoothing results
par(mfrow=c(1,1), mar = c(3, 4, 2, 1))
<- timeseries_data$date
index plot(index, data$yt, ylab='Google hits',
main = "Google Trends: time series", type = 'l',
xlab = 'time', lty=3,ylim=c(0,100))
lines(index, results_smoothed$fnt, type = 'l', col='blue', lwd=2)
lines(index, ci_smoothed[, 1], type='l', col='blue', lty=2)
lines(index, ci_smoothed[, 2], type='l', col='blue', lty=2)
# Plot trend and rate of change
par(mfrow=c(2,1), mar = c(3, 4, 2, 1))
plot(index,data$yt,pch=19,cex=0.3,col='lightgray',xlab="time",
ylab="Google hits",main="trend")
lines(index,results_smoothed$mnt[,1],lwd=2,col='magenta')
plot(index,results_smoothed$mnt[,2],col='darkblue',lwd=2,
type='l', ylim=c(-0.6,0.6), xlab="time",
ylab="rate of change")
abline(h=0,col='red',lty=2)
# Plot seasonal components
par(mfrow=c(2,2), mar = c(3, 4, 2, 1))
plot(index,results_smoothed$mnt[,3],lwd=2,col="darkgreen",
type='l', xlab="time",ylab="",main="period=12",
ylim=c(-12,12))
plot(index,results_smoothed$mnt[,5],lwd=2,col="darkgreen",
type='l',xlab="time",ylab="",main="period=6",
ylim=c(-12,12))
plot(index,results_smoothed$mnt[,7],lwd=2,col="darkgreen",
type='l', xlab="time",ylab="",main="period=4",
ylim=c(-12,12))
# plot(index,results_smoothed$mnt[,9],lwd=2,col="darkgreen",
# type='l', xlab="time",ylab="",main="period=3",
# ylim=c(-12,12))
#Estimate for the observational variance: St[T]
$St[T] results_filtered
[1] 23.61709