## simulate data
=c(-1,0,1)
y=200
n.all =2 ## p is the order of AR process
p
for (i in 3:n.all) {
set.seed(i)
=runif(1)
Uif(U<=0.5){
=rnorm(1,0.1*y[i-1]+0.1*y[i-2],0.25)
y.newelse if(U>0.8){
}=rnorm(1,0.3*y[i-1]+0.5*y[i-2],0.25)
y.newelse{
}=rnorm(1,0.4*y[i-1]-0.5*y[i-2],0.25)
y.new
}=c(y,y.new)
y }
First I simulate the data using a mixture of AR processes.
The following code is used to determine the number of mixing components in the mixture model. It uses a Gibbs sampler to sample from the posterior distribution of the model parameters and calculates the Deviance Information Criterion (DIC) for models with different numbers of mixing components (2 to 5). The function model_comp_mix
takes the total number of components as an argument and returns the DIC for that model.
Most of the code is identical to the code in the previous section. What we have added is the calculation of the DIC for the mixture model.
## Function to determine the number of mixing components
## It is a combination of posterior inference for mixture model and calculation of DIC
library(MCMCpack)
Loading required package: coda
Loading required package: MASS
##
## Markov Chain Monte Carlo Package (MCMCpack)
## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
##
## Support provided by the U.S. National Science Foundation
## (Grants SES-0350646 and SES-0350613)
##
library(mvtnorm)
=function(tot_num_comp){
model_comp_mix
## hyperparameters
=matrix(rep(0,p),ncol=1) ## p is the order of AR process
m0=10*diag(p)
C0=0.1*diag(p)
C0.inv=0.02
n0=0.02
d0=tot_num_comp ## let the number of mixing component to vary by input
K=rep(1,K)
a
=matrix(y[(p+1):n.all],ncol=1) ## y_{p+1:T} n.all is the value of T
Y=matrix(c(y[p:(n.all-1)],y[1:(n.all-p)]),nrow=2,byrow=TRUE) ## design matrix F
Fmtx=length(Y)
n
## The code below is used to obtain posterior samples of model parameters
## Just copy from the last lecture
=function(L.cur){
sample_omega=sapply(1:K, function(k){sum(L.cur==k)})
n.vecrdirichlet(1,a+n.vec)
}
=function(beta.cur,omega.cur,nu.cur,y.cur,Fmtx.cur){
sample_L_one=function(k){
prob_k=beta.cur[((k-1)*p+1):(k*p)]
beta.use*dnorm(y.cur,mean=sum(beta.use*Fmtx.cur),sd=sqrt(nu.cur))
omega.cur[k]
}=sapply(1:K, prob_k)
prob.vec=sample(1:K,1,prob=prob.vec/sum(prob.vec))
L.samplereturn(L.sample)
}
=function(y,x,beta.cur,omega.cur,nu.cur){
sample_L=sapply(1:n, function(j){sample_L_one(beta.cur,omega.cur,nu.cur,y.cur=y[j,],Fmtx.cur=x[,j])})
L.newreturn(L.new)
}
=function(L.cur,beta.cur){
sample_nu=n0+n+p*K
n.star=function(idx){
err.y=L.cur[idx]
L.use=beta.cur[((L.use-1)*p+1):(L.use*p)]
beta.use=Y[idx,]-sum(Fmtx[,idx]*beta.use)
errreturn(err^2)
}=function(k.cur){
err.beta=beta.cur[((k.cur-1)*p+1):(k.cur*p)]
beta.use=matrix(beta.use,ncol=1)-m0
beta.use.minus.m0t(beta.use.minus.m0)%*%C0.inv%*%beta.use.minus.m0
}
=d0+sum(sapply(1:n,err.y))+sum(sapply(1:K,err.beta))
d.star1/rgamma(1,shape=n.star/2,rate=d.star/2)
}
=function(k,L.cur,nu.cur){
sample_beta=(L.cur==k)
idx.select=sum(idx.select)
n.kif(n.k==0){
=m0
m.k=C0
C.kelse{
}=Y[idx.select,]
y.tilde.k=Fmtx[,idx.select]
Fmtx.tilde.k=y.tilde.k-t(Fmtx.tilde.k)%*%m0
e.k=t(Fmtx.tilde.k)%*%C0%*%Fmtx.tilde.k+diag(n.k)
Q.k=chol2inv(chol(Q.k))
Q.k.inv=C0%*%Fmtx.tilde.k%*%Q.k.inv
A.k=m0+A.k%*%e.k
m.k=C0-A.k%*%Q.k%*%t(A.k)
C.k
}
rmvnorm(1,m.k,nu.cur*C.k)
}
=20000
nsim
## store parameters
=matrix(0,nrow=p*K,ncol=nsim)
beta.mtx=matrix(0,nrow=n,ncol=nsim)
L.mtx=matrix(0,nrow=K,ncol=nsim)
omega.mtx=rep(0,nsim)
nu.vec
## initial value
=rep(0,p*K)
beta.cur=rep(1,n)
L.cur=rep(1/K,K)
omega.cur=1
nu.cur
## Gibbs Sampler
for (i in 1:nsim) {
set.seed(i)
## sample omega
=sample_omega(L.cur)
omega.cur=omega.cur
omega.mtx[,i]
## sample L
=sample_L(Y,Fmtx,beta.cur,omega.cur,nu.cur)
L.cur=L.cur
L.mtx[,i]
## sample nu
=sample_nu(L.cur,beta.cur)
nu.cur=nu.cur
nu.vec[i]
## sample beta
=as.vector(sapply(1:K, function(k){sample_beta(k,L.cur,nu.cur)}))
beta.cur=beta.cur
beta.mtx[,i]
if(i%%1000==0){
print(i)
}
}
## Now compute DIC for mixture model
## Somilar as the calculation of DIC in Module 2
=function(idx,beta,nu,omega){
cal_log_likelihood_mix_one=rep(0,K)
norm.likfor (k.cur in 1:K) {
=sum(Fmtx[,idx]*beta[((k.cur-1)*p+1):(k.cur*p)])
mean.norm=dnorm(Y[idx,1],mean.norm,sqrt(nu),log=FALSE)
norm.lik[k.cur]
}=log(sum(norm.lik*omega))
log.likreturn(log.lik)
}
=function(beta,nu,omega){
cal_log_likelihood_mixsum(sapply(1:n, function(idx){cal_log_likelihood_mix_one(idx=idx,beta=beta,nu=nu,omega=omega)}))
}
=seq(10001,20000,by=1)
sample.select.idx
=rowMeans(beta.mtx[,sample.select.idx])
beta.mix=mean(nu.vec[sample.select.idx])
nu.mix=rowMeans(omega.mtx[,sample.select.idx])
omega.mix
=cal_log_likelihood_mix(beta.mix,nu.mix,omega.mix)
log.lik.bayes.mix
=sapply(sample.select.idx, function(k){cal_log_likelihood_mix(beta.mtx[,k],nu.vec[k],omega.mtx[,k])})
post.log.lik.mix=mean(post.log.lik.mix)
E.post.log.lik.mix
=2*(log.lik.bayes.mix-E.post.log.lik.mix)
p_DIC.mix
=-2*log.lik.bayes.mix+2*p_DIC.mix
DIC.mix
return(DIC.mix)
}
## Run this code will give you DIC corresponding to mixture model with 2:5 mixing components
=sapply(2:5,model_comp_mix) mix.model.all
[1] 1000
[1] 2000
[1] 3000
[1] 4000
[1] 5000
[1] 6000
[1] 7000
[1] 8000
[1] 9000
[1] 10000
[1] 11000
[1] 12000
[1] 13000
[1] 14000
[1] 15000
[1] 16000
[1] 17000
[1] 18000
[1] 19000
[1] 20000
[1] 1000
[1] 2000
[1] 3000
[1] 4000
[1] 5000
[1] 6000
[1] 7000
[1] 8000
[1] 9000
[1] 10000
[1] 11000
[1] 12000
[1] 13000
[1] 14000
[1] 15000
[1] 16000
[1] 17000
[1] 18000
[1] 19000
[1] 20000
[1] 1000
[1] 2000
[1] 3000
[1] 4000
[1] 5000
[1] 6000
[1] 7000
[1] 8000
[1] 9000
[1] 10000
[1] 11000
[1] 12000
[1] 13000
[1] 14000
[1] 15000
[1] 16000
[1] 17000
[1] 18000
[1] 19000
[1] 20000
[1] 1000
[1] 2000
[1] 3000
[1] 4000
[1] 5000
[1] 6000
[1] 7000
[1] 8000
[1] 9000
[1] 10000
[1] 11000
[1] 12000
[1] 13000
[1] 14000
[1] 15000
[1] 16000
[1] 17000
[1] 18000
[1] 19000
[1] 20000
This code takes a rather long time to run. It would be a good idea to save the output as a csv with k, DIC Second it requires access to the variables
y
,n.all
, andp
I defined these variables in the previous section, so you can run this code after running the previous section.Second we get to use this code in the ?sec-capstone-lm-infer-mix-comp It works fine for the earthquake data which is an AR(p) with p>1. But for the google tends data for covid which is an ar(1) process it has multiple issues. The first group were related to algebra - certain vectors and matrices are incompatible… Next I found that gibs sampler crashed due to numerical instability when the variance is too small.
I mention all this as the feedback asked to refer back to this section and added that the choice of prior is important. We actually saw earlier in Section 104.5 how to check the robustness of our model via prior sensitivity analysis! However the exercise has an explicit prior in it code so it seems strange that it is leads to bad DIC values.
In fact, if we think about it, if DIC should be able to inform us regarding the best model parameters. However we may need to use it in a bayesian optimization framework to find the best set of parameters and hyperparameters.
I hope I can figure out this matter soon however I am now moving on to the final project and will have to come back to this later, time permitting.
However one useful point is that I will prefer to tackle the earthquake data first as the code works fine for it and for the google trends data, it seems a bit of a trivial task to fit an AR(1) for the final project.
111.1 Prediction for location mixture of AR model
The following code can be used to obtain the h-step ahead prediction of the time series y using the posterior samples from the mixture model. The function y_pred_h_step
takes two arguments: h.step
, which is the number of steps ahead for prediction, and s
, which is the index of the posterior sample to use for prediction.
## the prediction function for mixture model
## Obtain the sth posterior sample for the h-step ahead prediction of y
<- function(h.step,s){
y_pred_h_step
= omega.mtx[,s]
omega.vec.use = beta.mtx[,s]
beta.use = nu.vec[s]
nu.use
for (i in 1:h.step) {
= sample(1:K,1,prob=omega.vec.use)
k.use = beta.use[((k.use-1)*p+1):(k.use*p)]
beta.cat = sum(y.cur*phi.cat)
mu.y = rnorm(1,mu.y,sqrt(nu.use))
y.new = y.new
y.pred[i] = c(y.new,y.cur)
y.cur = y.cur[-length(y.cur)]
y.cur
}return(y.pred)
}