108.1 First step for the project
Caution
Section omitted to comply with the Honor Code
Capstone Project: Bayesian Conjugate Analysis for Autogressive Time Series Models
Oren Bochman
July 2, 2025
Time Series, R code
Section omitted to comply with the Honor Code
---
date: 2025-07-02
title: "Homework - first-step-for-the-project -- M1L1"
subtitle: "Capstone Project: Bayesian Conjugate Analysis for Autogressive Time Series Models"
description: ""
categories:
- Bayesian Statistics
- Time Series
keywords:
- Time Series
- R code
fig-caption: Notes about ... Bayesian Statistics
title-block-banner: images/banner_deep.jpg
---
## First step for the project
::::: {.content-visible unless-profile="HC"}
::: {.callout-caution}
Section omitted to comply with the Honor Code
:::
:::::
::::: {.content-hidden unless-profile="HC"}
```{r}
#| label: lst-capstone-earthquake-data
## read data, you need to make sure the data file is in your current working directory
#| label: lbl-simulate-data
library(colorRamps)
library(leaflet)
library(fields)
library(mvtnorm)
earthquakes.dat <- read.delim("data/earthquakes.txt")
earthquakes.dat$Quakes=as.numeric(earthquakes.dat$Quakes)
y.dat=earthquakes.dat$Quakes[1:100] ## this is the training data
y.new=earthquakes.dat$Quakes[101:103] ## this is the test data
y.sample=y.dat
##plot the simulate data
plot.ts(y.sample,ylab=expression(italic(y)[italic(t)]),xlab=expression(italic(t)),main='')
```
Perform a conjugate Bayesian analysis of AR(2) model to your simulated data.
Using prior $m_0 = (0,0)^\top$, $C_0 = 10 I_2$, $n_0 = d_0 = .02$. Simulated 5000 samples of $\phi_1,\phi_2,v$. What is the posterior means of these three parameters? (Do the same calculations for $\phi_2^,\nu^$ below.)
```{r}
#| label: lbl-bayesian-analysis-q1-3
N=5000
p=2 ## order of AR process
n.all=length(y.sample) ## T, total number of data
Y=matrix(y.sample[3:n.all],ncol=1)
Fmtx=matrix(c(y.sample[2:(n.all-1)],y.sample[1:(n.all-2)]),nrow=p,byrow=TRUE)
n=length(Y)
m0=matrix(rep(0,p),ncol=1)
C0=10*diag(p)
n0=0.02
d0=0.02
e=Y-t(Fmtx)%*%m0
Q=t(Fmtx)%*%C0%*%Fmtx+diag(n)
Q.inv=chol2inv(chol(Q))
A=C0%*%Fmtx%*%Q.inv
m=m0+A%*%e
C=C0-A%*%Q%*%t(A)
n.star=n+n0
d.star=t(Y-t(Fmtx)%*%m0)%*%Q.inv%*%(Y-t(Fmtx)%*%m0)+d0
n.sample=5000
nu.sample=rep(0,n.sample)
phi.sample=matrix(0,nrow=n.sample,ncol=p)
for (i in 1:n.sample) {
set.seed(i)
nu.new=1/rgamma(1,shape=n.star/2,rate=d.star/2)
nu.sample[i]=nu.new
phi.new=rmvnorm(1,mean=m,sigma=nu.new*C)
phi.sample[i,]=phi.new
}
par(mfrow=c(1,3))
hist(phi.sample[,1],freq=FALSE,xlab=expression(phi[1]),main="",ylim=c(0,6.4))
lines(density(phi.sample[,1]),type='l',col='red')
hist(phi.sample[,2],freq=FALSE,xlab=expression(phi[2]),main="",ylim=c(0,6.4))
lines(density(phi.sample[,2]),type='l',col='red')
hist(nu.sample,freq=FALSE,xlab=expression(nu),main="")
lines(density(nu.sample),type='l',col='red')
# assert phi1=0.1, phi2=0.8, nu=1
library(testthat)
test_that("Posterior means are close to expected values", {
expect_equal(mean(phi.sample[,1]), 0.532, tolerance = 0.01)
expect_equal(mean(phi.sample[,2]), 0.427, tolerance = 0.01)
expect_equal(mean(nu.sample), 20.9, tolerance = 0.01)
})
```
```{r}
#| label: lst-capstone-google-search-index-covid
## read data, you need to make sure the data file is in your current working directory
covid.dat <- read.delim("data/GoogleSearchIndex.txt")
covid.dat$Week=as.Date(as.character(covid.dat$Week),format = "%Y-%m-%d")
y.dat=covid.dat$covid[1:57] ## this is the training data
y.new=covid.dat$covid[58:60] ## this is the test data
y.sample=y.dat
plot.ts(y.sample,ylab=expression(italic(y)[italic(t)]),xlab=expression(italic(t)),main='')
```
```{r}
#| label: lbl-bayesian-analysis-q1-4
N=5000
p=2 ## order of AR process
n.all=length(y.sample) ## T, total number of data
Y=matrix(y.sample[3:n.all],ncol=1)
Fmtx=matrix(c(y.sample[2:(n.all-1)],y.sample[1:(n.all-2)]),nrow=p,byrow=TRUE)
n=length(Y)
m0=matrix(rep(0,p),ncol=1)
C0=10*diag(p)
n0=0.02
d0=0.02
e=Y-t(Fmtx)%*%m0
Q=t(Fmtx)%*%C0%*%Fmtx+diag(n)
Q.inv=chol2inv(chol(Q))
A=C0%*%Fmtx%*%Q.inv
m=m0+A%*%e
C=C0-A%*%Q%*%t(A)
n.star=n+n0
d.star=t(Y-t(Fmtx)%*%m0)%*%Q.inv%*%(Y-t(Fmtx)%*%m0)+d0
n.sample=5000
nu.sample=rep(0,n.sample)
phi.sample=matrix(0,nrow=n.sample,ncol=p)
for (i in 1:n.sample) {
set.seed(i)
nu.new=1/rgamma(1,shape=n.star/2,rate=d.star/2)
nu.sample[i]=nu.new
phi.new=rmvnorm(1,mean=m,sigma=nu.new*C)
phi.sample[i,]=phi.new
}
par(mfrow=c(1,3))
hist(phi.sample[,1],freq=FALSE,xlab=expression(phi[1]),main="",ylim=c(0,6.4))
lines(density(phi.sample[,1]),type='l',col='red')
hist(phi.sample[,2],freq=FALSE,xlab=expression(phi[2]),main="",ylim=c(0,6.4))
lines(density(phi.sample[,2]),type='l',col='red')
hist(nu.sample,freq=FALSE,xlab=expression(nu),main="")
lines(density(nu.sample),type='l',col='red')
# assert phi1=0.1, phi2=0.8, nu=1
library(testthat)
test_that("Posterior means are close to expected values", {
expect_equal(mean(phi.sample[,1]), 1.11, tolerance = 0.01)
expect_equal(mean(phi.sample[,2]), -0.127, tolerance = 0.01)
expect_equal(mean(nu.sample), 83.5, tolerance = 0.01)
})
```
:::::