106 Homework - first-step-for-the-project – M1L1

106.1 First step for the project

Caution

Section omitted to comply with the Honor Code

--- date: 2025-07-02 title: "Homework - first-step-for-the-project -- M1L1" subtitle: "Capstone Project: Bayesian Conjugate Analysis for Autogressive Time Series Models" description: "" categories: - Bayesian Statistics - Time Series keywords: - Time Series - R code fig-caption: Notes about ... Bayesian Statistics title-block-banner: images/banner_deep.jpg --- ## First step for the project ::::: {.content-visible unless-profile="HC"} ::: {.callout-caution} Section omitted to comply with the Honor Code ::: ::::: ::::: {.content-hidden unless-profile="HC"} ```{r} #| label: lst-capstone-earthquake-data ## read data, you need to make sure the data file is in your current working directory #| label: lbl-simulate-data library(colorRamps) library(leaflet) library(fields) library(mvtnorm) earthquakes.dat <- read.delim("data/earthquakes.txt") earthquakes.dat$Quakes=as.numeric(earthquakes.dat$Quakes) y.dat=earthquakes.dat$Quakes[1:100] ## this is the training data y.new=earthquakes.dat$Quakes[101:103] ## this is the test data y.sample=y.dat ##plot the simulate data plot.ts(y.sample,ylab=expression(italic(y)[italic(t)]),xlab=expression(italic(t)),main='') ``` Perform a conjugate Bayesian analysis of AR(2) model to your simulated data. Using prior $m_0 = (0,0)^\top$, $C_0 = 10 I_2$, $n_0 = d_0 = .02$. Simulated 5000 samples of $\phi_1,\phi_2,v$. What is the posterior means of these three parameters? (Do the same calculations for $\phi_2^,\nu^$ below.) ```{r} #| label: lbl-bayesian-analysis-q1-3 N=5000 p=2 ## order of AR process n.all=length(y.sample) ## T, total number of data Y=matrix(y.sample[3:n.all],ncol=1) Fmtx=matrix(c(y.sample[2:(n.all-1)],y.sample[1:(n.all-2)]),nrow=p,byrow=TRUE) n=length(Y) m0=matrix(rep(0,p),ncol=1) C0=10*diag(p) n0=0.02 d0=0.02 e=Y-t(Fmtx)%*%m0 Q=t(Fmtx)%*%C0%*%Fmtx+diag(n) Q.inv=chol2inv(chol(Q)) A=C0%*%Fmtx%*%Q.inv m=m0+A%*%e C=C0-A%*%Q%*%t(A) n.star=n+n0 d.star=t(Y-t(Fmtx)%*%m0)%*%Q.inv%*%(Y-t(Fmtx)%*%m0)+d0 n.sample=5000 nu.sample=rep(0,n.sample) phi.sample=matrix(0,nrow=n.sample,ncol=p) for (i in 1:n.sample) { set.seed(i) nu.new=1/rgamma(1,shape=n.star/2,rate=d.star/2) nu.sample[i]=nu.new phi.new=rmvnorm(1,mean=m,sigma=nu.new*C) phi.sample[i,]=phi.new } par(mfrow=c(1,3)) hist(phi.sample[,1],freq=FALSE,xlab=expression(phi[1]),main="",ylim=c(0,6.4)) lines(density(phi.sample[,1]),type='l',col='red') hist(phi.sample[,2],freq=FALSE,xlab=expression(phi[2]),main="",ylim=c(0,6.4)) lines(density(phi.sample[,2]),type='l',col='red') hist(nu.sample,freq=FALSE,xlab=expression(nu),main="") lines(density(nu.sample),type='l',col='red') # assert phi1=0.1, phi2=0.8, nu=1 library(testthat) test_that("Posterior means are close to expected values", { expect_equal(mean(phi.sample[,1]), 0.532, tolerance = 0.01) expect_equal(mean(phi.sample[,2]), 0.427, tolerance = 0.01) expect_equal(mean(nu.sample), 20.9, tolerance = 0.01) }) ``` ```{r} #| label: lst-capstone-google-search-index-covid ## read data, you need to make sure the data file is in your current working directory covid.dat <- read.delim("data/GoogleSearchIndex.txt") covid.dat$Week=as.Date(as.character(covid.dat$Week),format = "%Y-%m-%d") y.dat=covid.dat$covid[1:57] ## this is the training data y.new=covid.dat$covid[58:60] ## this is the test data y.sample=y.dat plot.ts(y.sample,ylab=expression(italic(y)[italic(t)]),xlab=expression(italic(t)),main='') ``` ```{r} #| label: lbl-bayesian-analysis-q1-4 N=5000 p=2 ## order of AR process n.all=length(y.sample) ## T, total number of data Y=matrix(y.sample[3:n.all],ncol=1) Fmtx=matrix(c(y.sample[2:(n.all-1)],y.sample[1:(n.all-2)]),nrow=p,byrow=TRUE) n=length(Y) m0=matrix(rep(0,p),ncol=1) C0=10*diag(p) n0=0.02 d0=0.02 e=Y-t(Fmtx)%*%m0 Q=t(Fmtx)%*%C0%*%Fmtx+diag(n) Q.inv=chol2inv(chol(Q)) A=C0%*%Fmtx%*%Q.inv m=m0+A%*%e C=C0-A%*%Q%*%t(A) n.star=n+n0 d.star=t(Y-t(Fmtx)%*%m0)%*%Q.inv%*%(Y-t(Fmtx)%*%m0)+d0 n.sample=5000 nu.sample=rep(0,n.sample) phi.sample=matrix(0,nrow=n.sample,ncol=p) for (i in 1:n.sample) { set.seed(i) nu.new=1/rgamma(1,shape=n.star/2,rate=d.star/2) nu.sample[i]=nu.new phi.new=rmvnorm(1,mean=m,sigma=nu.new*C) phi.sample[i,]=phi.new } par(mfrow=c(1,3)) hist(phi.sample[,1],freq=FALSE,xlab=expression(phi[1]),main="",ylim=c(0,6.4)) lines(density(phi.sample[,1]),type='l',col='red') hist(phi.sample[,2],freq=FALSE,xlab=expression(phi[2]),main="",ylim=c(0,6.4)) lines(density(phi.sample[,2]),type='l',col='red') hist(nu.sample,freq=FALSE,xlab=expression(nu),main="") lines(density(nu.sample),type='l',col='red') # assert phi1=0.1, phi2=0.8, nu=1 library(testthat) test_that("Posterior means are close to expected values", { expect_equal(mean(phi.sample[,1]), 1.11, tolerance = 0.01) expect_equal(mean(phi.sample[,2]), -0.127, tolerance = 0.01) expect_equal(mean(nu.sample), 83.5, tolerance = 0.01) }) ``` :::::