---
title : 'Homework on Estimating the partition structure in Bayesian models - M5L09HW3'
subtitle : 'Bayesian Statistics: Mixture Models'
categories:
- Bayesian Statistics
keywords:
- Mixture Models
- Homework
- Partition Structure
- Binder's Loss Function
- MCMC
---
::::: {.content-visible unless-profile="HC"}
::: {.callout-caution}
Section omitted to comply with the Honor Code
:::
:::::
::::: {.content-hidden unless-profile="HC"}
1. Binder's loss function is invariant to label switching
- [x] Yes (a quick numeric check follows below)
- [ ] No
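As a quick numeric check (a small addition, not part of the original assignment), note that a Binder-type score depends only on whether pairs of observations share a label, so permuting the cluster labels cannot change it. A minimal sketch, using an arbitrary partition and a symmetric stand-in for the co-clustering matrix:
```{r}
#| label: lbl-q1-label-switching
## Sketch: a Binder-type score is invariant to relabeling the clusters
set.seed(42)
cc.demo = sample(1:3, 10, replace=TRUE)   # an arbitrary partition of 10 items
A = matrix(runif(100), 10, 10)
P = (A + t(A))/2                          # symmetric stand-in for a co-clustering matrix
binder.score = function(cch, DD, Dbar=0.5){
  z = 0
  for(i in 1:(length(cch)-1)){
    for(j in (i+1):length(cch)){
      if(cch[i]==cch[j]) z = z + (DD[i,j] - Dbar)
    }
  }
  z
}
perm = sample(1:3)                        # a random relabeling of the three clusters
binder.score(cc.demo, P) == binder.score(perm[cc.demo], P)  # TRUE: invariant
```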
2. Use the implementation of the MCMC algorithm for fitting a mixture model to the galaxies dataset contained in the lesson "Sample code for estimating the number of components and the partition structure in Bayesian models" to estimate the number of components associated with the optimal partition obtained using Binder's loss function with $\gamma_1 = 3$ and $\gamma_2 = 1$. What is the number of components in the optimal partition?
```{r}
#| label: lbl-q2-galaxies-bayesian-1
## Full Bayesian estimation of a mixture model for density estimation in the galaxies dataset
rm(list=ls())
### Loading data and setting up global variables
library(MASS) ## for the dataset
library(MCMCpack) ## for the rWishart() in the full conditionals
data(galaxies)
x = galaxies
n = length(x)
set.seed(781209)
### Fitting a Bayesian mixture model with
KK = 30 ## the maximum number of components allowed
## Finding the value of alpha consistent with 6 expected components a priori
ff = function(alpha) alpha*log((82+alpha-1)/alpha) - 6
alph = uniroot(ff, c(0.01, 20)) #solve nonlinear equation ff in range [0.01, 20]
alph$root # 1.496393
## Priors set up using an "empirical Bayes" approach
aa = rep(1.5/KK,KK) # We approximate 1.496393 by 1.5
eta = mean(x)
tau = sqrt(var(x))
dd = 2
qq = var(x)/KK
## Initialize the parameters
w = rep(1,KK)/KK
mu = rnorm(KK, mean(x), sd(x))
sigma = sd(x)/KK
cc = sample(1:KK, n, replace=T, prob=w)
## Number of iterations of the sampler
rrr = 25000
burn = 5000
## Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = array(0, dim=c(rrr, KK))
mu.out = array(0, dim=c(rrr, KK))
sigma.out = rep(0, rrr)   # common variance: one value per iteration
logpost = rep(0, rrr)
for(s in 1:rrr){
# Sample the indicators
for(i in 1:n){
v = rep(0,KK)
for(k in 1:KK){
v[k] = log(w[k]) + dnorm(x[i], mu[k], sigma, log=TRUE) #Compute the log of the weights
}
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:KK, 1, replace=TRUE, prob=v)
}
# Sample the weights
w = as.vector(rdirichlet(1, aa + tabulate(cc, nbins=KK)))
# Sample the means
for(k in 1:KK){
nk = sum(cc==k)
xsumk = sum(x[cc==k])
tau2.hat = 1/(nk/sigma^2 + 1/tau^2)
mu.hat = tau2.hat*(xsumk/sigma^2 + eta/tau^2)
mu[k] = rnorm(1, mu.hat, sqrt(tau2.hat))
}
# Sample the variances
dd.star = dd + n/2
qq.star = qq + sum((x - mu[cc])^2)/2
sigma = sqrt(1/rgamma(1, dd.star, qq.star))
# Store samples
cc.out[s,] = cc
w.out[s,] = w
mu.out[s,] = mu
sigma.out[s] = sigma
for(i in 1:n){
logpost[s] = logpost[s] + log(w[cc[i]]) + dnorm(x[i], mu[cc[i]], sigma, log=TRUE)
}
logpost[s] = logpost[s] + log(ddirichlet(w, aa))
for(k in 1:KK){
logpost[s] = logpost[s] + dnorm(mu[k], eta, tau, log=TRUE)
}
logpost[s] = logpost[s] + dgamma(1/sigma^2, dd, qq, log=TRUE) - 4*log(sigma)
if(s %% 500 == 0){
print(paste("s =",s))
}
}
```
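Before summarizing the posterior it is worth a quick look at the trace of the log posterior, as an informal convergence check (an optional addition, not part of the original course code):
```{r}
#| label: fig-q2-logpost-trace
#| fig-cap: "Trace of the log posterior after burn-in."
par(mar=c(4,4,1,1) + 0.1)
plot(logpost[-(1:burn)], type="l", xlab="Iteration (post burn-in)", ylab="Log posterior")
```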
```{r}
#| label: fig-q2-galaxies-bayesian-2
#| fig-cap: "Posterior distribution of the number of components in the galaxies dataset using a Dirichlet prior with alpha = 1.5/K."
nunique = function(x) length(unique(x))
Kstar = apply(cc.out[-seq(1,burn),],1,nunique)
par(mar=c(4,4,1,1) + 0.1)
barplot(table(Kstar)/sum(table(Kstar)), xlab=expression(K^"*"), ylab="Frequency")
#dev.print(file="postKstaralpha2.pdf", dev=pdf)
```
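To read the posterior mode of $K^*$ off directly rather than from the barplot (a small convenience addition):
```{r}
#| label: lbl-q2-kstar-mode
## Posterior mode of the number of occupied components
names(which.max(table(Kstar)))
```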
Next, we construct the pairwise co-clustering probability matrix:
```{r}
#| label: lbl-q2-galaxies-bayesian-3
## Construct pairwise co-clustering matrix for this dataset
pairwise = matrix(0, nrow=n, ncol=n)
for(s in 1:(rrr-burn)){
for(i in 1:n){
for(j in i:n){
pairwise[i,j] = pairwise[i,j] + as.numeric(cc.out[s+burn,i]==cc.out[s+burn,j])
pairwise[j,i] = pairwise[i,j]
}
}
}
DD = pairwise/max(pairwise)
#DD
```
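The triple loop above is slow in interpreted R. An equivalent construction that vectorizes over pairs of observations (a sketch reusing the same `cc.out`, `burn`, and `rrr` objects) is:
```{r}
#| label: lbl-q2-pairwise-vectorized
## Vectorized co-clustering matrix; should agree with DD from the loop above
DD2 = matrix(0, nrow=n, ncol=n)
for(s in (burn+1):rrr){
  DD2 = DD2 + outer(cc.out[s,], cc.out[s,], "==")  # n x n indicator of shared labels
}
DD2 = DD2/(rrr - burn)
max(abs(DD2 - DD))  # 0, up to floating point
```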
We define a helper function to display the co-clustering matrix as a heatmap:
```{r}
#| label: fig-q2-galaxies-bayesian-3
#| fig-cap: "Heatmap of the pairwise co-clustering matrix for the galaxies dataset using a Dirichlet prior with alpha = 1.5/K."
heatmapplot = function(DD, alab, subsetaxis, llc=FALSE){
n = dim(DD)[1]
#colorscale = rev(gray(0:100 / 100))
colorscale = c("white", rev(heat.colors(100)))
nf = layout(matrix(c(1,2),nrow=1,ncol=2), c(7,1), TRUE)
par(mar=c(4,3,1,0.5))
###Display heat-map
image(seq(1,n), seq(1,n), DD, axes=F, xlab="", ylab="",
col=colorscale[seq(floor(min(100*DD)), floor(max(100*DD))) + 1])
axis(1,at=subsetaxis,labels=alab[subsetaxis],las=2,cex.axis=1)
axis(2,at=subsetaxis,labels=alab[subsetaxis],las=2,cex.axis=1)
box()
abline(v = llc+0.5)
abline(h = llc+0.5)
###Display color scale
par(mar=c(3,0,0,0))
plot(1:100,1:100,xlim=c(0,2),ylim=c(0,100),type="n",axes=F,xlab ="",ylab ="")
yposr = 1:100
rect(0, yposr-.5, 0.5, yposr+.5,col = colorscale, border=F)
rect(0, .5, 0.5, 100.5,col = "transparent")
text(0.42,c(yposr[1],yposr[25],yposr[50],yposr[75],yposr[100]),c("0.00","0.25","0.50","0.75","1.00"),pos=4,cex=1.1)
}
heatmapplot(DD, seq(1,n), seq(1,n,by=3))
```
```{r}
#| label: fig-q2-galaxies-bayesian-4
#| fig-cap: "Posterior mean density for the galaxies dataset with pointwise 95% credible bands, compared with a kernel density estimate."
## Plot Bayesian estimate with pointwise credible bands along with kernel density estimate and frequentist point estimate
## Compute the samples of the density over a dense grid
xx = seq(5000,37000,length=300)
density.mcmc = array(0, dim=c(rrr-burn,length(xx)))
for(s in 1:(rrr-burn)){
for(k in 1:KK){
density.mcmc[s,] = density.mcmc[s,] + w.out[s+burn,k]*dnorm(xx,mu.out[s+burn,k],sigma.out[s+burn])
}
}
density.mcmc.m = apply(density.mcmc , 2, mean)
yy = density(x)
colscale = c("black", "red")
density.mcmc.lq = apply(density.mcmc, 2, quantile, 0.025)
density.mcmc.uq = apply(density.mcmc, 2, quantile, 0.975)
par(mfrow=c(1,1))
par(mar=c(4,4,1,1)+0.1)
plot(xx, density.mcmc.m, type="n",ylim=c(0,max(density.mcmc.uq)),xlab="Velocity", ylab="Density")
polygon(c(xx,rev(xx)), c(density.mcmc.lq, rev(density.mcmc.uq)), col="grey", border="grey")
lines(xx, density.mcmc.m, col=colscale[1], lwd=2)
lines(yy, col=colscale[2], lty=2, lwd=2)
points(x, rep(0,n))
legend(27000, 0.00017, c("MCMC","KDE"), col=colscale[c(1,2)], lty=c(1,2), lwd=2, bty="n")
```
We now want a point estimate of the partition: the optimal partition under Binder's loss function with $\gamma_1 = 3$ and $\gamma_2 = 1$, and in particular the number of components it contains.
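To see where the threshold $\bar{D}$ comes from, write $\pi_{ij} = P(c_i = c_j \mid \text{data})$ for the posterior co-clustering probabilities (estimated by `DD`). A short calculation gives, for any candidate partition $\hat{c}$,

$$
E\left[L(\hat{c})\right] = \gamma_1 \sum_{i<j} \pi_{ij} \;-\; (\gamma_1+\gamma_2) \sum_{i<j \,:\, \hat{c}_i = \hat{c}_j}\left(\pi_{ij} - \frac{\gamma_2}{\gamma_1+\gamma_2}\right),
$$

where the first term does not depend on $\hat{c}$. Minimizing the expected loss is therefore equivalent to maximizing the sum computed by `Lstst` below with $\bar{D} = \gamma_2/(\gamma_1+\gamma_2) = 1/4$.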
```{r}
#| label: lbl-q2-galaxies-bayesian-5
##### Finding optimal partition according to Binder's loss function
##
## Function that computes the loss at a particular configuration
Lstst = function(cch, DD, Dbar){
z = 0
for(i in 1:(n-1)){
for(j in (i+1):n){
if(cch[i]==cch[j]){
z = z + (DD[i,j]-Dbar)
}
}
}
return(z)
}
## Initial value of the algorithm is the last iteration of the sampler
## Using as.numeric(factor()) is a cheap way to force the cluster labels
## to be sequential starting at 1
cch = as.numeric(factor(cc))
## Set up parameters for the iterative greedy algorithm
#Dbar = 0.50 # equal penalty for false positive and false negative co-clusterings
gamma_1 = 3
gamma_2 = 1
Dbar = gamma_2 / (gamma_1 + gamma_2)
Dbar # 0.25
optLstst.old = -Inf
optLstst.new = Lstst(cch, DD, Dbar=Dbar)
maxiter = 50
niter = 1
while((optLstst.old!=optLstst.new)&(niter<=maxiter)){
for(i in 1:n){
nq = max(cch) + 1
q = rep(0, nq)
for(s in 1:nq){
ccht = cch
ccht[i] = s
q[s] = Lstst(ccht, DD, Dbar=Dbar)
}
cch[i] = which.max(q)
cch = as.numeric(factor(cch))
}
optLstst.old = optLstst.new
optLstst.new = Lstst(cch, DD, Dbar=Dbar)
niter = niter+1
}
print(nunique(cch))
```
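For reference, the sizes of the clusters in the optimal partition can be tabulated as well (a small addition):
```{r}
#| label: lbl-q2-optimal-cluster-sizes
## Sizes of the clusters in the optimal Binder partition
table(cch)
```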
```{r}
#| label: fig-q2-galaxies-bayesian-5
#| fig-cap: "Heatmap of the pairwise co-clustering matrix for the galaxies dataset with the optimal Binder partition overlaid."
## Create another heatmap plot of the co-clustering matrix in which the
## optimal clusters are represented.
cchlo = as.numeric(as.character(factor(cch, labels=order(unique(cch)))))
cchlotab = table(cchlo)
llc = cumsum(cchlotab[-length(cchlotab)])
heatmapplot(DD, seq(1,n), seq(1,n,by=3), llc=llc)
#dev.print(file="galaxiesheatmap50.pdf", dev=pdf)
```
3. Rerun the algorithm contained in "Sample code for estimating the number of components and the partition structure in Bayesian models" using the prior $(w_1,\ldots,w_K)\sim \mathrm{Dir}(\frac{0.2}{K},\ldots,\frac{0.2}{K})$ for the weights. What is the mode of the posterior distribution of $K^*$, the number of occupied clusters?
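Note that, by the same approximation used in the `ff` function in the code below, the prior expected number of occupied clusters under $\alpha = 0.2$ is roughly $\alpha \log\left(\frac{n+\alpha-1}{\alpha}\right) = 0.2 \log(81.2/0.2) \approx 1.2$, so this prior favors far fewer occupied components than $\alpha = 1.5$.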
```{r}
#| label: lbl-q3
## Full Bayesian estimation of a mixture model for density estimation in the galaxies dataset
rm(list=ls())
### Loading data and setting up global variables
library(MASS)
library(MCMCpack)
data(galaxies)
x = galaxies
n = length(x)
set.seed(781209)
### Fitting a Bayesian mixture model with
KK = 30 ## In this formulation, it should be interpreted as the
## maximum number of components allowed
## The question specifies the prior (w_1, ..., w_K) ~ Dir(0.2/K, ..., 0.2/K),
## so we set the concentration parameter directly rather than solving for it
## Remaining priors set up using an "empirical Bayes" approach
aa = rep(0.2/KK, KK) # concentration parameter 0.2, as specified in the question
eta = mean(x)
tau = sqrt(var(x))
dd = 2
qq = var(x)/KK
## Initialize the parameters
w = rep(1,KK)/KK
mu = rnorm(KK, mean(x), sd(x))
sigma = sd(x)/KK
cc = sample(1:KK, n, replace=T, prob=w)
## Number of iterations of the sampler
rrr = 25000
burn = 5000
## Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = array(0, dim=c(rrr, KK))
mu.out = array(0, dim=c(rrr, KK))
sigma.out = rep(0, rrr)   # common variance: one value per iteration
logpost = rep(0, rrr)
for(s in 1:rrr){
# Sample the indicators
for(i in 1:n){
v = rep(0,KK)
for(k in 1:KK){
v[k] = log(w[k]) + dnorm(x[i], mu[k], sigma, log=TRUE) #Compute the log of the weights
}
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:KK, 1, replace=TRUE, prob=v)
}
# Sample the weights
w = as.vector(rdirichlet(1, aa + tabulate(cc, nbins=KK)))
# Sample the means
for(k in 1:KK){
nk = sum(cc==k)
xsumk = sum(x[cc==k])
tau2.hat = 1/(nk/sigma^2 + 1/tau^2)
mu.hat = tau2.hat*(xsumk/sigma^2 + eta/tau^2)
mu[k] = rnorm(1, mu.hat, sqrt(tau2.hat))
}
# Sample the variances
dd.star = dd + n/2
qq.star = qq + sum((x - mu[cc])^2)/2
sigma = sqrt(1/rgamma(1, dd.star, qq.star))
# Store samples
cc.out[s,] = cc
w.out[s,] = w
mu.out[s,] = mu
sigma.out[s] = sigma
for(i in 1:n){
logpost[s] = logpost[s] + log(w[cc[i]]) + dnorm(x[i], mu[cc[i]], sigma, log=TRUE)
}
logpost[s] = logpost[s] + log(ddirichlet(w, aa))
for(k in 1:KK){
logpost[s] = logpost[s] + dnorm(mu[k], eta, tau, log=TRUE)
}
logpost[s] = logpost[s] + dgamma(1/sigma^2, dd, qq, log=TRUE) - 4*log(sigma)
if(s %% 500 == 0){
print(paste("s =",s))
}
}
## ----- galaxiesbayesian2
nunique = function(x) length(unique(x))
Kstar = apply(cc.out[-seq(1,burn),],1,nunique)
par(mar=c(4,4,1,1) + 0.1)
barplot(table(Kstar)/sum(table(Kstar)), xlab=expression(K^"*"), ylab="Frequency")
#dev.print(file="postKstaralpha2.pdf", dev=pdf)
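## Report the posterior mode of K* directly; this is the quantity the question asks for
## (a small addition to the original course code)
print(names(which.max(table(Kstar))))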
## ----- galaxiesbayesian3
## Construct pairwise co-clustering matrix for this dataset
pairwise = matrix(0, nrow=n, ncol=n)
for(s in 1:(rrr-burn)){
for(i in 1:n){
for(j in i:n){
pairwise[i,j] = pairwise[i,j] + as.numeric(cc.out[s+burn,i]==cc.out[s+burn,j])
pairwise[j,i] = pairwise[i,j]
}
}
}
DD = pairwise/max(pairwise)
heatmapplot = function(DD, alab, subsetaxis, llc=FALSE){
n = dim(DD)[1]
#colorscale = rev(gray(0:100 / 100))
colorscale = c("white", rev(heat.colors(100)))
nf = layout(matrix(c(1,2),nrow=1,ncol=2), c(7,1), TRUE)
par(mar=c(4,3,1,0.5))
###Display heat-map
image(seq(1,n), seq(1,n), DD, axes=F, xlab="", ylab="",
col=colorscale[seq(floor(min(100*DD)), floor(max(100*DD))) + 1])
axis(1,at=subsetaxis,labels=alab[subsetaxis],las=2,cex.axis=1)
axis(2,at=subsetaxis,labels=alab[subsetaxis],las=2,cex.axis=1)
box()
abline(v = llc+0.5)
abline(h = llc+0.5)
###Display color scale
par(mar=c(3,0,0,0))
plot(1:100,1:100,xlim=c(0,2),ylim=c(0,100),type="n",axes=F,xlab ="",ylab ="")
yposr = 1:100
rect(0, yposr-.5, 0.5, yposr+.5,col = colorscale, border=F)
rect(0, .5, 0.5, 100.5,col = "transparent")
text(0.42,c(yposr[1],yposr[25],yposr[50],yposr[75],yposr[100]),c("0.00","0.25","0.50","0.75","1.00"),pos=4,cex=1.1)
}
heatmapplot(DD, seq(1,n), seq(1,n,by=3))
## Plot Bayesian estimate with pointwise credible bands along with kernel density estimate and frequentist point estimate
## Compute the samples of the density over a dense grid
xx = seq(5000,37000,length=300)
density.mcmc = array(0, dim=c(rrr-burn,length(xx)))
for(s in 1:(rrr-burn)){
for(k in 1:KK){
density.mcmc[s,] = density.mcmc[s,] + w.out[s+burn,k]*dnorm(xx,mu.out[s+burn,k],sigma.out[s+burn])
}
}
density.mcmc.m = apply(density.mcmc , 2, mean)
yy = density(x)
colscale = c("black", "red")
density.mcmc.lq = apply(density.mcmc, 2, quantile, 0.025)
density.mcmc.uq = apply(density.mcmc, 2, quantile, 0.975)
par(mfrow=c(1,1))
par(mar=c(4,4,1,1)+0.1)
plot(xx, density.mcmc.m, type="n",ylim=c(0,max(density.mcmc.uq)),xlab="Velocity", ylab="Density")
polygon(c(xx,rev(xx)), c(density.mcmc.lq, rev(density.mcmc.uq)), col="grey", border="grey")
lines(xx, density.mcmc.m, col=colscale[1], lwd=2)
lines(yy, col=colscale[2], lty=2, lwd=2)
points(x, rep(0,n))
legend(27000, 0.00017, c("MCMC","KDE"), col=colscale[c(1,2)], lty=c(1,2), lwd=2, bty="n")
##### Finding optimal partition according to Binder's loss function
##
## Function that computes the loss at a particular configuration
Lstst = function(cch, DD, Dbar){
z = 0
for(i in 1:(n-1)){
for(j in (i+1):n){
if(cch[i]==cch[j]){
z = z + (DD[i,j]-Dbar)
}
}
}
return(z)
}
## Initial value of the algorithm is the last iteration of the sampler
## Using as.numeric(factor()) is a cheap way to force the cluster labels
## to be sequential starting at 1
cch = as.numeric(factor(cc))
## Set up parameters for the iterative greedy algorithm
Dbar = 0.50 # equal penalty for false positive and false negative co-clusterings
optLstst.old = -Inf
optLstst.new = Lstst(cch, DD, Dbar=Dbar)
maxiter = 50
niter = 1
while((optLstst.old!=optLstst.new)&(niter<=maxiter)){
for(i in 1:n){
nq = max(cch) + 1
q = rep(0, nq)
for(s in 1:nq){
ccht = cch
ccht[i] = s
q[s] = Lstst(ccht, DD, Dbar=Dbar)
}
cch[i] = which.max(q)
cch = as.numeric(factor(cch))
}
optLstst.old = optLstst.new
optLstst.new = Lstst(cch, DD, Dbar=Dbar)
niter = niter+1
}
#print(nunique(cch))
## Create another heatmap plot of the co-clustering matrix in which the
## optimal clusters are represented.
cchlo = as.numeric(as.character(factor(cch, labels=order(unique(cch)))))
cchlotab = table(cchlo)
llc = cumsum(cchlotab[-length(cchlotab)])
heatmapplot(DD, seq(1,n), seq(1,n,by=3), llc=llc)
#dev.print(file="galaxiesheatmap50.pdf", dev=pdf)
```
4. Under the new prior $(w_1,\ldots,w_K)\sim \mathrm{Dir}(\frac{0.2}{K},\ldots,\frac{0.2}{K})$, what is the number of components in the optimal partition according to Binder's loss function with $\gamma_1 = 3$ and $\gamma_2 = 1$?
```{r}
#| label: lbl-q4
## Full Bayesian estimation of a mixture model for density estimation in the galaxies dataset
rm(list=ls())
### Loading data and setting up global variables
library(MASS)
library(MCMCpack)
data(galaxies)
x = galaxies
n = length(x)
set.seed(781209)
### Fitting a Bayesian mixture model with
KK = 30 ## In this formulation, it should be interpreted as the
## maximum number of components allowed
## As in the previous question, the prior on the weights is Dir(0.2/K, ..., 0.2/K)
## Remaining priors set up using an "empirical Bayes" approach
aa = rep(0.2/KK, KK) # concentration parameter 0.2
eta = mean(x)
tau = sqrt(var(x))
dd = 2
qq = var(x)/KK
## Initialize the parameters
w = rep(1,KK)/KK
mu = rnorm(KK, mean(x), sd(x))
sigma = sd(x)/KK
cc = sample(1:KK, n, replace=T, prob=w)
## Number of iterations of the sampler
rrr = 25000
burn = 5000
## Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = array(0, dim=c(rrr, KK))
mu.out = array(0, dim=c(rrr, KK))
sigma.out = rep(0, rrr)   # common variance: one value per iteration
logpost = rep(0, rrr)
for(s in 1:rrr){
# Sample the indicators
for(i in 1:n){
v = rep(0,KK)
for(k in 1:KK){
v[k] = log(w[k]) + dnorm(x[i], mu[k], sigma, log=TRUE) #Compute the log of the weights
}
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:KK, 1, replace=TRUE, prob=v)
}
# Sample the weights
w = as.vector(rdirichlet(1, aa + tabulate(cc, nbins=KK)))
# Sample the means
for(k in 1:KK){
nk = sum(cc==k)
xsumk = sum(x[cc==k])
tau2.hat = 1/(nk/sigma^2 + 1/tau^2)
mu.hat = tau2.hat*(xsumk/sigma^2 + eta/tau^2)
mu[k] = rnorm(1, mu.hat, sqrt(tau2.hat))
}
# Sample the variances
dd.star = dd + n/2
qq.star = qq + sum((x - mu[cc])^2)/2
sigma = sqrt(1/rgamma(1, dd.star, qq.star))
# Store samples
cc.out[s,] = cc
w.out[s,] = w
mu.out[s,] = mu
sigma.out[s] = sigma
for(i in 1:n){
logpost[s] = logpost[s] + log(w[cc[i]]) + dnorm(x[i], mu[cc[i]], sigma, log=TRUE)
}
logpost[s] = logpost[s] + log(ddirichlet(w, aa))
for(k in 1:KK){
logpost[s] = logpost[s] + dnorm(mu[k], eta, tau, log=TRUE)
}
logpost[s] = logpost[s] + dgamma(1/sigma^2, dd, qq, log=TRUE) - 4*log(sigma)
if(s %% 500 == 0){
print(paste("s =",s))
}
}
## ----- galaxiesbayesian2
nunique = function(x) length(unique(x))
Kstar = apply(cc.out[-seq(1,burn),],1,nunique)
par(mar=c(4,4,1,1) + 0.1)
barplot(table(Kstar)/sum(table(Kstar)), xlab=expression(K^"*"), ylab="Frequency")
#dev.print(file="postKstaralpha2.pdf", dev=pdf)
## ----- galaxiesbayesian3
## Construct pairwise co-clustering matrix for this dataset
pairwise = matrix(0, nrow=n, ncol=n)
for(s in 1:(rrr-burn)){
for(i in 1:n){
for(j in i:n){
pairwise[i,j] = pairwise[i,j] + as.numeric(cc.out[s+burn,i]==cc.out[s+burn,j])
pairwise[j,i] = pairwise[i,j]
}
}
}
DD = pairwise/max(pairwise)
heatmapplot = function(DD, alab, subsetaxis, llc=FALSE){
n = dim(DD)[1]
#colorscale = rev(gray(0:100 / 100))
colorscale = c("white", rev(heat.colors(100)))
nf = layout(matrix(c(1,2),nrow=1,ncol=2), c(7,1), TRUE)
par(mar=c(4,3,1,0.5))
###Display heat-map
image(seq(1,n), seq(1,n), DD, axes=F, xlab="", ylab="",
col=colorscale[seq(floor(min(100*DD)), floor(max(100*DD))) + 1])
axis(1,at=subsetaxis,labels=alab[subsetaxis],las=2,cex.axis=1)
axis(2,at=subsetaxis,labels=alab[subsetaxis],las=2,cex.axis=1)
box()
abline(v = llc+0.5)
abline(h = llc+0.5)
###Display color scale
par(mar=c(3,0,0,0))
plot(1:100,1:100,xlim=c(0,2),ylim=c(0,100),type="n",axes=F,xlab ="",ylab ="")
yposr = 1:100
rect(0, yposr-.5, 0.5, yposr+.5,col = colorscale, border=F)
rect(0, .5, 0.5, 100.5,col = "transparent")
text(0.42,c(yposr[1],yposr[25],yposr[50],yposr[75],yposr[100]),c("0.00","0.25","0.50","0.75","1.00"),pos=4,cex=1.1)
}
heatmapplot(DD, seq(1,n), seq(1,n,by=3))
## Plot Bayesian estimate with pointwise credible bands along with kernel density estimate and frequentist point estimate
## Compute the samples of the density over a dense grid
xx = seq(5000,37000,length=300)
density.mcmc = array(0, dim=c(rrr-burn,length(xx)))
for(s in 1:(rrr-burn)){
for(k in 1:KK){
density.mcmc[s,] = density.mcmc[s,] + w.out[s+burn,k]*dnorm(xx,mu.out[s+burn,k],sigma.out[s+burn])
}
}
density.mcmc.m = apply(density.mcmc , 2, mean)
yy = density(x)
colscale = c("black", "red")
density.mcmc.lq = apply(density.mcmc, 2, quantile, 0.025)
density.mcmc.uq = apply(density.mcmc, 2, quantile, 0.975)
par(mfrow=c(1,1))
par(mar=c(4,4,1,1)+0.1)
plot(xx, density.mcmc.m, type="n",ylim=c(0,max(density.mcmc.uq)),xlab="Velocity", ylab="Density")
polygon(c(xx,rev(xx)), c(density.mcmc.lq, rev(density.mcmc.uq)), col="grey", border="grey")
lines(xx, density.mcmc.m, col=colscale[1], lwd=2)
lines(yy, col=colscale[2], lty=2, lwd=2)
points(x, rep(0,n))
legend(27000, 0.00017, c("MCMC","KDE"), col=colscale[c(1,2)], lty=c(1,2), lwd=2, bty="n")
##### Finding optimal partition according to Binder's loss function
##
## Function that computes the loss at a particular configuration
Lstst = function(cch, DD, Dbar){
z = 0
for(i in 1:(n-1)){
for(j in (i+1):n){
if(cch[i]==cch[j]){
z = z + (DD[i,j]-Dbar)
}
}
}
return(z)
}
## Initial value of the algorithm is the last iteration of the sampler
## Using as.numeric(factor()) is a cheap way to force the cluster labels
## to be sequential starting at 1
cch = as.numeric(factor(cc))
## Set up parameters for the iterative greedy algorithm
gamma_1 = 3
gamma_2 = 1
Dbar = gamma_2/(gamma_1 + gamma_2) # 0.25, matching gamma_1 = 3 and gamma_2 = 1 as the question requires
optLstst.old = -Inf
optLstst.new = Lstst(cch, DD, Dbar=Dbar)
maxiter = 50
niter = 1
while((optLstst.old!=optLstst.new)&(niter<=maxiter)){
for(i in 1:n){
nq = max(cch) + 1
q = rep(0, nq)
for(s in 1:nq){
ccht = cch
ccht[i] = s
q[s] = Lstst(ccht, DD, Dbar=Dbar)
}
cch[i] = which.max(q)
cch = as.numeric(factor(cch))
}
optLstst.old = optLstst.new
optLstst.new = Lstst(cch, DD, Dbar=Dbar)
niter = niter+1
}
print(nunique(cch)) ## number of components in the optimal partition: the answer to this question
## Create another heatmap plot of the co-clustering matrix in which the
## optimal clusters are represented.
cchlo = as.numeric(as.character(factor(cch, labels=order(unique(cch)))))
cchlotab = table(cchlo)
llc = cumsum(cchlotab[-length(cchlotab)])
heatmapplot(DD, seq(1,n), seq(1,n,by=3), llc=llc)
#dev.print(file="galaxiesheatmap50.pdf", dev=pdf)
```
:::::