require(tidyverse)
library(extraDistr)
Tutorial 3
Hands-on exercise on prior elicitation
1. The BIPED Study
Project: Remote expert elicitation to determine the prior probability distribution for Bayesian sample size determination in international randomized controlled trials: Bronchiolitis in Infants Placebo Versus Epinephrine and Dexamethasone (BIPED) Study
1.1 Given a range of believed plausible values, how can we calculate the Beta distribution parameters?
- Mathematically, we are solving for \(\alpha\) and \(\beta\) given the bounds L and U and the mode (the "best guess" value):
\[P(L < \theta < U) = 0.95, \qquad \theta \sim Beta(\alpha, \beta)\]
- Let \(\theta\) be the proportion of similar patients with bronchiolitis who end up being admitted to hospital (elicited as a count out of 100 patients).
- Suppose expert A picks the range [10, 90] out of 100, i.e. \(P(0.10 < \theta < 0.90) = 0.95\). If we assume \(\theta\) follows a Beta distribution with mode \(50/100 = \frac{\alpha-1}{\alpha+\beta-2}\), can we find the corresponding \(\alpha\) and \(\beta\) of this Beta distribution?
- Given the 95% credible range, how do we reverse the calculation to recover the Beta distribution parameters \(\alpha\) and \(\beta\)?
- We can do this numerically: fix the mode and search for the Beta distribution whose 2.5% and 97.5% quantiles match L and U (a least-squares fit on the quantiles; see the note and code below).
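A detail worth spelling out before the code (it is what the function below relies on): if we reparameterise as \(\alpha = 1 + m k\) and \(\beta = 1 + (1 - m) k\) for the elicited mode \(m\) and a concentration \(k > 0\), the mode is automatically correct for every \(k\),
\[\frac{\alpha - 1}{\alpha + \beta - 2} = \frac{m k}{m k + (1 - m) k} = m,\]
so only the single parameter \(k\) has to be searched until the implied 2.5% and 97.5% quantiles match L and U.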
# The credible range in the BIPED app is reported as the number of events out of 100 standard patients,
# so the elicited bounds and mode are rescaled to proportions before fitting;
# expert_beta() below searches for the Beta distribution whose mode and central 95% interval match the elicited values.
expert_beta <- function(L, U, mode, level = 0.95) {
  stopifnot(0 < L, L < U, U < 1,
            0 < mode, mode < 1,
            level > 0, level < 1)

  # One-dimensional objective: optimizing over log-k keeps the search on (0, Inf)
  objective <- function(logk) {
    k      <- exp(logk)
    alpha  <- 1 + mode * k
    beta   <- 1 + (1 - mode) * k
    q_low  <- qbeta( (1 - level)/2, alpha, beta )
    q_high <- qbeta( 1 - (1 - level)/2, alpha, beta )
    (q_low - L)^2 + (q_high - U)^2
  }

  opt <- optimize(objective, interval = c(-15, 25))  # log-scale search
  k   <- exp(opt$minimum)
  list(alpha = 1 + mode * k,
       beta  = 1 + (1 - mode) * k,
       SSE   = opt$objective)  # residual mis-fit (approximately 0 if the elicited values are feasible)
}
# Fit expert A's Beta distribution by matching the elicited mode and 95% bounds;
expert_beta(L = 10/100, U = 90/100, mode = 50/100)
$alpha
[1] 2.093438
$beta
[1] 2.093438
$SSE
[1] 4.746533e-15
- Expert A's elicited belief corresponds to a \(Beta(\alpha = 2.093438, \beta = 2.093438)\) distribution.
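As a quick sanity check (not part of the original elicitation workflow; the object name fit is introduced here for illustration), we can confirm that this fitted Beta reproduces expert A's inputs:
# sanity check: does the fitted Beta reproduce expert A's mode and 95% range?
fit <- expert_beta(L = 10/100, U = 90/100, mode = 50/100)
(fit$alpha - 1)/(fit$alpha + fit$beta - 2)       # mode, should be 0.5
qbeta(c(0.025, 0.975), fit$alpha, fit$beta)      # should be close to 0.10 and 0.90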
# We can create an R function to efficiently generate plots;
beta_plot <- function(shape1 = 1, shape2 = 1, scale = 100){
  # create the x vector;
  x <- seq(from = 0, to = 1, by = 0.001)
  # evaluate the Beta density at each element of x;
  y <- dbeta(x, shape1 = shape1, shape2 = shape2)
  # create a plotting data frame;
  data <- data.frame(x, y)
  # ggplot;
  ggplot(data, aes(x = x*scale, y = y)) +
    geom_line() +
    labs(x = bquote(.(scale) * theta), y = 'density', title = paste0('Beta(', shape1, ',', shape2, ')')) +
    theme_bw()
}
beta_plot(2.093438,2.093438)
- What is the expected number of hospitalizations? (the mean of the Beta distribution)
\[100 \times \frac{2.093438}{2.093438+2.093438} = 50 \]
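The same value can be checked directly in R using the fitted parameters from above:
# expected number of hospitalizations out of 100 under expert A's prior;
100 * 2.093438/(2.093438 + 2.093438)
[1] 50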
- Suppose expert B picks the range [50, 90] out of 100 with mode 80. What is expert B's Beta distribution?
beta_parameters <- expert_beta(L = 50/100, U = 90/100, mode = 80/100)
beta_plot(beta_parameters$alpha, beta_parameters$beta)
mean <- 100*(beta_parameters$alpha/(beta_parameters$alpha + beta_parameters$beta))
mean
[1] 75.61505
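Before accepting the fit, it is worth checking how closely expert B's elicited bounds could actually be matched; the SSE element returned by expert_beta and the implied quantiles make this easy (a small check added here, not in the original script):
beta_parameters$SSE                                                   # residual mis-fit of the elicited bounds
qbeta(c(0.025, 0.975), beta_parameters$alpha, beta_parameters$beta)   # implied 95% range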
1.2 Combine multiple expert opinions - (pooled) mixture distribution
# expert A's Beta parameters;
a1 <- 2.09
b1 <- 2.09

# expert B's Beta parameters;
a2 <- 10.35
b2 <- 3.34

w <- c(1/2, 1/2)  # assign equal mixture weight to the two experts' beliefs;

theta <- seq(0, 1, 0.01)
p1 <- dbeta(theta, a1, b1)
p2 <- dbeta(theta, a2, b2)

# linear opinion pool: weighted average of the two prior densities;
pool <- w[1]*p1 + w[2]*p2

# upper limit for the y-axis of the plot;
r <- 1.2*max(c(p1, p2, pool))
plot(theta,p1,col=gray(0.5),lwd=2,ylim=c(0,r),type="l",
xlab=expression(theta),ylab="Prior",
cex.lab=1.5,cex.axis=1.5)
lines(theta,p2,col=gray(0.5),lwd=2)
lines(theta,pool,col="red",lwd=2)
legend("topleft",c("Experts","Mixture of experts"),lwd=2,col=c(gray(0.5), "red"),bty="n",cex=1.5)