Pollution: Bayesian Variable Selection¶
Data from McDonald and Schwing [61] that include 15 independent variables and a measure of mortality on 60 U.S. metropolitan areas in 1959-1961. Originally, the data were used to illustrate ridge regression (the full covariate matrix has a huge condition number). This dataset was included in a review of Bayesian variable selection techniques by O’Hara et al [67].
Model¶
Analysis Program¶
using Mamba
## Data
data = [
36 27 71 8.1 3.34 11.4 81.5 3243 8.8 42.6 11.7 21 15 59 59 921.87
35 23 72 11.1 3.14 11.0 78.8 4281 3.5 50.7 14.4 8 10 39 57 997.88
44 29 74 10.4 3.21 9.8 81.6 4260 0.8 39.4 12.4 6 6 33 54 962.35
47 45 79 6.5 3.41 11.1 77.5 3125 27.1 50.2 20.6 18 8 24 56 982.29
43 35 77 7.6 3.44 9.6 84.6 6441 24.4 43.7 14.3 43 38 206 55 1071.29
53 45 80 7.7 3.45 10.2 66.8 3325 38.5 43.1 25.5 30 32 72 54 1030.38
43 30 74 10.9 3.23 12.1 83.9 4679 3.5 49.2 11.3 21 32 62 56 934.70
45 30 73 9.3 3.29 10.6 86.0 2140 5.3 40.4 10.5 6 4 4 56 899.53
36 24 70 9.0 3.31 10.5 83.2 6582 8.1 42.5 12.6 18 12 37 61 1001.90
36 27 72 9.5 3.36 10.7 79.3 4213 6.7 41.0 13.2 12 7 20 59 912.35
52 42 79 7.7 3.39 9.6 69.2 2302 22.2 41.3 24.2 18 8 27 56 1017.61
33 26 76 8.6 3.20 10.9 83.4 6122 16.3 44.9 10.7 88 63 278 58 1024.89
40 34 77 9.2 3.21 10.2 77.0 4101 13.0 45.7 15.1 26 26 146 57 970.47
35 28 71 8.8 3.29 11.1 86.3 3042 14.7 44.6 11.4 31 21 64 60 985.95
37 31 75 8.0 3.26 11.9 78.4 4259 13.1 49.6 13.9 23 9 15 58 958.84
35 46 85 7.1 3.22 11.8 79.9 1441 14.8 51.2 16.1 1 1 1 54 860.10
36 30 75 7.5 3.35 11.4 81.9 4029 12.4 44.0 12.0 6 4 16 58 936.23
15 30 73 8.2 3.15 12.2 84.2 4824 4.7 53.1 12.7 17 8 28 38 871.77
31 27 74 7.2 3.44 10.8 87.0 4834 15.8 43.5 13.6 52 35 124 59 959.22
30 24 72 6.5 3.53 10.8 79.5 3694 13.1 33.8 12.4 11 4 11 61 941.18
31 45 85 7.3 3.22 11.4 80.7 1844 11.5 48.1 18.5 1 1 1 53 891.71
31 24 72 9.0 3.37 10.9 82.8 3226 5.1 45.2 12.3 5 3 10 61 871.34
42 40 77 6.1 3.45 10.4 71.8 2269 22.7 41.4 19.5 8 3 5 53 971.12
43 27 72 9.0 3.25 11.5 87.1 2909 7.2 51.6 9.5 7 3 10 56 887.47
46 55 84 5.6 3.35 11.4 79.7 2647 21.0 46.9 17.9 6 5 1 59 952.53
39 29 75 8.7 3.23 11.4 78.6 4412 15.6 46.6 13.2 13 7 33 60 968.67
35 31 81 9.2 3.10 12.0 78.3 3262 12.6 48.6 13.9 7 4 4 55 919.73
43 32 74 10.1 3.38 9.5 79.2 3214 2.9 43.7 12.0 11 7 32 54 844.05
11 53 68 9.2 2.99 12.1 90.6 4700 7.8 48.9 12.3 648 319 130 47 861.83
30 35 71 8.3 3.37 9.9 77.4 4474 13.1 42.6 17.7 38 37 193 57 989.27
50 42 82 7.3 3.49 10.4 72.5 3497 36.7 43.3 26.4 15 18 34 59 1006.49
60 67 82 10.0 2.98 11.5 88.6 4657 13.5 47.3 22.4 3 1 1 60 861.44
30 20 69 8.8 3.26 11.1 85.4 2934 5.8 44.0 9.4 33 23 125 64 929.15
25 12 73 9.2 3.28 12.1 83.1 2095 2.0 51.9 9.8 20 11 26 58 857.62
45 40 80 8.3 3.32 10.1 70.3 2682 21.0 46.1 24.1 17 14 78 56 961.01
46 30 72 10.2 3.16 11.3 83.2 3327 8.8 45.3 12.2 4 3 8 58 923.23
54 54 81 7.4 3.36 9.7 72.8 3172 31.4 45.5 24.2 20 17 1 62 1113.16
42 33 77 9.7 3.03 10.7 83.5 7462 11.3 48.7 12.4 41 26 108 58 994.65
42 32 76 9.1 3.32 10.5 87.5 6092 17.5 45.3 13.2 29 32 161 54 1015.02
36 29 72 9.5 3.32 10.6 77.6 3437 8.1 45.5 13.8 45 59 263 56 991.29
37 38 67 11.3 2.99 12.0 81.5 3387 3.6 50.3 13.5 56 21 44 73 893.99
42 29 72 10.7 3.19 10.1 79.5 3508 2.2 38.8 15.7 6 4 18 56 938.50
41 33 77 11.2 3.08 9.6 79.9 4843 2.7 38.6 14.1 11 11 89 54 946.19
44 39 78 8.2 3.32 11.0 79.9 3768 28.6 49.5 17.5 12 9 48 53 1025.50
32 25 72 10.9 3.21 11.1 82.5 4355 5.0 46.4 10.8 7 4 18 60 874.28
34 32 79 9.3 3.23 9.7 76.8 5160 17.2 45.1 15.3 31 15 68 57 953.56
10 55 70 7.3 3.11 12.1 88.9 3033 5.9 51.0 14.0 144 66 20 61 839.71
18 48 63 9.2 2.92 12.2 87.7 4253 13.7 51.2 12.0 311 171 86 71 911.70
13 49 68 7.0 3.36 12.2 90.7 2702 3.0 51.9 9.7 105 32 3 71 790.73
35 40 64 9.6 3.02 12.2 82.5 3626 5.7 54.3 10.1 20 7 20 72 899.26
45 28 74 10.6 3.21 11.1 82.6 1883 3.4 41.9 12.3 5 4 20 56 904.16
38 24 72 9.8 3.34 11.4 78.0 4923 3.8 50.5 11.1 8 5 25 61 950.67
31 26 73 9.3 3.22 10.7 81.3 3249 9.5 43.9 13.6 11 7 25 59 972.46
40 23 71 11.3 3.28 10.3 73.8 1671 2.5 47.4 13.5 5 2 11 60 912.20
41 37 78 6.2 3.25 12.3 89.5 5308 25.9 59.7 10.3 65 28 102 52 967.80
28 32 81 7.0 3.27 12.1 81.0 3665 7.5 51.6 13.2 4 2 1 54 823.76
45 33 76 7.7 3.39 11.3 82.2 3152 12.1 47.3 10.9 14 11 42 56 1003.50
45 24 70 11.8 3.25 11.1 79.8 3678 1.0 44.8 14.0 7 3 8 56 895.70
42 33 76 9.7 3.22 9.0 76.2 9699 4.8 42.2 14.5 8 8 49 54 911.82
38 28 72 8.9 3.48 10.7 79.8 3451 11.7 37.5 13.0 14 13 39 58 954.44
]
pollution = Dict{Symbol, Any}(
:y => data[:, end],
:X => mapslices(x -> x / sqrt(var(x)), data[:, 1:(end - 1)], 1),
:p => size(data, 2) - 1
)
## Model Specification
model = Model(
y = Stochastic(1, (mu, sigma2) -> MvNormal(mu, sqrt(sigma2)), false),
mu = Logical(1, (alpha, X, theta) -> alpha + X * theta, false),
alpha = Stochastic(() -> Normal(0, 1000)),
theta = Logical(1, (beta, gamma) -> beta .* gamma),
beta = Stochastic(1,
p -> UnivariateDistribution[Normal(0, 1000) for i in 1:p],
false
),
gamma = Stochastic(1, () -> Bernoulli(0.5)),
sigma2 = Stochastic(() -> InverseGamma(0.0001, 0.0001))
)
## Gibbs Sampler for alpha and beta
Gibbs_alphabeta = Sampler([:alpha, :beta],
(alpha, beta, sigma2, X, gamma, y) ->
begin
alphabeta_distr = [alpha.distr; beta.distr]
alphabeta_mean = map(mean, alphabeta_distr)
alphabeta_invcov = spdiagm(map(d -> 1 / var(d), alphabeta_distr))
M = [ones(y) X * spdiagm(gamma)]
Sigma = inv(Symmetric(M' * M / sigma2 + alphabeta_invcov))
mu = Sigma * (M' * y / sigma2 + alphabeta_invcov * alphabeta_mean)
alphabeta_rand = rand(MvNormal(mu, Sigma))
Dict(:alpha => alphabeta_rand[1], :beta => alphabeta_rand[2:end])
end
)
Gibbs_sigma2 = Sampler([:sigma2],
(mu, sigma2, y) ->
begin
a = length(y) / 2.0 + shape(sigma2.distr)
b = sum(abs2, y - mu) / 2.0 + scale(sigma2.distr)
rand(InverseGamma(a, b))
end
)
## Initial Values
y = pollution[:y]
X = pollution[:X]
p = size(X, 2)
inits = [
Dict(:y => y, :alpha => mean(y), :gamma => rand(0:1, p),
:beta => inv(X' * X + eye(p)) * X' * y, :sigma2 => var(y)),
Dict(:y => y, :alpha => 1, :gamma => rand(0:1, p),
:beta => randn(p), :sigma2 => 1),
Dict(:y => y, :alpha => 17, :gamma => rand(0:1, p),
:beta => [15, -15, -10, 5, -10, -5, -10, 10, 40, -5, 0, 0, 0, 20, 5],
:sigma2 => 1)
]
## Sampling Scheme (without gamma)
scheme0 = [Gibbs_alphabeta, Gibbs_sigma2]
## Binary Hamiltonian Monte Carlo
scheme1 = [BHMC(:gamma, (2 * p + 0.5) * pi); scheme0]
setsamplers!(model, scheme1)
sim1 = mcmc(model, pollution, inits, 10000, burnin=1000, thin=2, chains=3)
describe(sim1)
discretediag(sim1[:, :gamma, :]) |> showall
## Binary MCMC Model Composition
scheme2 = [BMC3(:gamma); scheme0]
setsamplers!(model, scheme2)
sim2 = mcmc(model, pollution, inits, 10000, burnin=1000, thin=2, chains=3)
describe(sim2)
discretediag(sim2[:, :gamma, :]) |> showall
## Binary Metropolised Gibbs Sampling
scheme3 = [BMG(:gamma); scheme0]
setsamplers!(model, scheme3)
sim3 = mcmc(model, pollution, inits, 10000, burnin=1000, thin=2, chains=3)
describe(sim3)
discretediag(sim3[:, :gamma, :]) |> showall
## Discrete Gibbs Sampling
scheme4 = [DGS(:gamma); scheme0]
setsamplers!(model, scheme4)
sim4 = mcmc(model, pollution, inits, 10000, burnin=1000, thin=2, chains=3)
describe(sim4)
discretediag(sim4[:, :gamma, :]) |> showall
## Individual Adaptation Sampling
scheme5 = [BIA(:gamma); scheme0]
setsamplers!(model, scheme5)
sim5 = mcmc(model, pollution, inits, 10000, burnin=1000, thin=2, chains=3)
describe(sim5)
discretediag(sim5[:, :gamma, :]) |> showall
Results¶
## Binary Hamiltonian Monte Carlo
Iterations = 1002:10000
Thinning interval = 2
Chains = 1,2,3,4
Samples per chain = 4500
Empirical Posterior Estimates:
Mean SD Naive SE MCSE ESS
gamma[1] 0.494666667 0.49998544 0.00372667146 0.0354668606 198.73265
gamma[2] 0.147444444 0.35455827 0.00264272128 0.0244410034 210.44430
gamma[3] 0.015611111 0.12396878 0.00092400872 0.0055061412 506.90895
gamma[4] 0.082888889 0.27572186 0.00205510941 0.0154493001 318.51125
gamma[5] 0.030722222 0.17256889 0.00128625256 0.0105253177 268.81567
gamma[6] 0.318833333 0.46603724 0.00347363646 0.0336986753 191.25622
gamma[7] 0.052777778 0.22359575 0.00166658436 0.0135283814 273.17152
gamma[8] 0.039055556 0.19373256 0.00144399723 0.0068660579 796.14158
gamma[9] 0.963111111 0.18849422 0.00140495300 0.0115061554 268.37102
gamma[10] 0.014388889 0.11909088 0.00088765098 0.0047463037 629.57277
gamma[11] 0.022888889 0.14955344 0.00111470550 0.0070783015 446.41078
gamma[12] 0.094222222 0.29214575 0.00217752582 0.0191631169 232.41643
gamma[13] 0.113000000 0.31660159 0.00235980895 0.0206601029 234.83414
gamma[14] 0.598888889 0.49013706 0.00365326592 0.0327788035 223.58820
gamma[15] 0.008333333 0.09090846 0.00067759165 0.0023285399 1524.19744
theta[1] 10.963490356 11.99268806 0.08938821911 0.8074731491 220.58547
theta[2] -3.052823000 7.78859258 0.05805274150 0.5173033385 226.68742
theta[3] -0.180897422 1.69190595 0.01261072235 0.0669381806 638.85842
theta[4] 1.445892275 5.77285839 0.04302834596 0.3387376553 290.43895
theta[5] 0.220991524 1.67908086 0.01251512983 0.0755035072 494.54860
theta[6] -6.639050625 10.52547301 0.07845224381 0.7272107620 209.48969
theta[7] -0.569567370 2.97285383 0.02215834419 0.1667594094 317.80916
theta[8] 0.469028055 2.61485474 0.01948997649 0.0924855110 799.36980
theta[9] 32.862541601 12.24725603 0.09128565676 0.7176766659 291.21916
theta[10] -0.120870758 1.29895656 0.00968185058 0.0429416368 915.02312
theta[11] -0.111881745 3.05139958 0.02274378966 0.1397888103 476.48951
theta[12] -11.124331528 40.10862414 0.29895203352 2.8217496939 202.04056
theta[13] 9.987999815 38.64436971 0.28803812538 2.7092141635 203.46316
theta[14] 14.017636096 12.48284270 0.09304161608 0.8185848401 232.54103
theta[15] 0.027299181 0.58218805 0.00433937350 0.0086425051 4500.00000
sigma2 1675.673663690 421.46874551 3.14144255120 17.6104714349 572.78124
alpha 937.545199598 187.23566554 1.39557225319 12.5267326120 223.40945
Quantiles:
2.5% 25.0% 50.0% 75.0% 97.5%
gamma[1] 0.000000 0.000000 0.000000 1.000000 1.0000000
gamma[2] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[3] 0.000000 0.000000 0.000000 0.000000 0.0000000
gamma[4] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[5] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[6] 0.000000 0.000000 0.000000 1.000000 1.0000000
gamma[7] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[8] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[9] 0.000000 1.000000 1.000000 1.000000 1.0000000
gamma[10] 0.000000 0.000000 0.000000 0.000000 0.0000000
gamma[11] 0.000000 0.000000 0.000000 0.000000 0.0000000
gamma[12] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[13] 0.000000 0.000000 0.000000 0.000000 1.0000000
gamma[14] 0.000000 0.000000 1.000000 1.000000 1.0000000
gamma[15] 0.000000 0.000000 0.000000 0.000000 0.0000000
theta[1] 0.000000 0.000000 0.000000 22.137753 32.5989842
theta[2] -27.095671 0.000000 0.000000 0.000000 0.0000000
theta[3] 0.000000 0.000000 0.000000 0.000000 0.0000000
theta[4] 0.000000 0.000000 0.000000 0.000000 23.6631537
theta[5] 0.000000 0.000000 0.000000 0.000000 2.2412885
theta[6] -30.773041 -15.262916 0.000000 0.000000 0.0000000
theta[7] -10.908665 0.000000 0.000000 0.000000 0.0000000
theta[8] 0.000000 0.000000 0.000000 0.000000 10.3679336
theta[9] 0.000000 25.808602 32.266120 39.686565 58.7865382
theta[10] 0.000000 0.000000 0.000000 0.000000 0.0000000
theta[11] 0.000000 0.000000 0.000000 0.000000 0.0000000
theta[12] -163.621352 0.000000 0.000000 0.000000 0.0000000
theta[13] -10.012373 0.000000 0.000000 0.000000 155.4851914
theta[14] 0.000000 0.000000 17.277061 24.764372 34.1579980
theta[15] 0.000000 0.000000 0.000000 0.000000 0.0000000
sigma2 1074.425951 1385.703616 1605.131816 1878.631424 2759.1883162
alpha 663.986294 798.280105 878.440305 1099.826940 1314.4802623