# # Linear regression
# We estimate a simple linear regression model with a half-T prior on the error scale.
# First, we import DynamicHMC and related libraries,
using TransformVariables, LogDensityProblems, LogDensityProblemsAD,
DynamicHMC, TransformedLogDensities
# then some packages that help code the log posterior,
using Parameters, Statistics, Random, Distributions, LinearAlgebra
# then diagnostic and benchmark tools,
using MCMCDiagnosticTools, DynamicHMC.Diagnostics, BenchmarkTools
# and use ForwardDiff for AD since the dimension of the problem is small.
import ForwardDiff
# Then define a structure to hold the data: observables, covariates, and the degrees of
# freedom for the prior.
"""
Linear regression model ``y ∼ Xβ + ϵ``, where ``ϵ ∼ N(0, σ²)`` IID.
Weakly informative prior for `β`, half-T for `σ`.
"""
struct LinearRegressionProblem{TY <: AbstractVector, TX <: AbstractMatrix,
                               Tν <: Real}
    "Observations."
    y::TY
    "Covariates."
    X::TX
    "Degrees of freedom for prior."
    ν::Tν
end
# Then make the type callable with the parameters *as a single argument*.
function (problem::LinearRegressionProblem)(θ)
    @unpack y, X, ν = problem       # extract the data
    @unpack β, σ = θ                # works on the named tuple too
    ϵ_distribution = Normal(0, σ)   # the error term
    ℓ_error = mapreduce((y, x) -> logpdf(ϵ_distribution, y - dot(x, β)), +,
                        y, eachrow(X))    # likelihood for error
    ℓ_σ = logpdf(TDist(ν), σ)             # prior for σ
    ℓ_β = loglikelihood(Normal(0, 10), β) # prior for β
    ℓ_error + ℓ_σ + ℓ_β
end
# Make up random data and check that the function runs.
N = 100
X = hcat(ones(N), randn(N, 2));
β = [1.0, 2.0, -1.0]
σ = 0.5
y = X*β .+ randn(N) .* σ;
p = LinearRegressionProblem(y, X, 1.0);
p((β = β, σ = σ))
# It is usually a good idea to benchmark and optimize your log posterior code at this stage.
# Above, we have carefully optimized allocations away using `mapreduce`.
@btime p((β = $β, σ = $σ))
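# For comparison (a sketch, not part of the original example; `ℓ_error_naive` is a
# hypothetical helper), a naive version of the error likelihood that materializes the
# residual vector allocates a temporary array on every call; `mapreduce` over
# `eachrow(X)` avoids this.
ℓ_error_naive(problem, θ) = sum(logpdf.(Normal(0, θ.σ), problem.y .- problem.X * θ.β))
@btime ℓ_error_naive($p, (β = $β, σ = $σ))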
# For this problem, we write a function to return the transformation (as it varies with the
# number of covariates).
function problem_transformation(p::LinearRegressionProblem)
    as((β = as(Array, size(p.X, 2)), σ = asℝ₊))
end
# Wrap the problem with a transformation, then use ForwardDiff for the gradient.
t = problem_transformation(p)
P = TransformedLogDensity(t, p)
∇P = ADgradient(:ForwardDiff, P);
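# As a quick sanity check (illustration only, not part of the original example), we can
# inspect the transformation and the gradient at the origin of the unconstrained space:
# `σ = exp(0) = 1` because `asℝ₊` exponentiates, and both values below should be finite.
x₀ = zeros(dimension(t))
transform(t, x₀)
LogDensityProblems.logdensity_and_gradient(∇P, x₀)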
# Finally, we sample from the posterior, running 5 independent chains. Each element of
# `results` holds the positions and diagnostic information of one chain, along with the
# adapted sampler parameters that would allow continuation of sampling.
results = map(_ -> mcmc_with_warmup(Random.default_rng(), ∇P, 1000), 1:5)
# We use the transformation to obtain the posterior from the pooled chains.
posterior = transform.(t, eachcol(pool_posterior_matrices(results)));
# Extract the parameter posterior means: `β`,
posterior_β = mean(first, posterior)
# then `σ`:
posterior_σ = mean(last, posterior)
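# As a rough check (not part of the original example), the posterior means should be close
# to the values used to simulate the data:
posterior_β .- β
posterior_σ - σ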
# Effective sample sizes and R̂ (of untransformed draws)
ess, R̂ = ess_rhat(stack_posterior_matrices(results))
# Summarize NUTS-specific statistics of all chains
summarize_tree_statistics(mapreduce(x -> x.tree_statistics, vcat, results))