Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def fit(self, X, Y, n_samples=10000, tune_steps=1000, n_jobs=4):
with pm.Model() as self.model:
# Priors
std = pm.Uniform("std", 0, self.sps, testval=X.std())
beta = pm.StudentT("beta", mu=0, lam=self.sps, nu=self.nu)
alpha = pm.StudentT("alpha", mu=0, lam=self.sps, nu=self.nu, testval=Y.mean())
# Deterministic model
mean = pm.Deterministic("mean", alpha + beta * X)
# Posterior distribution
obs = pm.Normal("obs", mu=mean, sd=std, observed=Y)
## Run MCMC
# Find search start value with maximum a posterior estimation
start = pm.find_MAP()
# sample posterior distribution for latent variables
trace = pm.sample(n_samples, njobs=n_jobs, tune=tune_steps, start=start)
# Recover posterior samples
self.burned_trace = trace[int(n_samples / 2):]
def plot_priors(self, var_names=None):
if not self.built:
raise ValueError("Cannot plot priors until model is built!")
with pm.Model():
# get priors for fixed fx, separately for each level of each
# predictor
dists = []
for fixed_term in self.fixed_terms.values():
if var_names is not None and fixed_term.name not in var_names:
continue
for i, level in enumerate(fixed_term.levels):
params = {
k: v[i % len(v)] if isinstance(v, np.ndarray) else v
for k, v in fixed_term.prior.args.items()
}
dists += [getattr(pm, fixed_term.prior.name)(level, **params)]
# get priors for random effect SDs
for random_term in self.random_terms.values():
if var_names is not None and random_term.name not in var_names:
def run(steppers, p):
steppers = set(steppers)
traces = {}
effn = {}
runtimes = {}
with pm.Model() as model:
if USE_XY:
x = pm.Flat('x')
y = pm.Flat('y')
mu = np.array([0.,0.])
cov = np.array([[1.,p],[p,1.]])
z = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x,y]))
pot = pm.Potential('logp_xy', z)
start = {'x': 0, 'y': 0}
else:
mu = np.array([0.,0.])
cov = np.array([[1.,p],[p,1.]])
z = pm.MvNormal('z', mu=mu, cov=cov, shape=(2,))
start={'z': [0, 0]}
for step_cls in steppers:
name = step_cls.__name__
NxLvl = len(set(x))
# # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD:
contrast_dict = None
for g1idx in range(NxLvl):
for g2idx in range(g1idx+1, NxLvl):
cmpVec = np.repeat(0, NxLvl)
cmpVec[g1idx] = -1
cmpVec[g2idx] = 1
contrast_dict = (contrast_dict, cmpVec)
z = (y - np.mean(y))/np.std(y)
## THE MODEL.
with pm.Model() as model:
# define the hyperpriors
a_SD_unabs = pm.StudentT('a_SD_unabs', mu=0, lam=0.001, nu=1)
a_SD = abs(a_SD_unabs) + 0.1
atau = 1 / a_SD**2
# define the priors
sigma = pm.Uniform('sigma', 0, 10) # y values are assumed to be standardized
tau = 1 / sigma**2
a0 = pm.Normal('a0', mu=0, tau=0.001) # y values are assumed to be standardized
a = pm.Normal('a', mu=0 , tau=atau, shape=NxLvl)
b = pm.Deterministic('b', a - T.mean(a))
mu = a0 + b[x]
# define the likelihood
yl = pm.Normal('yl', mu, tau=tau, observed=z)
# Generate a MCMC chain
trace = pm.sample(2000, progressbar=False)
def bayesianCenter(data):
with pm.Model():
loc = pm.Uniform('location', lower=-1000., upper=1000.)
scale = pm.Uniform('scale', lower=0.01, upper=1000.)
pm.Cauchy('y', alpha=loc, beta=scale, observed=data)
trace = pm.sample(3000, tune=3000, target_accept=0.92)
pm.traceplot(trace)
plt.show()
return np.mean(trace['location'])
Returns
----------
the PyMC3 model
"""
model_input = theano.shared(
np.zeros([self.num_training_samples, self.num_pred]))
model_output = theano.shared(np.zeros(self.num_training_samples))
self.shared_vars = {
'model_input': model_input,
'model_output': model_output,
}
model = pm.Model()
with model:
alpha = pm.Normal('alpha', mu=0, sd=100, shape=1)
betas = pm.Normal('betas', mu=0, sd=100, shape=(1, self.num_pred))
s = pm.HalfNormal('s', tau=1)
mean = alpha + tt.sum(betas * model_input, 1)
y = pm.Normal('y', mu=mean, sd=s, observed=model_output)
return model
def build_model():
with pm.Model() as model:
x = pm.Normal('x', 1, 1)
x2 = pm.Potential('x2', -x ** 2)
return model
# def logp_(value):
# logps = [T.log(pi[i]) + logp_normal(mu, tau, value)
# for i, mu in enumerate(mus)]
#
# return T.sum(
# logsumexp(T.stacklists(logps)[:, :self.num_training_samples],
# axis=0))
#
# return logp_
def stick_breaking(v):
portion_remaining = tt.concatenate(
[[1], tt.extra_ops.cumprod(1 - v)[:-1]])
return v * portion_remaining
model = pm.Model()
with model:
K = self.num_truncate
D = self.num_pred
alpha = pm.Gamma('alpha', 1.0, 1.0)
v = pm.Beta('v', 1, alpha, shape=K)
pi_ = stick_breaking(v)
pi = pm.Deterministic('pi', pi_/pi_.sum())
means = tt.stack([pm.Uniform('cluster_center_{}'.format(k),
lower=0.,
upper=10.,
shape=D) for k in range(K)])
This model estimates the population prevalence of respiratory syncytial virus (RSV) among children in Amman, Jordan, based on 3 years of admissions diagnosed with RSV to Al Bashir hospital.
To estimate this parameter from raw counts of diagnoses, we need to establish the population of 1-year-old children from which the diagnosed individuals were sampled. This involved correcting census data (national estimate of 1-year-olds) for the proportion of the population in the city, as well as for the market share of the hospital. The latter is based on expert esimate, and hence encoded as a prior.
'''
import pymc3 as pm
import numpy as np
# 1-year-old children in Jordan
kids = np.array([180489, 191817, 190830])
# Proportion of population in Amman
amman_prop = 0.35
# infant RSV cases in Al Bashir hostpital
rsv_cases = np.array([40, 59, 65])
with pm.Model() as model:
# Al Bashir hospital market share
market_share = pm.Uniform('market_share', 0.5, 0.6)
# Number of 1 y.o. in Amman
n_amman = pm.Binomial('n_amman', kids, amman_prop, shape=3)
# Prior probability
prev_rsv = pm.Beta('prev_rsv', 1, 5, shape=3)
# RSV in Amman
y_amman = pm.Binomial('y_amman', n_amman, prev_rsv, shape=3, testval=100)
# Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1)
y_hosp = pm.Binomial('y_hosp', y_amman, market_share, observed=rsv_cases)
# Modified from
# https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/gelman_schools.py
import numpy as np
import matplotlib.pyplot as plt
from pymc3 import Model, Normal, HalfNormal, HalfCauchy, sample, traceplot, loo
J = 8
y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])
# Schools model defined at https://raw.githubusercontent.com/wiki/stan-dev/rstan/8schools.stan
with Model() as schools:
print('building model...')
eta = Normal('eta', 0, 1, shape=J)
mu = Normal('mu', 0, sd=1e6)
tau = HalfCauchy('tau', 25) # original model uses U[0,infty]
theta = mu + tau*eta
obs = Normal('obs', theta, sd=sigma, observed=y)
with schools:
print('sampling...')
tr = sample(1000)
l = loo(tr) # -29.6821436703
print('LOO estimate {}'.format(l))
traceplot(tr)