# Rearrange the data to load it into the PyMC model.
coin = []  # list/vector with the coin index for each flip (from 0 to number of coins - 1)
y = []  # list/vector with heads (1) or tails (0) for each flip
for i, flips in enumerate(N):
    heads = z[i]
    if heads > flips:
        sys.exit("The number of heads can't be greater than the number of flips")
    else:
        y = y + [1] * heads + [0] * (flips - heads)
        coin = coin + [i] * flips
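# As a quick illustration of the rearrangement above (hypothetical toy values,
# not from the original data): with N = [3, 2] and z = [2, 1] the loop yields
#   y    = [1, 1, 0, 1, 0]   # heads listed first within each coin's flips
#   coin = [0, 0, 0, 1, 1]   # the coin index repeated once per flip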
# Specify the model in PyMC
with pm.Model() as model:
    # define the hyperparameters
    mu = pm.Beta('mu', 2, 2)
    kappa = pm.Gamma('kappa', 1, 0.1)
    # define the prior
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)
    # Generate an MCMC chain
    trace = pm.sample(5000, random_seed=123)
## Check the results.
## Print summary for each trace
#pm.summary(trace)
## Check for mixing and autocorrelation
pm.autocorrplot(trace, varnames=['mu', 'kappa'])
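# The (mu, kappa) parameterization above is just a reparameterized Beta: a
# Beta with mean mu and concentration kappa has standard shape parameters
# a = mu * kappa and b = (1 - mu) * kappa. A minimal sketch of the mapping
# (the helper name is illustrative, not part of the model above):
def beta_params_from_mean_concentration(mu, kappa):
    """Return the standard Beta(a, b) parameters for mean mu, concentration kappa."""
    return mu * kappa, (1 - mu) * kappa

print(beta_params_from_mean_concentration(0.5, 10.0))  # -> (5.0, 5.0)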
"regressors_%s" % self.name,
0,
self.regressors_prior_scale,
shape=len(self.regressors),
)
if self.growth and "growth" not in self.priors:
    self.priors["growth"] = pm.Normal("growth_%s" % self.name, 0, 0.1)
if len(self.changepoints) and "changepoints" not in self.priors:
    if self.auto_changepoints:
        k = self.n_changepoints
        alpha = pm.Gamma("alpha", 1.0, 1.0)
        beta = pm.Beta("beta", 1.0, alpha, shape=k)
        w1 = pm.Deterministic(
            "w1",
            tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
            * beta,
        )
        w, _ = theano.map(
            fn=lambda x: tt.switch(tt.gt(x, 1e-4), x, 0), sequences=[w1]
        )
        self.w = pm.Deterministic("w", w)
    else:
        k = len(self.changepoints)
        w = 1
    cgpt = pm.Deterministic(
        "cgpt",
        pm.Laplace("cgpt_inner", 0, self.changepoints_prior_scale, shape=k)
        * w,
    )
def stick_breaking(v):
    portion_remaining = tt.concatenate(
        [[1], tt.extra_ops.cumprod(1 - v)[:-1]])
    return v * portion_remaining
model = pm.Model()
with model:
    K = self.num_truncate
    D = self.num_pred
    alpha = pm.Gamma('alpha', 1.0, 1.0)
    v = pm.Beta('v', 1, alpha, shape=K)
    pi_ = stick_breaking(v)
    pi = pm.Deterministic('pi', pi_ / pi_.sum())
    means = tt.stack([pm.Uniform('cluster_center_{}'.format(k),
                                 lower=0.,
                                 upper=10.,
                                 shape=D) for k in range(K)])
    lower = tt.stack([pm.LKJCholeskyCov('cluster_variance_{}'.format(k),
                                        n=D,
                                        eta=2.,
                                        sd_dist=pm.HalfNormal.dist(sd=1.)) for k in range(K)])
    chol = tt.stack([pm.expand_packed_triangular(D, lower[k]) for k in range(K)])
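# The stick-breaking transform above turns the Beta draws v into mixture
# weights: each weight is the current break v[k] times the stick left over
# from earlier breaks, so the weights are positive and sum to at most 1
# (which is why pi_ is renormalized into pi). A minimal NumPy sketch of the
# same construction, outside the model:
import numpy as np

def stick_breaking_np(v):
    portion_remaining = np.concatenate([[1.0], np.cumprod(1.0 - v)[:-1]])
    return v * portion_remaining

v_draws = np.random.default_rng(0).beta(1.0, 1.0, size=10)
weights = stick_breaking_np(v_draws)
print(weights.sum())  # <= 1; the truncation leaves a small remainder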
# Extract variables: test score, gender, number of siblings, previous disability, age,
# mother with HS education or better, hearing loss identified by 3 months
# of age
(score, male, siblings, disability,
 age, mother_hs, early_ident) = test_scores[['score', 'male', 'siblings',
                                             'prev_disab', 'age_test',
                                             'mother_hs', 'early_ident']].astype(float).values.T
from numpy.ma import masked_values

with pm.Model() as model:
    # Impute missing values (coded as -999 in the raw data)
    sib_mean = pm.Exponential('sib_mean', 1.)
    siblings_imp = pm.Poisson('siblings_imp', sib_mean,
                              observed=masked_values(siblings, value=-999))
    p_disab = pm.Beta('p_disab', 1., 1.)
    disability_imp = pm.Bernoulli('disability_imp', p_disab,
                                  observed=masked_values(disability, value=-999))
    p_mother = pm.Beta('p_mother', 1., 1.)
    mother_imp = pm.Bernoulli('mother_imp', p_mother,
                              observed=masked_values(mother_hs, value=-999))
    s = pm.HalfCauchy('s', 5., testval=5)
    beta = pm.Laplace('beta', 0., 100., shape=7, testval=.1)
    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp
                      + beta[3] * disability_imp + beta[4] * age
                      + beta[5] * mother_imp + beta[6] * early_ident)
    observed_score = pm.Normal('observed_score', expected_score, s, observed=score)
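# A minimal sketch of fitting the model and inspecting the imputations. When
# a masked array is passed as observed, PyMC3 creates latent variables for
# the masked entries (named '<var>_missing'); the sampler settings below are
# an assumption about how this model is run, not code from the source.
with model:
    trace = pm.sample(1000, tune=1000)

print(trace['siblings_imp_missing'].mean(axis=0))  # posterior means of imputed sibling counts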
@convert_dist_to_rv.register(pm.Beta, object)
def convert_dist_to_rv_Beta(dist, rng):
    size = dist.shape.astype(int)[BetaRV.ndim_supp:]
    res = BetaRV(dist.alpha, dist.beta, size=size, rng=rng)
    return res
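# The .register(pm.Beta, object) decorator above is multiple dispatch on the
# argument types. A hypothetical, self-contained sketch of the same pattern
# using the multipledispatch library (the dispatcher and function names here
# are illustrative, not the project's actual API):
from multipledispatch import Dispatcher

convert = Dispatcher('convert')

@convert.register(int, object)
def _convert_int(value, ctx):
    return float(value)  # the int-specific conversion rule

print(convert(3, None))  # -> 3.0, dispatched on the (int, object) signature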
# model, and present a few possible workarounds.
import pymc3 as mc
# We define a simple model of a survey with one data point. We use a $Beta$
# distribution for the $p$ parameter in a binomial. We would like to know both
# the posterior distribution of $p$ and the posterior predictive distribution
# over the survey parameter.
alpha = 4
beta = 4
n = 20
yes = 15
with mc.Model() as model:
    p = mc.Beta('p', alpha, beta)
    surv_sim = mc.Binomial('surv_sim', n=n, p=p)
    surv = mc.Binomial('surv', n=n, p=p, observed=yes)
# First let's try to use `find_MAP`.
with model:
    print(mc.find_MAP())
# `find_MAP` defaults to finding the MAP for only the continuous variables; we
# have to specify if we would like to use the discrete variables as well.
with model:
    print(mc.find_MAP(vars=model.vars))
# We set the `disp` variable to display a warning that we are using a
# non-gradient minimization technique, as discrete variables do not give much
# gradient information.
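# Because `surv_sim` is left unobserved but shares `p` with the observed
# `surv`, MCMC draws of it are posterior predictive samples. A minimal
# sketch (the sampler settings are an assumption, not from the source):
with model:
    trace = mc.sample(2000)

print(trace['p'].mean())       # posterior mean of p
print(trace['surv_sim'][:10])  # posterior predictive draws of the survey count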
"""
from __future__ import division
import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')
import numpy as np
import pymc3 as pm
# Generate the data
y1 = np.array([1, 1, 1, 1, 1, 0, 0]) # 5 heads and 2 tails
y2 = np.array([1, 1, 0, 0, 0, 0, 0]) # 2 heads and 5 tails
with pm.Model() as model:
    # define the prior
    theta1 = pm.Beta('theta1', 3, 3)  # prior
    theta2 = pm.Beta('theta2', 3, 3)  # prior
    # define the likelihood
    y1 = pm.Bernoulli('y1', p=theta1, observed=y1)
    y2 = pm.Bernoulli('y2', p=theta2, observed=y2)
    # Generate an MCMC chain
    trace = pm.sample(1000)
# create arrays with the posterior samples
theta1_sample = trace['theta1']
theta2_sample = trace['theta2']
# Plot the trajectory of the last 500 sampled values.
plt.plot(theta1_sample[-500:], theta2_sample[-500:], marker='o', color='skyblue')
plt.xlim(0, 1)
plt.ylim(0, 1)
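# The payoff of fitting both coins jointly is the posterior of their
# difference; a short sketch of the comparison:
diff = theta1_sample - theta2_sample
print('P(theta1 > theta2 | data) =', (diff > 0).mean())
plt.figure()
plt.hist(diff, bins=50)
plt.xlabel(r'$\theta_1 - \theta_2$')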
N = g_train.shape[0]
K = 30
x_plot = np.linspace(-10, 10, 500)
blue, *_ = sns.color_palette()
SEED = 54321
np.random.seed(SEED)
def stick_breaking(beta):
    portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
    return beta * portion_remaining
with pm.Model() as model:
    alpha = pm.Gamma('alpha', 1., 1.)
    beta = pm.Beta('beta', 1., alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))
    tau = pm.Gamma('tau', 1., 1., shape=K)
    lambda_ = pm.Uniform('lambda', 0, 5, shape=K)
    mu = pm.Normal('mu', 0, tau=lambda_ * tau, shape=K)
    obs = pm.NormalMixture('obs', w, mu, tau=lambda_ * tau,
                           observed=g_train[:, 0])
with model:
    trace = pm.sample(5000, random_seed=SEED)
pm.traceplot(trace, varnames=['alpha']);
# plots
fig, ax = plt.subplots(figsize=(8, 6))
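# A sketch of the plot this figure is presumably set up for: the posterior
# expected density over x_plot, assembled from the traced 'w', 'mu',
# 'lambda' and 'tau' (component sd = 1/sqrt(lambda * tau)). The variable
# names follow the model above; the plotting itself is an assumption.
from scipy import stats

w_s, mu_s = trace['w'][::10], trace['mu'][::10]  # thin the trace to keep arrays small
sd_s = 1.0 / np.sqrt(trace['lambda'][::10] * trace['tau'][::10])
pdf_contribs = stats.norm.pdf(x_plot[np.newaxis, :, np.newaxis],
                              mu_s[:, np.newaxis, :],
                              sd_s[:, np.newaxis, :])  # (n_samples, len(x_plot), K)
post_pdfs = (w_s[:, np.newaxis, :] * pdf_contribs).sum(axis=-1)
ax.plot(x_plot, post_pdfs.mean(axis=0), c=blue)
ax.set_xlabel('x')
ax.set_ylabel('Posterior expected density')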
z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45,
61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64,
64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57,
59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55,
38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32,
48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49,
52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46,
56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41,
53, 32, 43, 33])
condition = np.repeat([0,1,2,3], nSubj)
# Specify the model in PyMC
with pm.Model() as model:
    # define the hyperparameters
    kappa = pm.Gamma('kappa', 1, 0.1)
    mu = pm.Beta('mu', 1, 1, shape=ncond)
    # define the prior
    theta = pm.Beta('theta', mu[condition] * kappa, (1 - mu[condition]) * kappa, shape=len(z))
    # define the likelihood
    y = pm.Binomial('y', p=theta, n=N, observed=z)
    trace = pm.sample(1000)
## Check the results.
## Print summary for each trace
#pm.summary(trace)
## Check for mixing and autocorrelation
#pm.autocorrplot(trace, varnames=['mu', 'kappa'])
## Plot KDE and sampled values for each parameter.
pm.traceplot(trace)
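# With one mu per condition, between-condition contrasts come straight from
# the trace; a minimal sketch of one such comparison (the indices are
# illustrative):
mu_sample = trace['mu']                      # shape (n_samples, ncond)
diff_01 = mu_sample[:, 0] - mu_sample[:, 1]
print('P(mu_0 > mu_1 | data) =', (diff_01 > 0).mean())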
K = 50
N = d_train.shape[0]
x_plot = np.arange(250)
blue, *_ = sns.color_palette()
SEED = 5132290 # from random.org
np.random.seed(SEED)
def stick_breaking(beta):
    portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
    return beta * portion_remaining
with pm.Model() as model:
    alpha = pm.Gamma('alpha', 1., 1.)
    beta = pm.Beta('beta', 1, alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))
    mu = pm.Uniform('mu', 0., 300., shape=K)
    obs = pm.Mixture('obs', w, pm.Poisson.dist(mu), observed=d_train[:, 0])
with model:
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, random_seed=SEED)
# plots
pm.traceplot(trace, varnames=['alpha']);
fig, ax = plt.subplots(figsize=(8, 6))
plot_w = np.arange(K) + 1
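# plot_w sets up a bar chart of the posterior expected mixture weights, a
# standard check of how many of the K truncated components the DP mixture
# actually uses. A sketch of the plot it feeds (the styling is an assumption):
ax.bar(plot_w - 0.5, trace['w'].mean(axis=0), width=1.0, lw=0)
ax.set_xlim(0.5, K)
ax.set_xlabel('Component')
ax.set_ylabel('Posterior expected mixture weight')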