import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt

def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv", np.r_[0.0], np.c_[1.0], shape=1, observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv", np.r_[0.1, 0.1], observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv", 0.1, 0.1, 10, observed=5)
        categorical_rv = pm.Categorical("categorical_rv", np.r_[0.5, 0.5], observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv", 5, np.r_[0.5, 0.5], observed=np.r_[2])
        negbinomial_rv = pm.NegativeBinomial("negbinomial_rv", 10.2, 0.5, observed=5)

    # Convert the PyMC3 model to a Theano `FunctionGraph` (via the converter under test)
    fgraph = model_graph(model)
    rvs_by_name = {n.owner.inputs[1].name: n.owner.inputs[1] for n in fgraph.outputs}
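    # A sketch of the kind of round-trip check this dictionary enables: every
    # observed RV defined in the PyMC3 model should appear among the
    # FunctionGraph outputs under the same name. `model.observed_RVs` is
    # standard PyMC3 Model API; everything else is defined above.
    pymc3_rv_names = {rv.name for rv in model.observed_RVs}
    assert all(name in rvs_by_name for name in pymc3_rv_names)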
from pymc3.math import dot, exp, log, sigmoid  # math helpers used below

def fit(self, X, B, T):
    n, k = X.shape
    with pymc3.Model() as m:
        beta_sd = pymc3.Exponential('beta_sd', 1.0)  # Weak prior for the regression coefficients
        beta = pymc3.Normal('beta', mu=0, sd=beta_sd, shape=(k,))  # Regression coefficients
        c = sigmoid(dot(X, beta))  # Conversion rates for each example
        k = pymc3.Lognormal('k', mu=0, sd=1.0)  # Weak prior around k=1 (shadows the column count above)
        lambd = pymc3.Exponential('lambd', 0.1)  # Weak prior

        # PDF of Weibull: k * lambda * (t * lambda)^(k-1) * exp(-(t * lambda)^k)
        LL_observed = log(c) + log(k) + log(lambd) + (k - 1) * (log(T) + log(lambd)) - (T * lambd)**k
        # CDF of Weibull: 1 - exp(-(t * lambda)^k)
        LL_censored = log((1 - c) + c * exp(-(T * lambd)**k))

        # We need to implement the likelihood using pymc3.Potential (custom likelihood)
        # https://github.com/pymc-devs/pymc3/issues/826
        logp = B * LL_observed + (1 - B) * LL_censored
        logpvar = pymc3.Potential('logpvar', logp.sum())

        self.trace = pymc3.sample(draws=500, tune=500, discard_tuned_samples=True, njobs=1)
        print('done')
    print('done 2')
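# A minimal standalone sketch of the same pm.Potential pattern (data and names
# here are illustrative, not from the library above): a custom
# censored-exponential likelihood added to the model's joint log-probability.
import numpy as np
import pymc3 as pm
import theano.tensor as tt

t_obs = np.array([0.5, 1.2, 3.0])  # observed durations
censored = np.array([0, 0, 1])     # 1 = right-censored at t_obs

with pm.Model():
    rate = pm.Exponential('rate', 1.0)
    # log f(t) for events, log S(t) for censored points
    loglik = (1 - censored) * (tt.log(rate) - rate * t_obs) + censored * (-rate * t_obs)
    pm.Potential('censored_loglik', loglik.sum())
    trace = pm.sample(500, tune=500, progressbar=False)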
# Reconstructed from context: `y` stacks both groups into one DataFrame (the
# `drug` and `placebo` arrays are defined earlier in this example).
y = pd.DataFrame({
    'value': np.r_[drug, placebo],
    'group': np.r_[['drug']*len(drug), ['placebo']*len(placebo)]
})

y_mean = y.value.mean()
y_std = y.value.std() * 2

sigma_low = 1
sigma_high = 10

with pm.Model():
    group1_mean = pm.Normal('group1_mean', y_mean, sd=y_std)
    group2_mean = pm.Normal('group2_mean', y_mean, sd=y_std)
    group1_std = pm.Uniform('group1_std', lower=sigma_low, upper=sigma_high)
    group2_std = pm.Uniform('group2_std', lower=sigma_low, upper=sigma_high)
    lambda_1 = group1_std**-2
    lambda_2 = group2_std**-2

    nu = pm.Exponential('ν_minus_one', 1/29.) + 1

    pm.StudentT('drug', nu=nu, mu=group1_mean, lam=lambda_1, observed=drug)
    pm.StudentT('placebo', nu=nu, mu=group2_mean, lam=lambda_2, observed=placebo)

    diff_of_means = pm.Deterministic('difference of means', group1_mean - group2_mean)
    pm.Deterministic('difference of stds', group1_std - group2_std)
    pm.Deterministic(
        'effect size', diff_of_means / np.sqrt((group1_std**2 + group2_std**2) / 2))

    trace = pm.sample(draws=20000, cores=4, chains=4,
                      progressbar=False, compute_convergence_checks=False)
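# Follow-up sketch for examining the group contrasts defined above with
# pm.Deterministic. Older PyMC3 releases spell the keyword `varnames`;
# newer ones use `var_names`.
pm.plot_posterior(trace, varnames=['difference of means',
                                   'difference of stds', 'effect size'],
                  ref_val=0)
pm.summary(trace, varnames=['difference of means', 'effect size'])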
from numpy.ma import masked_values

# Import data, filling missing values with sentinels (-999)
test_scores = pd.read_csv(pm.get_data('test_scores.csv')).fillna(-999)

# Extract variables: test score, gender, number of siblings, previous disability,
# age, mother with HS education or better, hearing loss identified by 3 months of age
(score, male, siblings, disability,
 age, mother_hs, early_ident) = test_scores[['score', 'male', 'siblings',
                                             'prev_disab', 'age_test',
                                             'mother_hs', 'early_ident']].astype(float).values.T

with pm.Model() as model:
    # Impute missing values (mask each sentinel so PyMC3 treats it as latent)
    sib_mean = pm.Exponential('sib_mean', 1.)
    siblings_imp = pm.Poisson('siblings_imp', sib_mean,
                              observed=masked_values(siblings, value=-999))

    p_disab = pm.Beta('p_disab', 1., 1.)
    disability_imp = pm.Bernoulli(
        'disability_imp', p_disab, observed=masked_values(disability, value=-999))

    p_mother = pm.Beta('p_mother', 1., 1.)
    mother_imp = pm.Bernoulli('mother_imp', p_mother,
                              observed=masked_values(mother_hs, value=-999))

    s = pm.HalfCauchy('s', 5., testval=5)
    beta = pm.Laplace('beta', 0., 100., shape=7, testval=.1)

    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp + beta[3] * disability_imp +
                      beta[4] * age + beta[5] * mother_imp + beta[6] * early_ident)

    # Likelihood for the observed test scores
    score_like = pm.Normal('score', expected_score, sd=s, observed=score)
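# PyMC3 turns the masked entries of each `observed` array into latent variables
# and samples them with everything else; by convention they appear in the trace
# under a `*_missing` suffix. A usage sketch:
with model:
    trace = pm.sample(1000, tune=1000)
print(trace['siblings_imp_missing'].mean(axis=0))  # posterior means of the imputed counts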
# Time series of recorded coal mining disasters in the UK from 1851 to 1961
disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                           3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                           2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                           1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                           0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                           3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                           0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
year = np.arange(1851, 1962)  # 111 years, matching the 111 observations

with pm.Model() as model:
    switchpoint = pm.DiscreteUniform('switchpoint', lower=year.min(), upper=year.max())
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate the appropriate Poisson rate to years before and after the
    # current switchpoint location
    rate = tt.switch(switchpoint >= year, early_mean, late_mean)
    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Initial values for stochastic nodes
    start = {'early_mean': 2., 'late_mean': 3.}

    tr = pm.sample(1000, tune=500, start=start)

pm.traceplot(tr)
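# Because `switchpoint` is discrete, its posterior can be summarized as simple
# value counts over the trace:
years, counts = np.unique(tr['switchpoint'], return_counts=True)
for y, c in zip(years, counts):
    print(int(y), round(c / len(tr['switchpoint']), 3))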
def apply_parameters(self, g, df, initialization_trace=None):
    for node in nx.topological_sort(g):
        parent_names = g.nodes()[node]["parent_names"]
        if parent_names:
            if not initialization_trace:
                # Heuristic initialization from the data: both the prior mean
                # and sd of the coefficients are set from the child/parent
                # sd ratios (intercept slot first)
                sd = np.array([df[node].std()] + (df[node].std() / df[parent_names].std()).tolist())
                mu = np.array([df[node].std()] + (df[node].std() / df[parent_names].std()).tolist())
                node_sd = df[node].std()
            else:
                # Warm-start from a previous trace
                node_sd = initialization_trace["{}_sd".format(node)].mean()
                mu = initialization_trace["beta_{}".format(node)].mean(axis=0)
                sd = initialization_trace["beta_{}".format(node)].std(axis=0)
            g.nodes()[node]["parameters"] = pm.Normal("beta_{}".format(node), mu=mu, sd=sd,
                                                      shape=len(parent_names) + 1)
            g.nodes()[node]["sd"] = pm.Exponential("{}_sd".format(node), lam=node_sd)
    return g
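# A self-contained sketch of the idea above (the toy graph is illustrative):
# walk a DAG in topological order and give each node one intercept plus one
# regression coefficient per parent, i.e. the same `len(parent_names) + 1`
# shape used by `apply_parameters`.
import networkx as nx
import pymc3 as pm

g = nx.DiGraph([('x', 'y')])
g.nodes['x']['parent_names'] = []
g.nodes['y']['parent_names'] = ['x']

with pm.Model():
    for node in nx.topological_sort(g):
        parents = g.nodes[node]['parent_names']
        if parents:
            g.nodes[node]['parameters'] = pm.Normal(
                'beta_{}'.format(node), mu=0., sd=10., shape=len(parents) + 1)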
# The custom log-likelihood consumed by `pm.DensityDist` below; reconstructed
# here so the snippet is self-contained (censored-exponential survival terms).
def logp(failure, lam, value):
    return tt.sum(failure * tt.log(lam) - lam * value)

def build_model():
    # data
    failure = np.array([0., 1.])
    value = np.array([1., 0.])

    # model
    with pm.Model() as model:
        lam = pm.Exponential('lam', 1.)
        pm.DensityDist('x', logp, observed={'failure': failure,
                                            'lam': lam,
                                            'value': value})
    return model
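# Usage sketch: `Model.logp` and `Model.test_point` are standard PyMC3 API, so
# the custom density can be checked at the model's initial point.
model = build_model()
print(model.logp(model.test_point))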
#fname = 'https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/data/SP500.csv'
#returns = pd.read_csv(fname)
#returns = pd.read_csv('SP500.csv')
#print(len(returns))
from pymc3.distributions import timeseries as ts  # GaussianRandomWalk lives here

n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples', "data/SP500.csv"))[-n:]
returns[:5]

plt.plot(returns)
plt.ylabel('daily returns in %');

with pymc3.Model() as sp500_model:
    nu = pymc3.Exponential('nu', 1./10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1./.02, testval=.1)
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pymc3.Deterministic('volatility_process', pymc3.exp(-2*s))
    r = pymc3.StudentT('r', nu, lam=1/volatility_process, observed=returns)

with sp500_model:
    print('optimizing...')
    start = pymc3.find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)

    print('sampling... (slow!)')
    step = pymc3.NUTS(scaling=start)
    trace = pymc3.sample(100, step, progressbar=False)

    # Start the next run at the last sampled position.
    step = pymc3.NUTS(scaling=trace[-1], gamma=.25)
    trace = pymc3.sample(1000, step, start=trace[-1], progressbar=False)
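# Follow-up sketch: since lam = 1/exp(-2*s), the implied volatility (sd) path
# is exp(-s); thinned posterior draws of it can be overlaid on the returns:
plt.plot(returns)
plt.plot(np.exp(-trace['s'][::10].T), 'r', alpha=.03)
plt.ylabel('daily returns in %')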
# Re-center data at mean, to reduce autocorrelation in MCMC sampling.
# Standardize (divide by SD) to make initialization easier.
x_m = np.mean(x)
x_sd = np.std(x)
y_m = np.mean(y)
y_sd = np.std(y)
zx = (x - x_m) / x_sd
zy = (y - y_m) / y_sd

tdf_gain = 1  # 1 for low-biased tdf, 100 for high-biased tdf

# THE MODEL
with pm.Model() as model:
    # define the priors
    tdf = pm.Exponential('tdf', 1/30.)
    sd = pm.HalfNormal('sd', 25)
    beta0 = pm.Normal('beta0', mu=0, sd=100)
    beta1 = pm.Normal('beta1', mu=0, sd=100)
    mu = beta0 + beta1 * zx
    # define the likelihood
    yl = pm.StudentT('yl', mu=mu, sd=sd, nu=tdf, observed=zy)
    # Generate an MCMC chain
    trace = pm.sample(2000)

# EXAMINE THE RESULTS
## Print summary for each trace
#pm.summary(trace)
## Check for mixing and autocorrelation
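# Sketch completing the commented-out diagnostics above; both functions are
# part of the PyMC3 plotting API (older releases spell the keyword `varnames`,
# newer ones `var_names`):
pm.traceplot(trace)
pm.autocorrplot(trace, varnames=['beta0', 'beta1', 'tdf'])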