def two_gaussians(x):
    log_like1 = - 0.5 * number_of_parameters * tt.log(2 * num.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
    log_like2 = - 0.5 * number_of_parameters * tt.log(2 * num.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
    return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))
with pm.Model() as self.PT_test:
    for data_key in self.data_keys:
        if data_key != "like":
            uniform = pm.Uniform(
                data_key, shape=number_of_parameters,
                lower=-2. * num.ones_like(mu1),
                upper=2. * num.ones_like(mu1),
                testval=-1. * num.ones_like(mu1), transform=None)
        else:
            like = pm.Deterministic('tmp', two_gaussians(uniform))
    pm.Potential(self.data_keys[self.like_index], like)
# create or get test folder to write files.
self.test_dir_path = os.path.join(os.path.dirname(__file__), 'PT_TEST')
if not os.path.exists(self.test_dir_path):
    try:
        os.mkdir(self.test_dir_path)
    except IOError as e:
        print(e)

# create data.
data_increment = 1
self.lpoint = []
for data_key in self.data_keys:
    if data_key != "like":
        chain_data = num.arange(number_of_parameters).astype(num.float) * data_increment
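# Illustrative sketch (not part of the test above): a self-contained version of the same
# pm.Potential pattern, assuming PyMC3 3.x with Theano. The concrete values for mu1, mu2,
# the covariance, and the weights are chosen here purely for illustration.
import numpy as np
import theano.tensor as tt
import pymc3 as pm

n_dim = 2
mu1 = np.ones(n_dim) * -1.0
mu2 = np.ones(n_dim) * 1.0
sigma = 0.1 * np.eye(n_dim)
isigma = np.linalg.inv(sigma)      # inverse covariance
dsigma = np.linalg.det(sigma)      # determinant of the covariance
w1, w2 = 0.1, 0.9                  # mixture weights

def mixture_logp(x):
    # log density of a two-component Gaussian mixture, mirroring two_gaussians above
    log_like1 = (-0.5 * n_dim * tt.log(2 * np.pi)
                 - 0.5 * tt.log(dsigma)
                 - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1))
    log_like2 = (-0.5 * n_dim * tt.log(2 * np.pi)
                 - 0.5 * tt.log(dsigma)
                 - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2))
    return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))

with pm.Model():
    x = pm.Uniform('x', shape=n_dim, lower=-2.0, upper=2.0, transform=None)
    # pm.Potential adds the custom log-probability term to the model's joint logp
    pm.Potential('like', mixture_logp(x))
    trace = pm.sample(1000, tune=1000, cores=1)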
def fit(self, X, Y, n_samples=10000, tune_steps=1000, n_jobs=4):
    with pm.Model() as self.model:
        # Priors
        std = pm.Uniform("std", 0, self.sps, testval=X.std())
        beta = pm.StudentT("beta", mu=0, lam=self.sps, nu=self.nu)
        alpha = pm.StudentT("alpha", mu=0, lam=self.sps, nu=self.nu, testval=Y.mean())

        # Deterministic model
        mean = pm.Deterministic("mean", alpha + beta * X)

        # Posterior distribution
        obs = pm.Normal("obs", mu=mean, sd=std, observed=Y)

        ## Run MCMC
        # Find sampling start values with maximum a posteriori estimation
        start = pm.find_MAP()

        # Sample the posterior distribution of the latent variables
        trace = pm.sample(n_samples, njobs=n_jobs, tune=tune_steps, start=start)

        # Keep the second half of the trace as burned-in posterior samples
        self.burned_trace = trace[int(n_samples / 2):]
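# Minimal standalone sketch of the same robust regression model (assuming PyMC3 3.x);
# `sps` and `nu` stand in for the class attributes self.sps and self.nu, and the data are synthetic.
import numpy as np
import pymc3 as pm

rng = np.random.RandomState(0)
X = np.linspace(0, 1, 100)
Y = 1.5 + 2.0 * X + rng.normal(0, 0.3, size=100)
sps, nu = 100.0, 3   # hypothetical prior scale and degrees of freedom

with pm.Model():
    std = pm.Uniform("std", 0, sps, testval=X.std())
    beta = pm.StudentT("beta", mu=0, lam=sps, nu=nu)
    alpha = pm.StudentT("alpha", mu=0, lam=sps, nu=nu, testval=Y.mean())
    mean = pm.Deterministic("mean", alpha + beta * X)
    pm.Normal("obs", mu=mean, sd=std, observed=Y)
    trace = pm.sample(2000, tune=1000, cores=1)

burned = trace[1000:]   # discard the first half as burn-in, as fit() does
print(burned["alpha"].mean(), burned["beta"].mean())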
    Args:
        X (pd.DataFrame): predictors to determine imputed values.

    Returns:
        np.array: imputed dataset.
    """
    # check that the imputer is fitted before predicting
    check_is_fitted(self, "statistics_")
    model = self.statistics_["param"]["model"]
    labels = self.statistics_["param"]["labels"]

    # add a Deterministic node for each missing value;
    # sampling then pulls from the posterior predictive distribution
    # of each missing data point, i.e. a distribution for EACH missing value
    with model:
        p_pred = pm.Deterministic(
            "p_pred", pm.invlogit(model["alpha"] + model["beta"].dot(X.T))
        )
        tr = pm.sample(
            self.sample,
            tune=self.tune,
            init=self.init,
            **self.sample_kwargs
        )
    self.trace_ = tr

    # decide how to impute: use the mean of the posterior predictive or a random draw
    # (using the MAP estimate is not supported yet)
    if not self.fill_value or self.fill_value == "mean":
        imp = tr["p_pred"].mean(0)
    elif self.fill_value == "random":
        imp = np.apply_along_axis(np.random.choice, 0, tr["p_pred"])
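# Sketch of the two fill strategies above on a dummy posterior-predictive array.
# Assumption: `tr["p_pred"]` has shape (n_samples, n_missing); `p_pred_samples` is a stand-in.
import numpy as np

p_pred_samples = np.random.beta(2, 5, size=(1000, 4))

imp_mean = p_pred_samples.mean(0)                                        # one value per missing cell
imp_random = np.apply_along_axis(np.random.choice, 0, p_pred_samples)    # one random posterior draw per cell

print(imp_mean.shape, imp_random.shape)   # both (4,)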
with self.model:
    if self.seasonality:
        if not isinstance(additive_seasonality, np.ndarray):
            pm.Deterministic(
                "additive_seasonality_hat_%s" % self.name, additive_seasonality
            )
        if not isinstance(multiplicative_seasonality, np.ndarray):
            pm.Deterministic(
                "multiplicative_seasonality_hat_%s" % self.name,
                multiplicative_seasonality,
            )
    if self.regressors:
        if not isinstance(additive_regressors, np.ndarray):
            pm.Deterministic(
                "additive_regressors_hat_%s" % self.name, additive_regressors
            )
        if not isinstance(multiplicative_regressors, np.ndarray):
            pm.Deterministic(
                "multiplicative_regressors_hat_%s" % self.name,
                multiplicative_regressors,
            )
    if self.holidays:
        if not isinstance(additive_holidays, np.ndarray):
            pm.Deterministic(
                "additive_holidays_hat_%s" % self.name, additive_holidays
            )
        if not isinstance(multiplicative_holidays, np.ndarray):
            pm.Deterministic(
                "multiplicative_holidays_hat_%s" % self.name,
                multiplicative_holidays,
            )
with pm.Model() as self.model:
    # getting the location primers
    for layer_index in range(self.num_layers):
        setattr(self, 'w%d' % layer_index,
                self.__get_weights(layer_index, self.weight_shapes[layer_index]))
        setattr(self, 'b%d' % layer_index,
                self.__get_biases(layer_index, self.bias_shapes[layer_index]))

        if layer_index == 0:
            fc = pm.Deterministic(
                'fc%d' % layer_index,
                pm.math.tanh(pm.math.dot(self.network_input, self.weight(layer_index)) + self.bias(layer_index)))
            setattr(self, 'fc%d' % layer_index, fc)
        elif 0 < layer_index < self.num_layers - 1:
            fc = pm.Deterministic(
                'fc%d' % layer_index,
                pm.math.tanh(pm.math.dot(getattr(self, 'fc%d' % (layer_index - 1)), self.weight(layer_index)) + self.bias(layer_index)))
            setattr(self, 'fc%d' % layer_index, fc)
        else:
            self._loc = pm.Deterministic(
                'bnn_out',
                pm.math.sigmoid(pm.math.dot(getattr(self, 'fc%d' % (layer_index - 1)), self.weight(layer_index)) + self.bias(layer_index)))

    # getting the precision / standard deviation / variance
    self.tau_rescaling = np.zeros((self.num_obs, self.network_input.shape[1]))
    for obs_index in range(self.num_obs):
        self.tau_rescaling[obs_index] += self.var_e_ranges
    self.tau_rescaling = self.tau_rescaling**2

    tau = pm.Gamma('tau', self.num_obs**2, 1., shape=(self.num_obs, self.network_input.shape[1]))
    self.tau = tau / self.tau_rescaling
    self.scale = pm.Deterministic('scale', 1. / pm.math.sqrt(self.tau))

    # learn the floats
    self.loc = pm.Deterministic('loc', (self.upper_rescalings - self.lower_rescalings) * self._loc + self.lower_rescalings)
    self.out_floats = pm.Normal('out_floats', self.loc[:, self.floats],
                                tau=self.tau[:, self.floats],
                                observed=self.network_output[:, self._floats])
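# Generic sketch of the same pattern with a single hidden layer (assuming PyMC3 3.x).
# The priors, layer sizes and data below are illustrative; they do not reproduce the
# class's __get_weights/__get_biases helpers or its rescaling logic.
import numpy as np
import pymc3 as pm

rng = np.random.RandomState(1)
X = rng.uniform(-1, 1, size=(50, 3))
y = 1.0 / (1.0 + np.exp(-X.sum(axis=1, keepdims=True))) + rng.normal(0, 0.05, size=(50, 1))
n_hidden = 8

with pm.Model():
    w0 = pm.Normal('w0', 0, 1, shape=(3, n_hidden))
    b0 = pm.Normal('b0', 0, 1, shape=(n_hidden,))
    w1 = pm.Normal('w1', 0, 1, shape=(n_hidden, 1))
    b1 = pm.Normal('b1', 0, 1, shape=(1,))

    fc0 = pm.Deterministic('fc0', pm.math.tanh(pm.math.dot(X, w0) + b0))           # hidden layer
    out = pm.Deterministic('bnn_out', pm.math.sigmoid(pm.math.dot(fc0, w1) + b1))  # output in (0, 1)

    tau = pm.Gamma('tau', 2.0, 1.0)               # observation precision
    pm.Normal('obs', mu=out, tau=tau, observed=y)
    trace = pm.sample(500, tune=500, cores=1)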
        tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
        * beta,
    )
    w, _ = theano.map(
        fn=lambda x: tt.switch(tt.gt(x, 1e-4), x, 0), sequences=[w1]
    )
    self.w = pm.Deterministic("w", w)
else:
    k = len(self.changepoints)
    w = 1
cgpt = pm.Deterministic(
    "cgpt",
    pm.Laplace("cgpt_inner", 0, self.changepoints_prior_scale, shape=k)
    * w,
)
self.priors["changepoints"] = pm.Deterministic(
    "changepoints_%s" % self.name, cgpt
)

if self.intercept and "intercept" not in self.priors:
    self.priors["intercept"] = pm.Normal(
        "intercept_%s" % self.name,
        self.data["y"].mean(),
        self.data["y"].std() * 2,
    )

self.priors_names = {k: v.name for k, v in self.priors.items()}
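# Sketch of how Laplace-prior changepoint magnitudes such as `cgpt` typically enter a
# piecewise-linear trend (the standard Prophet-style construction; `t`, `s` and the
# prior scales here are illustrative, not this library's exact code).
import numpy as np
import pymc3 as pm

t = np.linspace(0, 1, 200)                 # scaled time
s = np.linspace(0.1, 0.9, 5)               # changepoint locations
A = (t[:, None] >= s[None, :]) * 1.0       # A[i, j] = 1 once changepoint j is active at time t[i]

with pm.Model():
    k = pm.Normal('k', 0, 5)                               # base growth rate
    delta = pm.Laplace('delta', 0, 0.05, shape=len(s))     # rate change at each changepoint
    growth = pm.Deterministic('growth', (k + pm.math.dot(A, delta)) * t)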
#returns = pd.read_csv('SP500.csv')
#print(len(returns))
n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples', "data/SP500.csv"))[-n:]
returns[:5]

plt.plot(returns)
plt.ylabel('daily returns in %');

with pymc3.Model() as sp500_model:
    nu = pymc3.Exponential('nu', 1./10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1./.02, testval=.1)
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pymc3.Deterministic('volatility_process', pymc3.exp(-2*s))
    r = pymc3.StudentT('r', nu, lam=1/volatility_process, observed=returns)

with sp500_model:
    print('optimizing...')
    start = pymc3.find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)

    print('sampling... (slow!)')
    step = pymc3.NUTS(scaling=start)
    trace = pymc3.sample(100, step, progressbar=False)

    # Start next run at the last sampled position.
    step = pymc3.NUTS(scaling=trace[-1], gamma=.25)
    trace = pymc3.sample(1000, step, start=trace[-1], progressbar=False)

pymc3.traceplot(trace, [nu, sigma]);
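# One common way to visualize the fit (a sketch): overlay posterior draws of the returns'
# scale, which for this parameterization is sqrt(volatility_process) because
# lam = 1 / volatility_process. Thinning by 10 is arbitrary.
plt.plot(returns)
plt.plot(np.sqrt(trace['volatility_process'][::10].T), 'r', alpha=0.03)
plt.ylabel('daily returns in %');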
K = 30

x_plot = np.linspace(-10, 10, 500)

blue, *_ = sns.color_palette()

SEED = 54321
np.random.seed(SEED)

def stick_breaking(beta):
    portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
    return beta * portion_remaining

with pm.Model() as model:
    alpha = pm.Gamma('alpha', 1., 1.)
    beta = pm.Beta('beta', 1., alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))

    tau = pm.Gamma('tau', 1., 1., shape=K)
    lambda_ = pm.Uniform('lambda', 0, 5, shape=K)
    mu = pm.Normal('mu', 0, tau=lambda_ * tau, shape=K)
    obs = pm.NormalMixture('obs', w, mu, tau=lambda_ * tau,
                           observed=g_train[:, 0])

with model:
    trace = pm.sample(5000, random_seed=SEED)

pm.traceplot(trace, varnames=['alpha']);

# plots
fig, ax = plt.subplots(figsize=(8, 6))
plot_w = np.arange(K) + 1
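# Likely continuation of this plot (a sketch, following the usual PyMC3 DP-mixture example):
# the posterior expected mixture weights show how many of the K components are actually used.
ax.bar(plot_w - 0.5, trace['w'].mean(axis=0), width=1.0, lw=0)
ax.set_xlim(0.5, K)
ax.set_xlabel('Component')
ax.set_ylabel('Posterior expected mixture weight');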
            label = "%s_%s" % (label, key)
            return self._build_dist(spec, label, value.name, **value.args)
        return value

    kwargs = {k: _expand_args(k, v, label) for (k, v) in kwargs.items()}

    # Non-centered parameterization for hyperpriors
    if (
        spec.noncentered
        and "sd" in kwargs
        and "observed" not in kwargs
        and isinstance(kwargs["sd"], pm.model.TransformedRV)
    ):
        old_sd = kwargs["sd"]
        _offset = pm.Normal(label + "_offset", mu=0, sd=1, shape=kwargs["shape"])
        return pm.Deterministic(label, _offset * old_sd)

    return dist(label, **kwargs)
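# Sketch of what the non-centered branch above accomplishes (assuming PyMC3 3.x).
# Instead of sampling theta ~ Normal(0, sd) directly (the centered form), it samples a
# unit-normal offset and rescales it by sd, an equivalent parameterization that NUTS
# usually handles better when the group-level sd is weakly identified.
import pymc3 as pm

with pm.Model():
    sd = pm.HalfNormal('sd', 1.0)

    # centered form (what would be built without the branch above):
    # theta = pm.Normal('theta', mu=0, sd=sd, shape=5)

    # non-centered form (what the branch constructs):
    offset = pm.Normal('theta_offset', mu=0, sd=1, shape=5)
    theta = pm.Deterministic('theta', offset * sd)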