# Toy example: fit NGBoost on synthetic data and predict a full distribution
# on a held-out set. Imports reconstructed to make the snippet self-contained;
# gen_data is the synthetic-data helper from the ngboost examples (import
# path omitted here).
from argparse import ArgumentParser

from sklearn.preprocessing import PolynomialFeatures

from ngboost.distns import Normal
from ngboost.learners import default_tree_learner
from ngboost.ngboost import NGBoost
from ngboost.scores import MLE

if __name__ == "__main__":
    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=301)
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
    args = argparser.parse_args()

    x_tr, y_tr, _ = gen_data(n=50)
    poly_transform = PolynomialFeatures(1)
    x_tr = poly_transform.fit_transform(x_tr)

    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Normal,
        Score=MLE,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=args.natural,
        minibatch_frac=args.minibatch_frac,
        verbose=True,
    )
    ngb.fit(x_tr, y_tr)

    x_te, y_te, _ = gen_data(n=1000, bound=1.3)
    x_te = poly_transform.transform(x_te)
    preds = ngb.pred_dist(x_te)
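    # A quick way to score the probabilistic forecast: average negative
    # log-likelihood of the held-out labels. A minimal sketch, assuming the
    # returned distribution exposes a scipy-style logpdf, as in recent
    # ngboost releases:
    print(f"Test NLL: {-preds.logpdf(y_te).mean():.4f}")  # lower is better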
# Linear-model sanity check: simulate data with Normal or Laplace noise and
# fit NGBoost with a linear base learner. Imports reconstructed to make the
# snippet self-contained; the calibration helpers come from ngboost.evaluation.
from argparse import ArgumentParser

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats  # makes sp.stats available
from sklearn.metrics import r2_score

from ngboost.distns import Laplace, Normal  # noqa: F401 (resolved via eval below)
from ngboost.evaluation import calibration_regression, plot_calibration_curve
from ngboost.learners import default_linear_learner
from ngboost.ngboost import NGBoost
from ngboost.scores import MLE

argparser = ArgumentParser()
argparser.add_argument("--dist", type=str, default="Normal")
argparser.add_argument("--noise-dist", type=str, default="Normal")
args = argparser.parse_args()

m, n = 1000, 50
if args.noise_dist == "Normal":
    noise = np.random.randn(m, 1)
elif args.noise_dist == "Laplace":
    noise = sp.stats.laplace.rvs(size=(m, 1))
else:
    raise ValueError(f"Unknown noise distribution: {args.noise_dist}")
beta = np.random.randn(n, 1)
X = np.random.randn(m, n) / np.sqrt(n)
Y = X @ beta + 0.5 * noise + 20
print(X.shape, Y.shape)

ngb = NGBoost(
    n_estimators=100,
    learning_rate=1.0,
    Dist=eval(args.dist),  # map the CLI string to a distribution class
    Base=default_linear_learner,
    natural_gradient=True,
    minibatch_frac=1.0,
    Score=MLE(),
)
ngb.fit(X, Y)
preds = ngb.pred_dist(X)
print(f"R2: {r2_score(Y, preds.loc):.4f}")

pctles, observed, slope, intercept = calibration_regression(preds, Y)
print(observed)
plt.figure(figsize=(8, 3))
plt.subplot(1, 2, 1)
plot_calibration_curve(pctles, observed)
plt.subplot(1, 2, 2)
# Benchmark loop from the regression experiments. The dataset loaders,
# learner/score lookup tables, and RegressionLogger are helpers defined in
# the ngboost experiment scripts (their imports are omitted here).
from sklearn.model_selection import train_test_split

# load dataset -- use last column as label
data = dataset_name_to_loader[args.dataset]()
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# set default minibatch fraction based on dataset size
if not args.minibatch_frac:
    args.minibatch_frac = min(0.8, 5000 / len(X))

logger = RegressionLogger(args)
for rep in range(args.n_reps):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
    ngb = NGBoost(
        Base=base_name_to_learner[args.base],
        Dist=Normal,
        Score=score_name_to_score[args.score],
        n_estimators=args.n_est,
        learning_rate=args.lr,
        natural_gradient=True,
        second_order=True,
        quadrant_search=True,
        minibatch_frac=args.minibatch_frac,
        nu_penalty=1e-5,
        normalize_inputs=True,
        normalize_outputs=True,
        verbose=args.verbose,
    )
    ngb.fit(X_train, y_train)
    forecast = ngb.pred_dist(X_test)
    logger.tick(forecast, y_test)
import numpy as np
from sklearn.base import ClassifierMixin, RegressorMixin

from ngboost.distns import Bernoulli, Normal
from ngboost.ngboost import NGBoost


class NGBRegressor(NGBoost, RegressorMixin):
    """NGBoost for regression with the sklearn API."""

    def __init__(self, *args, **kwargs):
        super(NGBRegressor, self).__init__(Dist=Normal, *args, **kwargs)


class NGBClassifier(NGBoost, ClassifierMixin):
    """NGBoost for classification with the sklearn API.

    Warning:
        Dist needs to be Bernoulli.
        This model supports binary classification only.
    """

    def __init__(self, *args, **kwargs):
        super(NGBClassifier, self).__init__(Dist=Bernoulli, *args, **kwargs)

    def predict(self, X):
        # Threshold the predicted Bernoulli probability at 0.5.
        dist = self.pred_dist(X)
        return np.round(dist.prob)
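# A minimal usage sketch of the wrappers above (synthetic data; the dataset
# and hyperparameters are illustrative, not from the original):
from sklearn.datasets import make_classification

X_bin, y_bin = make_classification(n_samples=200, n_features=10, random_state=0)
wrapper_clf = NGBClassifier(n_estimators=100)
wrapper_clf.fit(X_bin, y_bin)
print(wrapper_clf.predict(X_bin)[:5])  # hard 0/1 labels via np.round(dist.prob)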
"""
Probability prediction of Y at the points X=x at multiple boosting iterations
Parameters:
X : numpy array of predictors (n x p)
max_iter : largest number of boosting iterations to get the prediction for
Output:
A list of of the estimates of P(Y=k|X=x) of shape (n, K), one per boosting stage up to max_iter
"""
return [
dist.class_probs() for dist in self.staged_pred_dist(X, max_iter=max_iter)
]
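# Usage sketch (hypothetical names: model is a fitted classifier from
# ngboost.api, X_val a validation matrix):
#
#     probs_per_stage = model.staged_predict_proba(X_val, max_iter=50)
#     final_probs = probs_per_stage[-1]   # estimates after 50 stages, shape (n, K)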
class NGBSurvival(NGBoost, BaseEstimator):
    """
    Constructor for NGBoost survival models.

    NGBSurvival is a wrapper for the generic NGBoost class that facilitates
    survival analysis. Use this class if you want to predict an outcome that
    could take an infinite number of (ordered) values, but right-censoring is
    present in the observed data.

    Parameters:
        Dist             : assumed distributional form of Y|X=x. A distribution from ngboost.distns, e.g. LogNormal
        Score            : rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore
        Base             : base learner to use in the boosting algorithm. Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient : logical flag indicating whether the natural gradient should be used
        n_estimators     : the number of boosting iterations to fit
        learning_rate    : the learning rate
        minibatch_frac   : the percent subsample of rows to use in each boosting iteration
        col_sample       : the percent subsample of columns to use in each boosting iteration
        verbose          : flag indicating whether output should be printed during fitting
        verbose_eval     : increment (in boosting iterations) at which output should be printed
    """

    def __getstate__(self):
        state = super().__getstate__()
        # Remove the unpicklable entries.
        if self.Dist.__name__ == "DistWithUncensoredScore":
            state["Dist"] = self.Dist.__base__
            state["uncensor"] = True
        return state

    def __setstate__(self, state_dict):
        # Re-wrap the distribution that __getstate__ unwrapped for pickling.
        if "uncensor" in state_dict.keys():
            state_dict["Dist"] = state_dict["Dist"].uncensor(state_dict["Score"])
        super().__setstate__(state_dict)
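# A minimal usage sketch for the class above (synthetic data; names are
# illustrative, assuming the full ngboost.api implementation). fit takes
# event times T and event indicators E, where E=1 marks an observed event
# and E=0 a right-censored observation:
X_demo = np.random.randn(100, 5)
T_demo = np.random.exponential(size=100)        # event or censoring times
E_demo = np.random.binomial(1, 0.7, size=100)   # 1 = observed, 0 = censored
surv_demo = NGBSurvival(Dist=LogNormal).fit(X_demo, T_demo, E_demo)
surv_preds = surv_demo.pred_dist(X_demo)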
class NGBClassifier(NGBoost, BaseEstimator):
    """
    Constructor for NGBoost classification models.

    NGBClassifier is a wrapper for the generic NGBoost class that facilitates
    classification. Use this class if you want to predict an outcome that
    could take a discrete number of (unordered) values.

    Parameters:
        Dist             : assumed distributional form of Y|X=x. A distribution from ngboost.distns, e.g. Bernoulli
        Score            : rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore
        Base             : base learner to use in the boosting algorithm. Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient : logical flag indicating whether the natural gradient should be used
        n_estimators     : the number of boosting iterations to fit
        learning_rate    : the learning rate
        minibatch_frac   : the percent subsample of rows to use in each boosting iteration
        col_sample       : the percent subsample of columns to use in each boosting iteration
        verbose          : flag indicating whether output should be printed during fitting
        verbose_eval     : increment (in boosting iterations) at which output should be printed
    """
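# A minimal usage sketch for the class above (synthetic data; names are
# illustrative, assuming the full ngboost.api implementation). For K > 2
# classes, ngboost provides k_categorical:
from ngboost.distns import k_categorical

X_cls = np.random.randn(300, 4)
y_cls = np.random.randint(0, 3, size=300)       # labels must be 0, 1, ..., K-1
clf_demo = NGBClassifier(Dist=k_categorical(3)).fit(X_cls, y_cls)
probs = clf_demo.predict_proba(X_cls)           # array of shape (n, K)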
from sklearn.base import BaseEstimator
from sklearn.utils import check_array

from ngboost.distns import (
    Bernoulli,
    ClassificationDistn,
    LogNormal,
    Normal,
    RegressionDistn,
)
from ngboost.helpers import Y_from_censored
from ngboost.learners import default_tree_learner
from ngboost.ngboost import NGBoost
from ngboost.scores import LogScore
class NGBRegressor(NGBoost, BaseEstimator):
    """
    Constructor for NGBoost regression models.

    NGBRegressor is a wrapper for the generic NGBoost class that facilitates
    regression. Use this class if you want to predict an outcome that could
    take an infinite number of (ordered) values.

    Parameters:
        Dist             : assumed distributional form of Y|X=x. A distribution from ngboost.distns, e.g. Normal
        Score            : rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore
        Base             : base learner to use in the boosting algorithm. Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient : logical flag indicating whether the natural gradient should be used
        n_estimators     : the number of boosting iterations to fit
        learning_rate    : the learning rate
        minibatch_frac   : the percent subsample of rows to use in each boosting iteration
        col_sample       : the percent subsample of columns to use in each boosting iteration
        verbose          : flag indicating whether output should be printed during fitting
        verbose_eval     : increment (in boosting iterations) at which output should be printed
    """
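# A minimal usage sketch for the class above (synthetic data; names are
# illustrative, assuming the full ngboost.api implementation):
X_reg = np.random.randn(500, 10)
y_reg = X_reg[:, 0] + 0.1 * np.random.randn(500)
reg_demo = NGBRegressor(Dist=Normal, Score=LogScore).fit(X_reg, y_reg)
point_preds = reg_demo.predict(X_reg)      # point predictions
dist_preds = reg_demo.pred_dist(X_reg)     # full predictive distributions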