# Imports for this snippet; module paths assume the pre-0.2 NGBoost API
# that these examples are written against.
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from ngboost.ngboost import NGBoost
from ngboost.learners import default_tree_learner
from ngboost.distns import Normal
from ngboost.scores import MLE

def test_regression(self):
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data, target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE,
                  natural_gradient=True, verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = mean_squared_error(y_test, preds)
    assert score <= 8.0
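
# For reference: the same check via the current NGBoost API (>= 0.2),
# where NGBRegressor bundles the defaults used above. A minimal sketch
# and an assumption on our part, not part of the original snippet
# (note load_boston itself was removed in scikit-learn 1.2).
from ngboost import NGBRegressor

def test_regression_sklearn_api(self):
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42)
    ngb = NGBRegressor(natural_gradient=True, verbose=False).fit(x_train, y_train)
    assert mean_squared_error(y_test, ngb.predict(x_test)) <= 8.0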
m, n = 1000, 50
if args.noise_dist == "Normal":
    noise = np.random.randn(m, 1)
elif args.noise_dist == "Laplace":
    noise = sp.stats.laplace.rvs(size=(m, 1))
beta = np.random.randn(n, 1)
X = np.random.randn(m, n) / np.sqrt(n)
Y = X @ beta + 0.5 * noise + 20
print(X.shape, Y.shape)
ngb = NGBoost(n_estimators=100, learning_rate=1.0,
              Dist=eval(args.dist),
              Base=default_linear_learner,
              natural_gradient=True,
              minibatch_frac=1.0,
              Score=MLE)  # pass the score class, as in the other snippets
ngb.fit(X, Y)
preds = ngb.pred_dist(X)
print(f"R2: {r2_score(Y, preds.loc):.4f}")
pctles, observed, slope, intercept = calibration_regression(preds, Y)
print(observed)
plt.figure(figsize=(8, 3))
plt.subplot(1, 2, 1)
plot_calibration_curve(pctles, observed)
plt.subplot(1, 2, 2)
plot_pit_histogram(pctles, observed)
plt.tight_layout()
plt.savefig("./figures/pit.pdf")
plt.show()
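
# What the calibration curve and PIT histogram above are built from, as
# a minimal sketch: the probability integral transform (PIT) of each
# outcome under its predicted Normal. This assumes `preds` exposes
# numpy-compatible .loc and .scale arrays (.loc is used above; .scale is
# our assumption). For a calibrated model the PIT is ~Uniform(0, 1).
import scipy.stats as sps

pit = sps.norm.cdf(Y.squeeze(), loc=preds.loc, scale=preds.scale)
print("PIT mean (ideally ~0.5):", pit.mean())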
E = (T > Y).astype(int)
print(X.shape, Y.shape, E.shape)
print(f"Event rate: {np.mean(E):.2f}")
X_tr, X_te, Y_tr, Y_te, T_tr, T_te, E_tr, E_te = train_test_split(
    X, Y, T, E, test_size=0.2
)
ngb = NGBSurvival(
    Dist=Exponential,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=True,
    Base=default_linear_learner,
    Score=MLE,
    verbose=True,
    verbose_eval=True,
)
train_losses = ngb.fit(X_tr, np.exp(np.minimum(Y_tr, T_tr)), E_tr)
preds = ngb.pred_dist(X_te)
print(f"R2: {r2_score(Y_te, np.log(preds.mean())):.4f}")
plt.hist(preds.mean(), range=(0, 10), bins=30, alpha=0.5, label="Pred")
plt.hist(np.exp(Y_te), range=(0, 10), bins=30, alpha=0.5, label="True")
plt.legend()
plt.show()
# Since the data are simulated, every outcome is fully observed, so we
# can check calibration as if there were no censoring.
pctles, observed, slope, intercept = calibration_regression(preds, Y_te)
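
# A self-contained sketch of the censoring convention used above (names
# here are illustrative, not from the snippet): the model is fit on the
# observed time min(Y, T) together with the event indicator E = (T > Y).
import numpy as np

true_time = np.exp(np.random.randn(100))        # hypothetical event times
censor_time = np.exp(np.random.randn(100))      # hypothetical censoring times
observed_time = np.minimum(true_time, censor_time)
event = (true_time <= censor_time).astype(int)  # 1 = observed, 0 = censored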
argparser = ArgumentParser()
argparser.add_argument("--n-estimators", type=int, default=301)
argparser.add_argument("--lr", type=float, default=0.03)
argparser.add_argument("--minibatch-frac", type=float, default=0.1)
argparser.add_argument("--natural", action="store_true")
args = argparser.parse_args()
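
# Example invocation for the flags above (the script name is hypothetical):
#   python 1d_regression.py --n-estimators 301 --lr 0.03 --natural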
x_tr, y_tr, _ = gen_data(n=50)
poly_transform = PolynomialFeatures(1)
x_tr = poly_transform.fit_transform(x_tr)
ngb = NGBoost(
    Base=default_tree_learner,
    Dist=Normal,
    Score=MLE,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=args.natural,
    minibatch_frac=args.minibatch_frac,
    verbose=True,
)
ngb.fit(x_tr, y_tr)
x_te, y_te, _ = gen_data(n=1000, bound=1.3)
x_te = poly_transform.transform(x_te)
preds = ngb.pred_dist(x_te)
pctles, obs, _, _ = calibration_regression(preds, y_te)
all_preds = ngb.staged_pred_dist(x_te)
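
# staged_pred_dist yields one predictive distribution per boosting
# stage; a minimal sketch of tracing test NLL across stages, assuming
# each staged Normal exposes numpy-compatible .loc and .scale arrays
# (only .loc appears in the snippets above):
import scipy.stats as sps

stage_nlls = [
    -sps.norm.logpdf(y_te.squeeze(), loc=d.loc, scale=d.scale).mean()
    for d in all_preds
]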
argparser = ArgumentParser()
argparser.add_argument("--n-estimators", type=int, default=(1 + BLK * 100))
argparser.add_argument("--lr", type=float, default=0.03)
argparser.add_argument("--minibatch-frac", type=float, default=0.1)
argparser.add_argument("--natural", action="store_true")
args = argparser.parse_args()
x_tr, y_tr, _ = gen_data(n=100)
poly_transform = PolynomialFeatures(1)
x_tr = poly_transform.fit_transform(x_tr)
ngb = NGBoost(
    Base=default_tree_learner,
    Dist=Normal,
    Score=MLE,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=args.natural,
    minibatch_frac=args.minibatch_frac,
    verbose=True,
)
blk = args.n_estimators // 100
ngb.fit(x_tr, y_tr)
x_te, y_te, _ = gen_data(n=1000, bound=1.3)
x_te = poly_transform.transform(x_te)
preds = ngb.pred_dist(x_te)
pctles, obs, _, _ = calibration_regression(preds, y_te)
"wine": lambda: pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', delimiter=";"),
"kin8nm": lambda: pd.read_csv("data/uci/kin8nm.csv"),
"naval": lambda: pd.read_csv("data/uci/naval-propulsion.txt", delim_whitespace=True, header=None).iloc[:,:-1],
"power": lambda: pd.read_excel("data/uci/power-plant.xlsx"),
"energy": lambda: pd.read_excel("https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx").iloc[:,:-1],
"protein": lambda: pd.read_csv("data/uci/protein.csv")[['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'RMSD']],
"yacht": lambda: pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data", header=None, delim_whitespace=True),
}
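
# Sketch of how a loader is consumed (assumes, per the UCI convention
# used above, that the target is the last column):
df = dataset_name_to_loader["wine"]()
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values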
base_name_to_learner = {
"tree": default_tree_learner,
"linear": default_linear_learner,
}
score_name_to_score = {
"MLE": MLE,
"CRPS": CRPS,
}
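
# These string-to-object maps let CLI flags pick components, e.g.:
Base = base_name_to_learner["tree"]   # -> default_tree_learner
Score = score_name_to_score["CRPS"]   # -> CRPS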
class RegressionLogger(object):
    def __init__(self, args):
        self.args = args
        self.verbose = args.verbose
        self.r2s = []
        self.mses = []
        self.nlls = []
        self.calib_scores = []
        self.calib_slopes = []

    def tick(self, forecast, y_test):
        y_pred = forecast.mean.detach().numpy()
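        # The excerpt ends here; a plausible continuation (our sketch,
        # not the original code) would accumulate the per-round metrics:
        # self.r2s.append(r2_score(y_test, y_pred))
        # self.mses.append(mean_squared_error(y_test, y_pred))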