def test_rgs(self):
    # Regression benchmark: PaloBoost vs. scikit-learn's GBM on a
    # polynomial variant of the Friedman-1 dataset.
    np.random.seed(1)
    n_samples = 10000
    test_size = 0.2
    n_est = 100
    max_depth = 7
    lr = 0.1
    X, y = make_friedman1_poly(n_samples=n_samples)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size)
    model_palo = PaloBoost(distribution="gaussian",
                           n_estimators=n_est,
                           learning_rate=lr,
                           max_depth=max_depth)
    model_sklr = GradientBoostingRegressor(n_estimators=n_est,
                                           learning_rate=lr,
                                           max_depth=max_depth)
    model_palo.fit(X_train, y_train)
    y_hat = model_palo.predict(X_test)
    rmse_palo = np.sqrt(np.mean((y_test - y_hat)**2))
    model_sklr.fit(X_train, y_train)
    y_hat = model_sklr.predict(X_test)
    rmse_sklr = np.sqrt(np.mean((y_test - y_hat)**2))
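
    # The snippet is truncated here; a plausible closing check (a sketch,
    # not necessarily the library's own assertion) compares the two errors:
    self.assertTrue(rmse_palo < rmse_sklr)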

def test_classification():
    X, y = make_hastie_10_2(n_samples=10000)
    y[y < 0] = 0  # map Hastie labels from {-1, +1} to {0, 1}
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5)
    models = {"palobst": PaloBoost(distribution="bernoulli",
                                   n_estimators=10,
                                   learning_rate=1.0,
                                   max_depth=4,
                                   subsample=0.5),
              "palofrst": PaloForest(distribution="bernoulli",
                                     n_estimators=10,
                                     learning_rate=1.0,
                                     max_depth=4,
                                     subsample0=0.5),
              "gbm": GBM(distribution="bernoulli",
                         n_estimators=10,
                         learning_rate=1.0,
                         max_depth=4,
                         subsample=0.5),
              # (constructor arguments assumed to mirror the entries above)
              "sklearn": GradientBoostingClassifier(n_estimators=10,
                                                    learning_rate=1.0,
                                                    max_depth=4,
                                                    subsample=0.5)}
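
    # Hedged sketch of an evaluation loop for the models above (the original
    # snippet ends at the dict; assumes roc_auc_score is imported):
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_hat = model.predict_proba(X_test)[:, 1]
        print("{0}: AUC = {1:.4f}".format(name, roc_auc_score(y_test, y_hat)))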

def test_regression():
    X, y = make_friedman1(n_samples=10000, noise=5)
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5)
    models = {"palobst": PaloBoost(distribution="gaussian",
                                   n_estimators=100,
                                   learning_rate=1.0,
                                   max_depth=4,
                                   subsample=0.5),
              "palofrst": PaloForest(distribution="gaussian",
                                     n_estimators=100,
                                     learning_rate=1.0,
                                     max_depth=4,
                                     subsample0=0.5),
              "gbm": GBM(distribution="gaussian",
                         n_estimators=100,
                         learning_rate=1.0,
                         max_depth=4,
                         subsample=0.5),
              # (constructor arguments assumed to mirror the entries above)
              "sklearn": GradientBoostingRegressor(n_estimators=100,
                                                   learning_rate=1.0,
                                                   max_depth=4,
                                                   subsample=0.5)}
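
    # Matching evaluation sketch for the regression models (an assumption
    # about how the truncated snippet continues):
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_hat = model.predict(X_test)
        rmse = np.sqrt(np.mean((y_test - y_hat)**2))
        print("{0}: RMSE = {1:.4f}".format(name, rmse))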

def test_cls(self):
    # Classification benchmark: PaloBoost vs. scikit-learn's GBM.
    np.random.seed(1)
    n_samples = 10000
    test_size = 0.2
    n_est = 100
    max_depth = 7
    lr = 0.1
    X, y = make_hastie_10_2(n_samples)
    y[y < 0] = 0  # bernoulli models expect {0, 1} labels
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size)
    model_palo = PaloBoost(distribution="bernoulli",
                           n_estimators=n_est,
                           learning_rate=lr,
                           max_depth=max_depth)
    model_sklr = GradientBoostingClassifier(n_estimators=n_est,
                                            learning_rate=lr,
                                            max_depth=max_depth)
    model_palo.fit(X_train, y_train)
    y_hat = model_palo.predict_proba(X_test)[:, 1]
    auc_palo = roc_auc_score(y_test, y_hat)
    model_sklr.fit(X_train, y_train)
    y_hat = model_sklr.predict_proba(X_test)[:, 1]
    auc_sklr = roc_auc_score(y_test, y_hat)
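
    # As in test_rgs, a plausible closing assertion (a sketch only; the
    # original snippet is cut off) would compare the two AUCs:
    self.assertTrue(auc_palo > auc_sklr)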

def run_aux(learning_rate, max_depth, n_estimators=200):
    X, y = get_friedman()
    model = PaloBoost(distribution="gaussian",
                      n_estimators=n_estimators,
                      learning_rate=learning_rate,
                      max_depth=max_depth,
                      subsample=0.7)
    model.fit(X, y)
    prune_df = pd.DataFrame(model.get_prune_stats())
    prune_df.columns = ["iteration", "nodes_pre", "nodes_post"]
    lr_df = pd.DataFrame(model.get_lr_stats())
    lr_df.columns = ["iteration", "lr"]
    prune_df.to_csv("results/prune_{}_{}.csv".format(learning_rate, max_depth),
                    index=False)
    lr_df.to_csv("results/lr_{}_{}.csv".format(learning_rate, max_depth),
                 index=False)
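
# Usage sketch for run_aux over a small hyperparameter grid (the grid
# values are illustrative, not taken from the original script):
for lr in [0.1, 0.5, 1.0]:
    for depth in [3, 5, 7]:
        run_aux(learning_rate=lr, max_depth=depth)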

# Per-estimator body of PaloForest's bagging loop (fragment; `i` indexes
# the bootstrap round). Rows are sampled in contiguous blocks when
# block_size is set, otherwise row by row.
if self.block_size is not None:
    n_block = int(n / self.block_size) + 1
    mask_block = (np.random.rand(n_block) < self.subsample0)
    mask = np.repeat(mask_block, self.block_size)[:n]
else:
    mask = (np.random.rand(n) < self.subsample0)
X_i, y_i = X[mask, :], y[mask]    # in-bag rows: train the estimator
X_j, y_j = X[~mask, :], y[~mask]  # held-out rows: used for calibration
# Skip rounds where either split is single-class, since calibration
# is undefined there.
if (self.distribution == "bernoulli" and
        (np.unique(y_i).shape[0] == 1 or
         np.unique(y_j).shape[0] == 1)):
    continue
est = PaloBoost(distribution=self.distribution,
                learning_rate=self.learning_rate,
                max_depth=self.max_depth,
                n_estimators=self.n_estimators,
                subsample=self.subsample1,
                subsample_splts=self.subsample2,
                random_state=i * self.n_estimators)
est.fit(X_i, y_i)
self.estimators.append(est)
if self.feature_importances_ is None:
    self.feature_importances_ = est.feature_importances_
else:
    self.feature_importances_ += est.feature_importances_
if (self.distribution == "bernoulli" and
        self.calibrate):
    z_j = est.predict_proba(X_j)[:, 1]
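    # The fragment ends where the held-out predictions z_j would feed a
    # per-estimator calibrator. A minimal stand-in (an assumption; the
    # library may implement its own calibration) is Platt scaling with
    # scikit-learn's LogisticRegression:
    from sklearn.linear_model import LogisticRegression
    calibrator = LogisticRegression()
    calibrator.fit(z_j.reshape(-1, 1), y_j)
    self.calibrators.append(calibrator)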

def clstask(X, y, n_estimators, learning_rate, max_depth, n_btstrp,
            has_missing, test_size, add_noise):
    models = {"0. PaloBoost": PaloBoost(distribution="bernoulli",
                                        n_estimators=n_estimators,
                                        learning_rate=learning_rate,
                                        max_depth=max_depth,
                                        subsample=0.7),
              "1. SGTB-Bonsai": GBM(distribution="bernoulli",
                                    n_estimators=n_estimators,
                                    learning_rate=learning_rate,
                                    max_depth=max_depth,
                                    subsample=0.7),
              "2. XGBoost": XGBClassifier(n_estimators=n_estimators,
                                          learning_rate=learning_rate,
                                          max_depth=max_depth,
                                          subsample=0.7)}
    if not has_missing:
        # scikit-learn's GBM has no native missing-value handling, so it is
        # only added when the data are complete (arguments assumed to mirror
        # the models above; the original is truncated here).
        models["3. Scikit-Learn"] = GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            subsample=0.7)

def regtask(X, y, n_estimators, learning_rate, max_depth, n_btstrp,
            has_missing, test_size):
    models = {"0. PaloBoost": PaloBoost(distribution="gaussian",
                                        n_estimators=n_estimators,
                                        learning_rate=learning_rate,
                                        max_depth=max_depth,
                                        subsample=0.7),
              "1. SGTB-Bonsai": GBM(distribution="gaussian",
                                    n_estimators=n_estimators,
                                    learning_rate=learning_rate,
                                    max_depth=max_depth,
                                    subsample=0.7),
              "2. XGBoost": XGBRegressor(n_estimators=n_estimators,
                                         learning_rate=learning_rate,
                                         max_depth=max_depth,
                                         subsample=0.7)}
    if not has_missing:
        # As in clstask: arguments assumed to mirror the models above
        # (the original is truncated here).
        models["3. Scikit-Learn"] = GradientBoostingRegressor(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            subsample=0.7)

def load(self, model):
    # NOTE: not yet
    # Restore a PaloForest from a serialized dict: calibrators, flags, and
    # one nested dict per PaloBoost estimator.
    self.calibrators = model["clb"]
    self.calibrate = model["calibrate"]
    self.distribution = model["distribution"]
    self.estimators = []
    for d in model["est"]:
        est = PaloBoost()
        est.load(d)
        self.estimators.append(est)
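
# For symmetry, a hedged sketch of the dump() counterpart implied by the
# keys load() reads (an assumption, not confirmed from the original source):
def dump(self):
    return {"clb": self.calibrators,
            "calibrate": self.calibrate,
            "distribution": self.distribution,
            "est": [est.dump() for est in self.estimators]}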