# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a classification benchmark. The enclosing `def`
# header, all indentation, and the training/timing loop that should feed the
# final `.format(` call are missing — the file appears truncated/mangled.
# Remap negative labels to 0 (presumably {-1,+1} labels, as produced by
# make_hastie_10_2 elsewhere in this file) for the Bernoulli-deviance models.
y[y<0] = 0
n, m = X.shape
# 50/50 train/test split for the benchmark.
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.5)
# Four classifiers benchmarked side by side with identical hyper-parameters
# (10 trees, learning rate 1.0, depth 4, 50% row subsampling).
models = {"palobst": PaloBoost(distribution="bernoulli",
n_estimators=10,
learning_rate=1.0,
max_depth=4,
subsample=0.5),
"palofrst": PaloForest(distribution="bernoulli",
n_estimators=10,
learning_rate=1.0,
max_depth=4,
# NOTE(review): `subsample0` is inconsistent with the `subsample=` keyword
# used by every other model in this dict — looks like a typo; confirm
# against PaloForest's constructor signature before fixing.
subsample0=0.5),
"gbm": GBM(distribution="bernoulli",
n_estimators=10,
learning_rate=1.0,
max_depth=4,
subsample=0.5),
"sklearn": GradientBoostingClassifier(
n_estimators=10,
learning_rate=1.0,
max_depth=4,
subsample=0.5)}
# Results-table header; one row per model was presumably printed below.
print("\n")
print("# Test Classification")
print("-----------------------------------------------------")
print(" model_name train_time predict_time auc ")
print("-----------------------------------------------------")
# NOTE(review): statement truncated — the `.format(` arguments (and the loop
# that trains each model and measures time/AUC) are missing from this chunk.
print(" {0:12} {1:12} {2:12} {3:.5f}".format(
# NOTE(review): regression benchmark comparing bonsai's GBM against
# scikit-learn's GradientBoostingRegressor on Friedman #1 synthetic data.
# Indentation has been stripped and the body is truncated mid-statement;
# the train/predict/RMSE loop is missing from this chunk.
def test_regression():
X, y = make_friedman1(n_samples=1000, noise=1.0)
#X, y = make_friedman2(n_samples=100000)
#X, y = make_friedman3(n_samples=100000)
n, m = X.shape
# 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)
# Two regressors with matched hyper-parameters (100 trees, lr 0.1, depth 3,
# 70% row subsampling) so timing/RMSE differences reflect the implementations.
models = {"bonsai-gbm": GBM(distribution="gaussian",
n_estimators=100,
learning_rate=0.1,
max_depth=3,
subsample=0.7),
"sklearn": GradientBoostingRegressor(
n_estimators=100,
learning_rate=0.1,
max_depth=3,
subsample=0.7)}
# Results-table header.
print("\n")
print("# Test Regression")
print("-----------------------------------------------------")
print(" model_name train_time predict_time rmse ")
print("-----------------------------------------------------")
# NOTE(review): statement truncated — `.format(` arguments missing.
print(" {0:12} {1:12} {2:12} {3:.5f}".format(
# NOTE(review): fragment of a second regression benchmark. The enclosing
# `def` header, indentation, and the training/timing loop are missing —
# the file appears truncated/mangled.
# Friedman #1 synthetic regression data with heavy noise (noise=5).
X, y = make_friedman1(n_samples=10000, noise=5)
n, m = X.shape
# 50/50 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.5)
# Four regressors with matched hyper-parameters (100 trees, lr 1.0, depth 4,
# 50% row subsampling).
models = {"palobst": PaloBoost(distribution="gaussian",
n_estimators=100,
learning_rate=1.0,
max_depth=4,
subsample=0.5),
"palofrst": PaloForest(distribution="gaussian",
n_estimators=100,
learning_rate=1.0,
max_depth=4,
# NOTE(review): `subsample0` is inconsistent with the `subsample=` keyword
# used by every other model here (same apparent typo as the classification
# fragment above) — confirm against PaloForest's constructor signature.
subsample0=0.5),
"gbm": GBM(distribution="gaussian",
n_estimators=100,
learning_rate=1.0,
max_depth=4,
subsample=0.5),
"sklearn": GradientBoostingRegressor(
n_estimators=100,
learning_rate=1.0,
max_depth=4,
subsample=0.5)}
# Results-table header.
print("\n")
print("# Test Regression")
print("-----------------------------------------------------")
print(" model_name train_time predict_time rmse ")
print("-----------------------------------------------------")
# NOTE(review): statement truncated — `.format(` arguments missing.
print(" {0:12} {1:12} {2:12} {3:.5f}".format(
# NOTE(review): classification benchmark comparing bonsai's GBM against
# scikit-learn's GradientBoostingClassifier on the Hastie 10.2 dataset.
# Indentation has been stripped and the body is truncated mid-statement;
# the train/predict/AUC loop is missing from this chunk.
def test_classification():
X, y = make_hastie_10_2(n_samples=100000)
# make_hastie_10_2 yields labels in {-1, +1}; remap -1 -> 0 for the
# Bernoulli-deviance models.
y[y<0] = 0
n, m = X.shape
# 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)
# Two classifiers with matched hyper-parameters (10 trees, lr 1.0, depth 3,
# 70% row subsampling).
models = {"bonsai-gbm": GBM(distribution="bernoulli",
n_estimators=10,
learning_rate=1.0,
max_depth=3,
subsample=0.7),
"sklearn": GradientBoostingClassifier(
n_estimators=10,
learning_rate=1.0,
max_depth=3,
subsample=0.7)}
# Results-table header.
print("\n")
print("# Test Classification")
print("-----------------------------------------------------")
print(" model_name train_time predict_time auc ")
print("-----------------------------------------------------")
# NOTE(review): statement truncated — `.format(` arguments missing.
print(" {0:12} {1:12} {2:12} {3:.5f}".format(
# NOTE(review): classification benchmark task — builds a PaloBoost /
# SGTB-Bonsai / XGBoost (and optionally scikit-learn) model set with shared
# hyper-parameters, then presumably bootstraps (`n_btstrp`) and fills
# `perf_df`. Indentation is stripped and the body is truncated right after
# the DataFrame is created; the evaluation loop is missing from this chunk.
# Parameters (types not verifiable from this chunk):
#   X, y           -- feature matrix and labels
#   n_estimators, learning_rate, max_depth -- shared boosting hyper-parameters
#   n_btstrp       -- number of bootstrap rounds (unused in visible code)
#   has_missing    -- when True, scikit-learn is excluded (presumably because
#                     its gradient boosting rejects missing values — confirm)
#   test_size      -- train/test split fraction (unused in visible code)
#   add_noise      -- unused in visible code
def clstask(X, y, n_estimators, learning_rate, max_depth, n_btstrp,
has_missing, test_size, add_noise):
models = {"0. PaloBoost": PaloBoost(distribution="bernoulli",
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7),
"1. SGTB-Bonsai": GBM(distribution="bernoulli",
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7),
"2. XGBoost": XGBClassifier(
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7)}
# scikit-learn is only added for complete data (see has_missing note above).
if not has_missing:
models["3. Scikit-Learn"] = GradientBoostingClassifier(
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7)
# Long-format results accumulator: one row per (model, metric, iteration,
# bootstrap index). NOTE(review): code filling it is missing from this chunk.
perf_df = pd.DataFrame(columns=["model", "value", "n_est", "b_idx"])
# NOTE(review): regression counterpart of `clstask` above — same model set
# with distribution="gaussian" and the regressor variants. Indentation is
# stripped and the function is cut off at the end of this chunk (no
# evaluation code visible). Parameters mirror `clstask` minus `add_noise`.
def regtask(X, y, n_estimators, learning_rate, max_depth, n_btstrp,
has_missing, test_size):
models = {"0. PaloBoost": PaloBoost(distribution="gaussian",
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7),
"1. SGTB-Bonsai": GBM(distribution="gaussian",
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7),
"2. XGBoost": XGBRegressor(
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7)}
# scikit-learn is only added for complete data (presumably because its
# gradient boosting rejects missing values — confirm against caller usage).
if not has_missing:
models["3. Scikit-Learn"] = GradientBoostingRegressor(
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
subsample=0.7)