Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
algo=tpe.suggest, max_evals=n_eval, verbose=1,
rstate=self.random_state)
hyperparams = space_eval(self.space, best)
return hyperparams, trials
def fit(self, X, y):
    """Train a fresh ``XGBModel`` on the selected feature columns.

    A new model is created with ``self.n_best`` estimators plus the keyword
    settings held in ``self.params``, stored on ``self.model``, and then
    fitted on ``X[self.features]`` against ``y``.

    Args:
        X: feature table; it must support ``X[self.features]`` (presumably a
            pandas DataFrame -- confirm against callers).
        y: training target, handed to ``XGBModel.fit`` unchanged.

    Returns:
        ``self``, so that calls can be chained.
    """
    estimator = XGBModel(n_estimators=self.n_best, **self.params)
    self.model = estimator
    train_X = X[self.features]
    estimator.fit(X=train_X, y=y, eval_metric='mae', verbose=False)
    return self
def predict(self, X):
    """Return the stored model's predictions for ``X``.

    Only the columns named in ``self.features`` are passed on to
    ``self.model.predict``.

    Args:
        X: feature table; it must support ``X[self.features]``.

    Returns:
        Whatever ``self.model.predict`` returns for the selected columns.
    """
    run_model = self.model.predict
    return run_model(X[self.features])
class AutoLGB(BaseAutoML):
# Class-level parameter dict for AutoLGB.
# NOTE(review): the keys look like LightGBM training parameters; how they are
# consumed is not visible in this excerpt -- confirm against the training call.
params = {
"bagging_freq": 1,
"verbosity": -1,
# RANDOM_SEED is a module-level name that is not defined in this excerpt.
"seed": RANDOM_SEED,
"num_threads": -1,
}
space = {
"learning_rate": hp.loguniform("learning_rate", np.log(0.01), np.log(0.3)),
"num_leaves": hp.choice("num_leaves", [15, 31, 63, 127, 255]),
"max_depth": hp.choice("max_depth", [-1, 4, 6, 8, 10]),
"feature_fraction": hp.quniform("feature_fraction", .5, .9, 0.1),
"bagging_fraction": hp.quniform("bagging_fraction", .5, .9, 0.1),
"min_child_samples": hp.choice('min_child_samples', [10, 25, 100]),
"lambda_l1": hp.choice('lambda_l1', [0, .1, 1, 10]),
imp = imp.sort_values('feature_importances', ascending=False).drop_duplicates()
if len(random_cols) == 0:
imp = imp[imp['feature_importances'] != 0]
else:
th = imp.loc[imp.feature_names.isin(random_cols), 'feature_importances'].mean()
logger.debug('feature importance (th={:.2f}):\n{}'.format(th, imp))
imp = imp[(imp.feature_importances > th) & ~(imp.feature_names.isin(random_cols))]
return imp['feature_names'].tolist()
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
    """Placeholder for hyperparameter optimisation; always raises.

    The arguments are accepted but never read here.
    NOTE(review): presumably concrete subclasses override this method --
    confirm against the class hierarchy.

    Raises:
        NotImplementedError: on every call.
    """
    raise NotImplementedError()
class AutoXGB(BaseAutoML):
# Keyword settings for AutoXGB; also the default value of the ``params``
# argument of ``__init__``.
params = {'random_state': RANDOM_SEED,
          'n_jobs': -1}

# hyperopt search space for AutoXGB; also the default value of the ``space``
# argument of ``__init__``. Each dimension's hyperopt label is kept identical
# to its dict key so that trial records name the parameter actually tuned.
space = {
    "learning_rate": hp.loguniform("learning_rate", np.log(0.01), np.log(0.3)),
    # This dimension was previously labelled "num_leaves", so hyperopt
    # recorded max_depth under the wrong parameter name.
    "max_depth": hp.choice("max_depth", [6, 8, 10]),
    "colsample_bytree": hp.quniform("colsample_bytree", .5, .9, 0.1),
    "subsample": hp.quniform("subsample", .5, .9, 0.1),
    "min_child_weight": hp.choice('min_child_weight', [10, 25, 100]),
}
def __init__(self, objective='reg:linear', metric='rmse', boosting='gbtree', params=params, space=space,
n_est=500, n_stop=10, sample_size=SAMPLE_SIZE, feature_selection=True, n_fs=10,
hyperparam_opt=True, n_hpopt=100, n_random_col=10, random_state=RANDOM_SEED, shuffle=True):