# Fragment of the feature-selection step: when random (noise) columns were
# injected, keep only features whose importance exceeds the mean importance
# of those random columns; otherwise just drop zero-importance features.
if len(random_cols) == 0:
    imp = imp[imp['feature_importances'] != 0]
else:
    th = imp.loc[imp.feature_names.isin(random_cols), 'feature_importances'].mean()
    logger.debug('feature importance (th={:.2f}):\n{}'.format(th, imp))
    imp = imp[(imp.feature_importances > th) & ~(imp.feature_names.isin(random_cols))]

return imp['feature_names'].tolist()
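# --- Illustrative sketch (not part of the original source) -----------------
# The fragment above thresholds feature importances against injected noise
# columns. A self-contained example of that idea, using RandomForestRegressor
# as a stand-in model (an assumption; the original code builds `imp` from its
# own booster) and assuming X is a pandas DataFrame:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor


def select_by_random_col_threshold(X, y, n_random_col=3, seed=42):
    rng = np.random.RandomState(seed)
    X = X.copy()
    random_cols = []
    for i in range(n_random_col):
        col = '__random_{}__'.format(i)
        X[col] = rng.rand(X.shape[0])   # pure-noise columns
        random_cols.append(col)

    model = RandomForestRegressor(n_estimators=100, random_state=seed).fit(X, y)
    imp = pd.DataFrame({'feature_names': X.columns,
                        'feature_importances': model.feature_importances_})

    # Threshold = mean importance of the noise columns; keep real features above it.
    th = imp.loc[imp.feature_names.isin(random_cols), 'feature_importances'].mean()
    imp = imp[(imp.feature_importances > th) & ~imp.feature_names.isin(random_cols)]
    return imp['feature_names'].tolist()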
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
    # Hyperparameter search is left to subclasses (the space is meant for hyperopt).
    raise NotImplementedError
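# --- Illustrative sketch (not part of the original source) -----------------
# One way a subclass could implement optimize_hyperparam() with hyperopt's TPE
# search over a space like AutoXGB.space below. The XGBoost model, RMSE
# objective, and the helper's name are assumptions for illustration only.
import numpy as np
import xgboost as xgb
from hyperopt import STATUS_OK, Trials, fmin, space_eval, tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


def optimize_hyperparam_sketch(X, y, space, test_size=.2, n_eval=100, seed=42):
    X_trn, X_val, y_trn, y_val = train_test_split(X, y, test_size=test_size, random_state=seed)

    def objective(hyperparams):
        # hyperopt passes sampled parameter values; score them on the validation split.
        model = xgb.XGBRegressor(n_estimators=100, random_state=seed, **hyperparams)
        model.fit(X_trn, y_trn)
        rmse = np.sqrt(mean_squared_error(y_val, model.predict(X_val)))
        return {'loss': rmse, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=n_eval, trials=trials)
    # hp.choice() entries come back as indices; space_eval() maps them to values.
    return space_eval(space, best)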
class AutoXGB(BaseAutoML):

    # Default XGBoost parameters.
    params = {'random_state': RANDOM_SEED,
              'n_jobs': -1}

    # hyperopt search space. The label passed to each hp.* call matches its key
    # so that the tuned value maps back to the right XGBoost parameter.
    space = {
        "learning_rate": hp.loguniform("learning_rate", np.log(0.01), np.log(0.3)),
        "max_depth": hp.choice("max_depth", [6, 8, 10]),
        "colsample_bytree": hp.quniform("colsample_bytree", .5, .9, 0.1),
        "subsample": hp.quniform("subsample", .5, .9, 0.1),
        "min_child_weight": hp.choice("min_child_weight", [10, 25, 100]),
    }
    def __init__(self, objective='reg:linear', metric='rmse', boosting='gbtree', params=params, space=space,
                 n_est=500, n_stop=10, sample_size=SAMPLE_SIZE, feature_selection=True, n_fs=10,
                 hyperparam_opt=True, n_hpopt=100, n_random_col=10, random_state=RANDOM_SEED, shuffle=True):
        # Resolve the metric alias and whether the metric should be minimized.
        self.metric, minimize = self._get_metric_alias_minimize(metric)

# Generic constructor (presumably from the BaseAutoML base class that AutoXGB
# extends), shown here as a separate fragment.
def __init__(self, params, space, n_est=500, n_stop=10, sample_size=SAMPLE_SIZE, valid_size=VALID_SIZE,
             shuffle=True, feature_selection=True, n_fs=10, hyperparam_opt=True, n_hpopt=100,
             minimize=True, n_random_col=10, random_state=RANDOM_SEED):
    """Initialize an optimized regressor class object.

    Args:
        params (dict): default parameters for a regressor
        space (dict): parameter space for hyperopt to explore
        n_est (int): the number of iterations for a regressor
        n_stop (int): early stopping rounds for a regressor
        sample_size (int): the number of samples for feature selection and parameter search
        valid_size (float): the fraction of samples for feature selection and/or hyperparameter tuning
        shuffle (bool): if True, use random sampling for the sample and the training/validation split;
            otherwise the last sample_size and valid_size rows are used
        feature_selection (bool): whether to select features
        n_fs (int): the number of iterations for feature selection
        hyperparam_opt (bool): whether to search for optimal hyperparameters
        n_hpopt (int): the number of iterations for hyperparameter optimization
        minimize (bool): whether a lower metric value is better
        n_random_col (int): the number of random (noise) columns to add for feature selection
        random_state (int): random seed
    """
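# --- Illustrative sketch (not part of the original source) -----------------
# A hypothetical end-to-end use of AutoXGB on a toy regression problem. It
# assumes a scikit-learn-style interface; the tune()/fit()/predict() method
# names are assumptions for illustration, not taken from the fragments above.
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
X = pd.DataFrame(X, columns=['f{}'.format(i) for i in range(X.shape[1])])
X_trn, X_tst, y_trn, y_tst = train_test_split(X, y, test_size=.2, random_state=42)

model = AutoXGB(objective='reg:linear', metric='rmse', n_est=500, n_stop=10)
model.tune(X_trn, y_trn)      # feature selection + hyperparameter search (assumed API)
model.fit(X_trn, y_trn)
p = model.predict(X_tst)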