Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a unittest-style cv test method; the enclosing
# def/class is not visible here and leading indentation appears to have been
# stripped by extraction — confirm against the original test file.
# enable display training loss
# eval_train_metric=True makes lgb.cv report both train and valid folds,
# with keys prefixed 'train ' / 'valid '; metrics='l1' overrides the 'l2'
# metric presumably set in params_with_metric (dict defined outside this view).
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
nfold=3, stratified=False, shuffle=False,
metrics='l1', verbose_eval=False, eval_train_metric=True)
self.assertIn('train l1-mean', cv_res)
self.assertIn('valid l1-mean', cv_res)
# the params-level 'l2' metric must be fully replaced, not merged in
self.assertNotIn('train l2-mean', cv_res)
self.assertNotIn('valid l2-mean', cv_res)
# one mean per boosting round (num_boost_round=10, no early stopping)
self.assertEqual(len(cv_res['train l1-mean']), 10)
self.assertEqual(len(cv_res['valid l1-mean']), 10)
# self defined folds
# folds may be passed either as a split generator or as the splitter
# object itself; both forms must produce identical CV results.
tss = TimeSeriesSplit(3)
folds = tss.split(X_train)
cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
verbose_eval=False)
cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
verbose_eval=False)
np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
# lambdarank
# load ranking data in svmlight format plus per-query group sizes,
# resolved relative to this test file's directory
X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/lambdarank/rank.train'))
q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/lambdarank/rank.train.query'))
params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
# group= attaches the query-group sizes required by the ranking objective
lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
# ... with l2 metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
metrics='l2', verbose_eval=False)
# exactly the mean and stdv series for the single requested metric
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
# ... with NDCG (default) metric
# NOTE(review): the following call is truncated mid-argument-list by the
# extraction; its remaining arguments are lost here (the duplicated copy of
# this fragment further down in this file shows the complete call).
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
# NOTE(review): this fragment duplicates part of the previous one — the file
# appears to be a concatenation of overlapping snippet extractions, not a
# single runnable module. Code kept byte-identical; comments only.
# folds as generator vs. folds as splitter object must agree
tss = TimeSeriesSplit(3)
folds = tss.split(X_train)
cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
verbose_eval=False)
cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
verbose_eval=False)
np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
# lambdarank
# load svmlight-format ranking data and per-query group sizes relative
# to this file's directory
X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/lambdarank/rank.train'))
q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../examples/lambdarank/rank.train.query'))
params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
# ... with l2 metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
metrics='l2', verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
# ... with NDCG (default) metric
# no metrics= override, so the objective's default metric (ndcg@3, per
# 'eval_at': 3 in params_lambdarank) is reported
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['ndcg@3-mean']).any())
# self defined folds with lambdarank
# GroupKFold keeps query groups intact across folds; result must match
# the default-fold run above
cv_res_lambda_obj = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10,
folds=GroupKFold(n_splits=3),
verbose_eval=False)
np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])
# NOTE(review): another overlapping snippet of the same cv test; duplicates
# earlier content in this file. Code kept byte-identical; comments only.
# shuffle = False, override metric in params
# the metrics= argument must win over the 'metric' key in params
params_with_metric = {'metric': 'l2', 'verbose': -1}
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
nfold=3, stratified=False, shuffle=False,
metrics='l1', verbose_eval=False)
self.assertIn('l1-mean', cv_res)
self.assertNotIn('l2-mean', cv_res)
self.assertEqual(len(cv_res['l1-mean']), 10)
# shuffle = True, callbacks
# reset_parameter decays the learning rate each iteration; cv must
# still complete all 10 rounds
cv_res = lgb.cv(params, lgb_train, num_boost_round=10, nfold=3, stratified=False, shuffle=True,
metrics='l1', verbose_eval=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
self.assertIn('l1-mean', cv_res)
self.assertEqual(len(cv_res['l1-mean']), 10)
# enable display training loss
# eval_train_metric=True adds 'train '/'valid '-prefixed result keys
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
nfold=3, stratified=False, shuffle=False,
metrics='l1', verbose_eval=False, eval_train_metric=True)
self.assertIn('train l1-mean', cv_res)
self.assertIn('valid l1-mean', cv_res)
self.assertNotIn('train l2-mean', cv_res)
self.assertNotIn('valid l2-mean', cv_res)
self.assertEqual(len(cv_res['train l1-mean']), 10)
self.assertEqual(len(cv_res['valid l1-mean']), 10)
# self defined folds
# generator form and splitter-object form of folds= must agree
tss = TimeSeriesSplit(3)
folds = tss.split(X_train)
cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
verbose_eval=False)
cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
verbose_eval=False)
np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
# NOTE(review): fragment from a different script (GPU benchmark on airline
# delay data, presumably); it begins mid column-selection list — the opening
# of that expression is lost in extraction.
'Cancelled', 'CancellationCode', 'Diverted', 'CarrierDelay',
'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']]
# GPU-targeted training parameters; 'booster' is defined outside this view.
# NOTE(review): objective is 'binary' but metric is 'rmse' — unusual pairing;
# confirm this is intentional in the original script.
lgb_params = {'learning_rate': 0.1,
'boosting': booster,
'objective': 'binary',
'metric': 'rmse',
'feature_fraction': 0.9,
'bagging_fraction': 0.75,
'num_leaves': 31,
'bagging_freq': 1,
'min_data_per_leaf': 250,
'device_type': 'gpu',
'gpu_device_id': 0}
lgb_train = lgb.Dataset(data=data, label=y)
# 3-fold-by-default CV (nfold not given) with early stopping after 5
# stagnant rounds; progress printed every 10 iterations
cv = lgb.cv(lgb_params,
lgb_train,
num_boost_round=50,
early_stopping_rounds=5,
stratified=False,
verbose_eval=10)
# NOTE(review): fragment from yet another script; it begins inside a params
# dict whose opening (and keys like 'seed', MAX_ROUNDS, EARLY_STOP,
# categorical, online) are defined outside this view.
'min_child_weight': 0.001,
'subsample_for_bin': 200000,
'min_split_gain': 0,
'reg_alpha': 0,
'reg_lambda': 0,
'num_leaves':63,
'seed': seed,
'nthread': 8
}
# online == 0 presumably selects the offline train/validate path — confirm
if online == 0:
print("Start train and validate...")
# full data goes into one Dataset; categorical columns passed explicitly
dtrain = lgb.Dataset(X, label=Y, feature_name=list(X.columns), categorical_feature=categorical)
# 5-fold CV with early stopping; history printed every 50 rounds
eval_hist = lgb.cv(params,
dtrain,
nfold = 5,
num_boost_round=MAX_ROUNDS,
early_stopping_rounds=EARLY_STOP,
verbose_eval=50,
seed = seed,
stratified = False
)
print(eval_hist)
# 75/25 holdout split used after (or instead of) the CV above —
# control flow around this line is not visible here
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=seed, test_size=0.25)
dtrain = lgb.Dataset(X_train,
label=Y_train,
feature_name=list(X.columns),
categorical_feature=categorical)
# NOTE(review): interior of a hyperparameter-search objective method (the
# enclosing def is outside this view); `objective.i` suggests a counter
# attribute stashed on the objective function itself.
params['seed'] = 1
if self.with_focal_loss:
# bind alpha/gamma from params into a custom objective (fobj);
# metric evaluation is delegated to the custom F1 feval
focal_loss = lambda x,y: focal_loss_lgb(x, y,
params['alpha'], params['gamma'])
cv_result = lgb.cv(
params,
train,
num_boost_round=params['num_boost_round'],
fobj = focal_loss,
feval = lgb_focal_f1_score,
nfold=3,
stratified=True,
early_stopping_rounds=20)
else:
# built-in logloss objective with a custom F1 feval alongside
cv_result = lgb.cv(
params,
train,
num_boost_round=params['num_boost_round'],
metrics='binary_logloss',
feval = lgb_f1_score,
nfold=3,
stratified=True,
early_stopping_rounds=20)
# record the early-stopped round count for this trial, keyed by trial index
self.early_stop_dict[objective.i] = len(cv_result['f1-mean'])
# final-round mean F1, rounded; negated because the search minimizes
score = round(cv_result['f1-mean'][-1], 4)
objective.i+=1
return -score
# NOTE(review): interior of a manual grid-search tuning script; min_merror,
# best_params, num_leaves, max_depth and lgb_train are defined outside this
# view. Indentation of the loop bodies appears stripped by extraction.
# keep the best num_leaves/max_depth found by the previous tuning stage
if mean_merror < min_merror:
min_merror = mean_merror
best_params['num_leaves'] = num_leaves
best_params['max_depth'] = max_depth
params['num_leaves'] = best_params['num_leaves']
params['max_depth'] = best_params['max_depth']
# Stage 2: reduce overfitting by grid-searching max_bin and min_data_in_leaf
print("调参2:降低过拟合")
for max_bin in range(1,255,5):
for min_data_in_leaf in range(10,200,5):
params['max_bin'] = max_bin
params['min_data_in_leaf'] = min_data_in_leaf
cv_results = lgb.cv(
params,
lgb_train,
seed=42,
nfold=3,
metrics=['binary_error'],
early_stopping_rounds=3,
verbose_eval=True
)
# best (lowest) mean binary error across rounds, and the round
# index where it occurred
mean_merror = pd.Series(cv_results['binary_error-mean']).min()
boost_rounds = pd.Series(cv_results['binary_error-mean']).argmin()
if mean_merror < min_merror:
min_merror = mean_merror
best_params['max_bin']= max_bin
best_params['min_data_in_leaf'] = min_data_in_leaf
# NOTE(review): interior of an AutoML-style tuner method (cf. the
# REGRESSION constant and self.eval_metric_name); begins inside the
# cv_params dict whose opening is outside this view.
'num_boost_round': self.num_boost_round,
'nfold': k_fold,
'early_stopping_rounds': 150,
'verbose_eval': 10,
'seed': 0,
}
# a non-string eval_metric is treated as a custom feval callable; the
# built-in metric is disabled with 'None' so only feval is evaluated
if type(eval_metric) != str:
cv_params['feval'] = eval_metric
cv_params['params']['metric'] = 'None'
else:
cv_params['params']['metric'] = eval_metric
# stratified folds are only meaningful for classification
if self.problem_type == REGRESSION:
cv_params['stratified'] = False
print('Current parameters:\n', params)
eval_hist = lgb.cv(**cv_params) # TODO: Try to use customer early stopper to enable dart
# last entry of the mean series = score at the early-stopped best round
best_score = eval_hist[self.eval_metric_name + '-mean'][-1]
print('Best num_boost_round:', len(eval_hist[self.eval_metric_name + '-mean']))
print('Best CV score:', best_score)
return best_score
objective function for lightgbm.
"""
# NOTE(review): this fragment duplicates the focal-loss objective seen
# earlier in this file and is truncated mid-call at its end — the lgb.cv
# call's remaining arguments are lost in extraction.
# hyperopt casts as float
params['num_boost_round'] = int(params['num_boost_round'])
params['num_leaves'] = int(params['num_leaves'])
# need to be passed as parameter
if self.is_unbalance:
params['is_unbalance'] = True
params['verbose'] = -1
params['seed'] = 1
if self.with_focal_loss:
# bind alpha/gamma into a custom fobj; F1 evaluated via custom feval
focal_loss = lambda x,y: focal_loss_lgb(x, y,
params['alpha'], params['gamma'])
cv_result = lgb.cv(
params,
train,
num_boost_round=params['num_boost_round'],
fobj = focal_loss,
feval = lgb_focal_f1_score,
nfold=3,
stratified=True,
early_stopping_rounds=20)
else:
# built-in logloss objective plus custom F1 feval
# (call truncated below by extraction)
cv_result = lgb.cv(
params,
train,
num_boost_round=params['num_boost_round'],
metrics='binary_logloss',
feval = lgb_f1_score,
nfold=3,