How to use the lightgbm.cv function in lightgbm

To help you get started, we’ve selected a few lightgbm.cv examples based on popular ways the function is used in public projects.

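Before the project examples, here is a minimal, self-contained sketch of a typical lgb.cv call on synthetic data. The names and numbers are illustrative only, and the keyword arguments (metrics, verbose_eval, early_stopping_rounds) follow the pre-4.0 lightgbm API used by all of the snippets below; in lightgbm >= 4.0 these moved into params and callbacks.

import lightgbm as lgb
import numpy as np

# synthetic regression data (illustrative only)
X = np.random.rand(500, 10)
y = np.random.rand(500)

train_set = lgb.Dataset(X, label=y)
params = {'objective': 'regression', 'metric': 'l2', 'verbose': -1}

# lgb.cv returns a dict mapping '<metric>-mean' and '<metric>-stdv'
# to one value per boosting round
cv_results = lgb.cv(params, train_set, num_boost_round=50, nfold=5,
                    stratified=False, verbose_eval=False)
print(len(cv_results['l2-mean']))  # number of rounds actually evaluated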

github microsoft / LightGBM / tests / python_package_test / test_engine.py (View on GitHub)
        # shuffle = False, override metric in params
        params_with_metric = {'metric': 'l2', 'verbose': -1}
        cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                        nfold=3, stratified=False, shuffle=False,
                        metrics='l1', verbose_eval=False)
        self.assertIn('l1-mean', cv_res)
        self.assertNotIn('l2-mean', cv_res)
        self.assertEqual(len(cv_res['l1-mean']), 10)
        # shuffle = True, callbacks
        cv_res = lgb.cv(params, lgb_train, num_boost_round=10, nfold=3, stratified=False, shuffle=True,
                        metrics='l1', verbose_eval=False,
                        callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
        self.assertIn('l1-mean', cv_res)
        self.assertEqual(len(cv_res['l1-mean']), 10)
        # enable display training loss
        cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                        nfold=3, stratified=False, shuffle=False,
                        metrics='l1', verbose_eval=False, eval_train_metric=True)
        self.assertIn('train l1-mean', cv_res)
        self.assertIn('valid l1-mean', cv_res)
        self.assertNotIn('train l2-mean', cv_res)
        self.assertNotIn('valid l2-mean', cv_res)
        self.assertEqual(len(cv_res['train l1-mean']), 10)
        self.assertEqual(len(cv_res['valid l1-mean']), 10)
        # self defined folds
        tss = TimeSeriesSplit(3)
        folds = tss.split(X_train)
        cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
                            verbose_eval=False)
        cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
                            verbose_eval=False)
        np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
        # lambdarank
        X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                           '../../examples/lambdarank/rank.train'))
        q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                          '../../examples/lambdarank/rank.train.query'))
        params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
        lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
        # ... with l2 metric
        cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                               metrics='l2', verbose_eval=False)
        self.assertEqual(len(cv_res_lambda), 2)
        self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
        # ... with NDCG (default) metric
        cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                               verbose_eval=False)
        self.assertEqual(len(cv_res_lambda), 2)
        self.assertFalse(np.isnan(cv_res_lambda['ndcg@3-mean']).any())
        # self defined folds with lambdarank
        cv_res_lambda_obj = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10,
                                   folds=GroupKFold(n_splits=3),
                                   verbose_eval=False)
        np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])
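The test above passes folds both as a generator (tss.split(X_train)) and as the splitter object itself, and checks that the two give identical results. A minimal sketch of the same idea with scikit-learn's KFold, on synthetic data with illustrative names:

from sklearn.model_selection import KFold
import lightgbm as lgb
import numpy as np

X = np.random.rand(300, 5)
y = np.random.rand(300)
train_set = lgb.Dataset(X, label=y)
params = {'objective': 'regression', 'metric': 'l2', 'verbose': -1}

# folds accepts a splitter object with a split() method...
res_obj = lgb.cv(params, train_set, num_boost_round=10,
                 folds=KFold(n_splits=3), verbose_eval=False)
# ...or an iterable of (train_indices, test_indices) pairs
res_gen = lgb.cv(params, train_set, num_boost_round=10,
                 folds=KFold(n_splits=3).split(X), verbose_eval=False)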
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_lightgbm.py (View on GitHub)
# (excerpt starts mid-statement: the tail of a feature-column selection)
                 'Cancelled', 'CancellationCode', 'Diverted', 'CarrierDelay',
                 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay']]

    lgb_params = {'learning_rate': 0.1,
                  'boosting': booster,
                  'objective': 'binary',
                  'metric': 'rmse',
                  'feature_fraction': 0.9,
                  'bagging_fraction': 0.75,
                  'num_leaves': 31,
                  'bagging_freq': 1,
                  'min_data_per_leaf': 250,
                  'device_type': 'gpu',
                  'gpu_device_id': 0}
    lgb_train = lgb.Dataset(data=data, label=y)
    cv = lgb.cv(lgb_params,
                lgb_train,
                num_boost_round=50,
                early_stopping_rounds=5,
                stratified=False,
                verbose_eval=10)
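With early_stopping_rounds set, the lists in the returned dict are truncated at the best iteration, so their length is a natural choice of num_boost_round for a final fit. A short sketch reusing the lgb_params and lgb_train defined above (hedged; pre-4.0 API):

best_rounds = len(cv['rmse-mean'])  # result lists stop at the best iteration
final_model = lgb.train(lgb_params, lgb_train, num_boost_round=best_rounds)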
github daviddwlee84 / MachineLearningPractice / Project / KaggleElo / train_model.py (View on GitHub)
# (excerpt starts inside a params dict)
        'min_child_weight': 0.001,
        'subsample_for_bin': 200000,
        'min_split_gain': 0,
        'reg_alpha': 0,
        'reg_lambda': 0,
        'num_leaves': 63,
        'seed': seed,
        'nthread': 8
    }
    
    if online == 0:
        print("Start train and validate...")
        
        dtrain = lgb.Dataset(X, label=Y, feature_name=list(X.columns), categorical_feature=categorical)
        
        eval_hist = lgb.cv(params, 
                           dtrain, 
                           nfold = 5,
                           num_boost_round=MAX_ROUNDS,
                           early_stopping_rounds=EARLY_STOP,
                           verbose_eval=50, 
                           seed = seed,
                           stratified = False
                          )
        
        print(eval_hist)

        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=seed, test_size=0.25)
        dtrain = lgb.Dataset(X_train,
                         label=Y_train,
                         feature_name=list(X.columns),
                         categorical_feature=categorical)
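The Dataset construction above names its columns via feature_name and flags categoricals via categorical_feature. A small, hypothetical illustration of the same pattern (the column names here are invented):

import pandas as pd
import lightgbm as lgb

df = pd.DataFrame({'age': [25, 32, 47, 51, 38, 29],
                   'city': [0, 1, 0, 2, 1, 2],   # integer-encoded category
                   'target': [0, 1, 0, 1, 1, 0]})
X = df[['age', 'city']]
dtrain = lgb.Dataset(X, label=df['target'],
                     feature_name=list(X.columns),
                     categorical_feature=['city'])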
github jrzaurin / LightGBM-with-Focal-Loss / utils / train_hyperopt.py (View on GitHub)
# hyperopt casts as float
params['num_boost_round'] = int(params['num_boost_round'])
params['num_leaves'] = int(params['num_leaves'])

# need to be passed as parameter
if self.is_unbalance:
    params['is_unbalance'] = True
params['verbose'] = -1
params['seed'] = 1

if self.with_focal_loss:
    focal_loss = lambda x, y: focal_loss_lgb(x, y, params['alpha'], params['gamma'])
    cv_result = lgb.cv(
        params,
        train,
        num_boost_round=params['num_boost_round'],
        fobj=focal_loss,
        feval=lgb_focal_f1_score,
        nfold=3,
        stratified=True,
        early_stopping_rounds=20)
else:
    cv_result = lgb.cv(
        params,
        train,
        num_boost_round=params['num_boost_round'],
        metrics='binary_logloss',
        feval=lgb_f1_score,
        nfold=3,
        stratified=True,
        early_stopping_rounds=20)
self.early_stop_dict[objective.i] = len(cv_result['f1-mean'])
score = round(cv_result['f1-mean'][-1], 4)
objective.i += 1
return -score
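Both branches above rely on lgb.cv accepting a custom objective (fobj) and a custom metric (feval), which the pre-4.0 API supports. A hedged sketch of the expected callable signatures; the bodies are placeholders, not this repository's focal-loss implementation:

import numpy as np

def custom_objective(preds, train_data):
    # fobj: return the per-sample gradient and hessian of the loss
    labels = train_data.get_label()
    grad = preds - labels        # placeholder: gradient of 0.5 * squared error
    hess = np.ones_like(preds)   # placeholder: constant hessian
    return grad, hess

def custom_metric(preds, train_data):
    # feval: return (metric_name, value, is_higher_better)
    labels = train_data.get_label()
    return 'mae', float(np.mean(np.abs(preds - labels))), False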
github wanglei5205 / Machine_learning / Boosting--LightGBM / lgb-python / 2.lightgbm调参案例.py (View on GitHub)
if mean_merror < min_merror:
    min_merror = mean_merror
    best_params['num_leaves'] = num_leaves
    best_params['max_depth'] = max_depth

params['num_leaves'] = best_params['num_leaves']
params['max_depth'] = best_params['max_depth']

# Overfitting
print("Tuning step 2: reduce overfitting")
for max_bin in range(1, 255, 5):
    for min_data_in_leaf in range(10, 200, 5):
        params['max_bin'] = max_bin
        params['min_data_in_leaf'] = min_data_in_leaf

        cv_results = lgb.cv(params,
                            lgb_train,
                            seed=42,
                            nfold=3,
                            metrics=['binary_error'],
                            early_stopping_rounds=3,
                            verbose_eval=True)

        mean_merror = pd.Series(cv_results['binary_error-mean']).min()
        boost_rounds = pd.Series(cv_results['binary_error-mean']).argmin()

        if mean_merror < min_merror:
            min_merror = mean_merror
            best_params['max_bin'] = max_bin
            best_params['min_data_in_leaf'] = min_data_in_leaf
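Once the sweep finishes, the winning values can be folded back into params. Since boost_rounds above is only the zero-based index from the last loop iteration, a safer pattern is to re-run lgb.cv with the tuned parameters and take the resulting round count (a sketch using the script's own names):

params.update(best_params)
cv_final = lgb.cv(params, lgb_train, num_boost_round=500, nfold=3,
                  metrics=['binary_error'], early_stopping_rounds=10,
                  verbose_eval=False, seed=42)
model = lgb.train(params, lgb_train,
                  num_boost_round=len(cv_final['binary_error-mean']))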
github awslabs / autogluon / autogluon / utils / tabular / ml / models / lgb / lgb_model.py (View on GitHub)
# (excerpt starts inside the cv_params dict)
            'num_boost_round': self.num_boost_round,
            'nfold': k_fold,
            'early_stopping_rounds': 150,
            'verbose_eval': 10,
            'seed': 0,
        }
        if type(eval_metric) != str:
            cv_params['feval'] = eval_metric
            cv_params['params']['metric'] = 'None'
        else:
            cv_params['params']['metric'] = eval_metric
        if self.problem_type == REGRESSION:
            cv_params['stratified'] = False

        print('Current parameters:\n', params)
        eval_hist = lgb.cv(**cv_params)  # TODO: Try to use custom early stopper to enable dart
        best_score = eval_hist[self.eval_metric_name + '-mean'][-1]
        print('Best num_boost_round:', len(eval_hist[self.eval_metric_name + '-mean']))
        print('Best CV score:', best_score)
        return best_score
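Building the arguments as a dict, as this wrapper does, makes it easy to toggle entries such as stratified or feval before the call. A hedged sketch of the shape of such a dict (values illustrative; train_set stands in for a prepared lgb.Dataset):

cv_params = {
    'params': {'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1},
    'train_set': train_set,
    'num_boost_round': 500,
    'nfold': 5,
    'early_stopping_rounds': 50,
    'verbose_eval': False,
    'seed': 0,
}
eval_hist = lgb.cv(**cv_params)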