rf.save_model()
# use a larger slice of the dataset (first 50,000 samples) for xgboost and lightgbm
X_slice = X[:50000, :]
y_slice = y[:50000]
# more complex hyperparameter optimization is done for the xgboost and lightgbm algorithms
# the number of hyperparameters that need to be tuned does not favour a grid search
# over the full space, so the parameters are optimized in separate stages
# (an illustrative sketch of step 2 follows below):
# 1. fix learning rate and number of estimators for tuning tree-based parameters
# 2. tune max_depth and min_child_weight
# 3. tune gamma
# 4. tune subsample and colsample_bytree
# 5. tune l2 regularization
# 6. reduce learning rate and start over until the stopping criterion is reached
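# The staged search itself is encapsulated in XGBoostOpt (called below); purely as an
# illustration, step 2 could look roughly like this sketch using scikit-learn's GridSearchCV.
# The fixed learning rate, n_estimators, parameter grids, scoring metric and fold count are
# assumptions for the example, not the values used by XGBoostOpt. The function is defined
# here but never called.
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

def sketch_xgb_step2(X, y):
    # step 1: fix learning rate and number of estimators (assumed values)
    base = XGBClassifier(learning_rate=0.1, n_estimators=200, objective='binary:logistic')
    # step 2: grid search over max_depth and min_child_weight only
    grid = GridSearchCV(base,
                        param_grid={'max_depth': [3, 5, 7, 9],
                                    'min_child_weight': [1, 3, 5]},
                        scoring='roc_auc', cv=3, n_jobs=-1)
    grid.fit(X, y)
    return grid.best_params_, grid.best_score_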
xgb = XGBoostOpt(X_slice, y_slice, params_cv=params_cv, max_rounds=2, model_name='xgb_porto_seguro', save_dir=save_dir)
xgb.tune_params()
print('Best model score: %f.' %(xgb.best_score))
print('Best model parameters:')
print(xgb.best_model)
xgb.save_model()
# a similar staged approach is taken for lightgbm
# (an illustrative sketch of step 2 follows below):
# 1. fix learning rate and number of estimators for tuning tree-based parameters
# 2. tune num_leaves and min_data_in_leaf
# 3. tune min_gain_to_split
# 4. tune bagging_fraction + bagging_freq and feature_fraction
# 5. tune lambda_l2
# 6. reduce learning rate and start over until the stopping criterion is reached
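# As above, LGBMOpt performs the staged search internally; purely as an illustration, step 2
# could look roughly like this. min_data_in_leaf is addressed through its scikit-learn wrapper
# name min_child_samples; the grids, fixed values, metric and fold count are assumptions.
# The function is defined here but never called.
from lightgbm import LGBMClassifier

def sketch_lgb_step2(X, y):
    # step 1: fix learning rate and number of estimators (assumed values)
    base = LGBMClassifier(learning_rate=0.1, n_estimators=200)
    # step 2: grid search over num_leaves and min_data_in_leaf (min_child_samples) only
    grid = GridSearchCV(base,
                        param_grid={'num_leaves': [15, 31, 63],
                                    'min_child_samples': [20, 50, 100]},
                        scoring='roc_auc', cv=3, n_jobs=-1)
    grid.fit(X, y)
    return grid.best_params_, grid.best_score_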
lgb = LGBMOpt(X_slice, y_slice, params_cv=params_cv, max_rounds=2, model_name='lgb_porto_seguro', save_dir=save_dir)
lgb.tune_params()
print('Best model score: %f.' %(lgb.best_score))