Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Feature matrix and target labels used by every tuning example below.
X, y = train_data, train_labels

# Fill missing entries with the median of their column before tuning.
# NOTE(review): if `Imputer` is sklearn.preprocessing.Imputer, that class was
# removed in scikit-learn 0.22 in favour of sklearn.impute.SimpleImputer --
# confirm the pinned library version before upgrading.
imp = Imputer(
    missing_values='NaN',
    strategy='median',
    axis=0,
)
X = imp.fit_transform(X)
# Cross-validation settings handed to every *Opt tuner in this script.
params_cv = dict(
    cv_folds=5,
    early_stopping_rounds=100,
    scoring='roc_auc',
)
# Logistic regression: tune its parameters with a grid search over a range
# of candidate values, using the full (imputed) dataset.
# Note: none of the examples below apply any further pre-processing.
lr = LogisticRegressionOpt(
    X,
    y,
    params_cv=params_cv,
    model_name='lr_porto_seguro',
    save_dir=save_dir,
)
lr.tune_params()

# Report the best model found by the search, then save it via save_model().
print('Best model parameters:')
print(lr.best_model)
lr.save_model()
# The remaining examples train on the first 10000 rows only, which keeps
# their training time reasonable.
X_slice, y_slice = X[:10000, :], y[:10000]
# AdaBoost: tune on the reduced slice with the shared CV settings.
ada = AdaBoostClassifierOpt(
    X_slice,
    y_slice,
    params_cv=params_cv,
    model_name='ada_porto_seguro',
    save_dir=save_dir,
)
ada.tune_params()

# Report the best model found by the search, then save it via save_model().
print('Best model parameters:')
print(ada.best_model)
ada.save_model()