"verbosity": -1}
# "seed": 8888
# folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=8888)
# idx = y_train.argsort()
# y_lab = np.repeat(list(range(50000 // 20)), 20)
# y_lab = np.asarray(sorted(list(zip(idx, y_lab))))[:, -1].astype(np.int32)
# splits = folds.split(X_train, y_lab)
oof_lgb = np.zeros(X_train.shape[0])         # out-of-fold predictions
predictions_lgb = np.zeros(X_test.shape[0])  # test predictions averaged over folds
folds = KFold(n_splits=5, shuffle=True, random_state=2019)
splits = folds.split(X_train, y_train)
for fold_, (trn_idx, val_idx) in enumerate(splits):
    print("fold n°{}".format(fold_ + 1))
    trn_data = lgb.Dataset(X_train[trn_idx], y_train[trn_idx])
    val_data = lgb.Dataset(X_train[val_idx], y_train[val_idx])
    num_round = 20000
    clf = lgb.train(param, trn_data, num_round, valid_sets=[trn_data, val_data],
                    verbose_eval=100, early_stopping_rounds=100)
    oof_lgb[val_idx] = clf.predict(X_train[val_idx], num_iteration=clf.best_iteration)
    predictions_lgb += clf.predict(X_test, num_iteration=clf.best_iteration) / folds.n_splits
# The printed value is 1 / (1 + MAE), a score where higher is better.
print("MAE CV score: {:<8.8f}".format(1 / (mean_absolute_error(y_train, oof_lgb) + 1)))
print(predictions_lgb)
np.save('val.mse_lgb.npy', oof_lgb)
np.save('test.mse_lgb.npy', predictions_lgb)
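# Note: LightGBM 4.x removed the `verbose_eval` and `early_stopping_rounds`
# keyword arguments of train() in favour of callbacks. A minimal sketch of
# the equivalent call, reusing the last fold's `trn_data`/`val_data` from above:
clf = lgb.train(param, trn_data, num_round,
                valid_sets=[trn_data, val_data],
                callbacks=[lgb.early_stopping(stopping_rounds=100),
                           lgb.log_evaluation(period=100)])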
def suggest_learning_rate(self, X, y, max_boost_round):
    """Grid-search a learning rate by training in 500-round increments."""
    lr = [0.01, 0.02, 0.03, 0.04, 0.05]
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)
    params = self.setParams(self.default_hyper_param)
    max_round = max_boost_round // 500
    auc = np.zeros([len(lr), max_round])
    for i in range(len(lr)):
        print('learning rate: %.2f' % lr[i])
        params['learning_rate'] = lr[i]
        train_data = lgb.Dataset(X_train, y_train, free_raw_data=False)
        clf = None
        for j in range(max_round):
            # Continue training the same booster 500 rounds at a time.
            clf = lgb.train(params, train_data, num_boost_round=500,
                            init_model=clf, keep_training_booster=True)
            # Validation AUC, penalized for larger learning rates and
            # rewarded slightly for longer training ("score with regularization").
            auc[i, j] = roc_auc_score(y_valid, clf.predict(X_valid)) - lr[i] * 0.1 + j * 0.001
    print(auc)
    # argmax over the flattened grid; recover the (lr, round) pair from the flat index.
    idx = np.argmax(auc)
    best_lr = lr[idx // max_round]
    best_boost_round = (idx % max_round + 1) * 500
    return best_lr, best_boost_round
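# A minimal usage sketch; the enclosing class (providing `setParams` and
# `default_hyper_param`) is truncated in the source, and `tuner`, `X`, `y`
# are hypothetical names:
#
#     best_lr, best_round = tuner.suggest_learning_rate(X, y, max_boost_round=5000)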
params = {'objective': 'multiclass',
          'metric': 'auc_mu',
          'verbose': -1,
          'num_classes': 2,
          'seed': 0}
results_auc_mu = {}
lgb.train(params, lgb_X, num_boost_round=10, valid_sets=[lgb_X], evals_result=results_auc_mu)
params = {'objective': 'binary',
          'metric': 'auc',
          'verbose': -1,
          'seed': 0}
results_auc = {}
lgb.train(params, lgb_X, num_boost_round=10, valid_sets=[lgb_X], evals_result=results_auc)
# For a two-class problem, auc_mu should match the binary AUC exactly.
np.testing.assert_allclose(results_auc_mu['training']['auc_mu'], results_auc['training']['auc'])
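# The tests above assume a binary-labelled Dataset `lgb_X` built earlier in
# the (truncated) test file; a minimal sketch of such a setup with synthetic
# data:
#
#     from sklearn.datasets import make_classification
#     X, y = make_classification(n_samples=100, n_classes=2, random_state=0)
#     lgb_X = lgb.Dataset(X, label=y)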
# test the case where all predictions are equal
lgb_X = lgb.Dataset(X[:10], label=y_new[:10])
params = {'objective': 'multiclass',
          'metric': 'auc_mu',
          'verbose': -1,
          'num_classes': 2,
          'min_data_in_leaf': 20,  # no split is possible on 10 rows, so the model is constant
          'seed': 0}
results_auc_mu = {}
lgb.train(params, lgb_X, num_boost_round=10, valid_sets=[lgb_X], evals_result=results_auc_mu)
self.assertAlmostEqual(results_auc_mu['training']['auc_mu'][-1], 0.5)
# should give 1 when accuracy = 1
X = X[:10, :]
y = y[:10]
lgb_X = lgb.Dataset(X, label=y)
params = {'objective': 'multiclass',
          'metric': 'auc_mu',
          'num_classes': 10,
          # (the rest of this snippet is truncated in the source)
          }
# (the opening `if` of this dispatch is truncated in the source; 'svm' is assumed)
if classifier == 'svm':
    clf = sklearn.svm.SVC(kernel='linear', class_weight='balanced', probability=True, random_state=848)
    clf.fit(X_train, y_train)
    y_test = clf.predict_proba(X_test)[:, 1]  # probability of the positive class
    return y_test
elif classifier == 'lgb':
    param = {
        'num_leaves': 15,
        'num_iterations': 100,
        'max_depth': 5,
        'objective': 'binary',
        'is_unbalance': True,  # reweight classes for imbalanced data
        'metric': ['auc', 'binary_logloss'],
        'verbose': -1,
        'seed': 848
    }
    train_data = lgb.Dataset(X_train, label=y_train)
    clf = lgb.train(param, train_data)
    y_test = clf.predict(X_test)  # predicted probabilities of the positive class
    return y_test
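# A minimal usage sketch of this dispatch; the enclosing function and its
# signature are truncated in the source, so the name below is hypothetical:
#
#     y_scores = predict_scores('lgb', X_train, y_train, X_test)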
for atpeParamFeature in atpeParamFeatures:
    if atpeParamFeature in result and result[atpeParamFeature] is not None and result[atpeParamFeature] != '':
        if atpeParamFeature in atpeParameterValues:
            # One-hot encode categorical ATPE parameters.
            for value in atpeParameterValues[atpeParamFeature]:
                vector.append(1.0 if result[atpeParamFeature] == value else 0.0)
        else:
            vector.append(float(result[atpeParamFeature]))
    else:
        vector.append(-3)  # We use -3 because none of our atpe parameters ever take this value
vectors.append(vector)
# (the loop that builds `targets` is truncated in the source)
if key in classPredictorKeys:
    targets.append(allTargets.index(result[key]))
else:
    targets.append(float(result[key]))
return lightgbm.Dataset(numpy.array(vectors), label=numpy.array(targets), feature_name=names)
params = {
    'task': 'train',
    'boosting_type': 'goss',  # gradient-based one-side sampling
    'objective': 'binary',
    'metric': 'auc',
    'num_leaves': 31,
    'learning_rate': 0.01,
    'feature_fraction': 1.0,
    'min_data_in_leaf': 5,
    'top_rate': 0.1,    # GOSS: keep the 10% of instances with the largest gradients
    'other_rate': 0.05, # GOSS: randomly sample 5% of the rest
    # 'num_threads': 20,
    'verbose': -1
}
data = lgb.Dataset(X, y)
train_start = time.time()
clf = lgb.train(params, data, num_boost_round=self.base_round_num)
train_end = time.time()
# Extrapolate the time for k * base_round_num rounds from one timed run;
# `ratio` and `time_budget` come from the (truncated) enclosing method.
estimated_train_time = (np.arange(self.max_round) + 1) * (train_end - train_start) * ratio
idx = np.arange(self.max_round)[estimated_train_time <= time_budget]
if idx.shape[0] == 0:
    # Even the smallest run exceeds the budget; fall back to the base round count.
    self.suggested_boost_round = self.base_round_num
    self.suggested_train_time = estimated_train_time[0]
else:
    self.suggested_boost_round = (idx[-1] + 1) * self.base_round_num
    self.suggested_train_time = estimated_train_time[idx[-1]]
return self.suggested_boost_round
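# A minimal usage sketch; the method name and enclosing class (providing
# `base_round_num` and `max_round`) are truncated in the source, so every
# name below is hypothetical:
#
#     n_rounds = suggester.suggest_boost_round(X, y, ratio=1.0, time_budget=60.0)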
def prepare(self):
    self.dtrain = lgb.Dataset(self.data.X_train, self.data.y_train,
                              free_raw_data=False)
# (the statement that opens this params dict is truncated in the source;
# the variable name is assumed)
params = {
    'boosting_type': 'gbdt',
    'objective': 'multiclass',
    'metric': 'multi_logloss',
    'max_depth': 5,
    'num_leaves': 31,
    'learning_rate': 0.025,
    'feature_fraction': 0.85,
    'lambda_l2': 1.5,
    'num_class': n_categories,
}
for i, (train_fold, validate) in enumerate(kf):
    print('Fold {}/{}'.format(i + 1, 5))
    X_train, X_validate, label_train, label_validate = \
        X.iloc[train_fold, :], X.iloc[validate, :], train_label[train_fold], train_label[validate]
    lgb_train = lgb.Dataset(X_train, label_train, feature_name=feature_names, weight=weights_train[train_fold])
    lgb_valid = lgb.Dataset(X_validate, label_validate, feature_name=feature_names, weight=weights_train[validate])
    lgb_test = lgb.Dataset(X_test, feature_name=feature_names, weight=weights_test)
    bst = lgb.train(
        params_lgb,
        lgb_train,
        num_boost_round=2000,
        valid_sets=[lgb_train, lgb_valid],
        early_stopping_rounds=100,
        verbose_eval=50,
    )
    best_trees.append(bst.best_iteration)
    # ax = lgb.plot_importance(bst, max_num_features=10, grid=False, height=0.8, figsize=(16, 8))
    # plt.show()
if f == 0 and verbose:
    print("Training with params : ")
    print(params)
y_train = train_y[train_index]
y_val = train_y[val_index]
if isinstance(train_x, pd.DataFrame):
    # .ix was removed from pandas; use positional indexing instead.
    X_train = train_x.iloc[train_index]
    X_val = train_x.iloc[val_index]
else:
    X_train = train_x[train_index]
    X_val = train_x[val_index]
train_data = lgbm.Dataset(X_train, label=y_train, feature_name='auto', categorical_feature=categorical)
valid_data = lgbm.Dataset(X_val, label=y_val, feature_name='auto', categorical_feature=categorical)
# Cast integer-valued hyperparameters (often sampled as floats) back to int.
params['max_depth'] = int(params['max_depth'])
params['num_leaves'] = int(params['num_leaves'])
params['bagging_freq'] = int(params['bagging_freq'])
params['max_bin'] = int(params['max_bin'])
params['min_data_in_leaf'] = int(params['min_data_in_leaf'])
if stopping_rounds < 0:
    params['num_boost_round'] = int(params['num_boost_round'])
    model = lgbm.train(params, train_data)
    preds_val = model.predict(X_val)
else:
    model = lgbm.train(params, train_data, num_boost_round=1000, valid_sets=valid_data,
                       verbose_eval=verbose, early_stopping_rounds=stopping_rounds)
# Step 1a: Flatten dataset
x_test = x_test[0:5]
y_test = y_test[0:5]
nb_samples_train = x_train.shape[0]
nb_samples_test = x_test.shape[0]
x_train = x_train.reshape((nb_samples_train, 28 * 28))
x_test = x_test.reshape((nb_samples_test, 28 * 28))
# Step 2: Create the model
params = {'objective': 'multiclass', 'metric': 'multi_logloss', 'num_class': 10}
train_set = lgb.Dataset(x_train, label=np.argmax(y_train, axis=1))
test_set = lgb.Dataset(x_test, label=np.argmax(y_test, axis=1))
model = lgb.train(params=params, train_set=train_set, num_boost_round=100, valid_sets=[test_set])
# Step 3: Create the ART classifier
classifier = LightGBMClassifier(model=model, clip_values=(min_pixel_value, max_pixel_value))
# Step 4: Train the ART classifier
# The model has already been trained in step 2
# Step 5: Evaluate the ART classifier on benign test examples
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))
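# Step 6 (not in the original snippet): a minimal sketch of evaluating the
# classifier against adversarial examples with ART's black-box ZooAttack,
# which works with tree-based models such as LightGBM; the parameter values
# below are illustrative assumptions, not tuned settings.
from art.attacks.evasion import ZooAttack

attack = ZooAttack(classifier=classifier, max_iter=20, nb_parallel=1,
                   batch_size=1, use_resize=False, use_importance=False)
x_test_adv = attack.generate(x=x_test)
predictions_adv = classifier.predict(x_test_adv)
accuracy_adv = np.sum(np.argmax(predictions_adv, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy on adversarial test examples: {}%'.format(accuracy_adv * 100))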