Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def test_lambdarank(self):
    """Train an LGBMRanker on the lambdarank example data with early
    stopping and a decaying learning-rate callback, then check the best
    iteration and the NDCG@1 / NDCG@3 validation scores.
    """
    example_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               '../../examples/lambdarank')
    X_train, y_train = load_svmlight_file(os.path.join(example_dir, 'rank.train'))
    X_test, y_test = load_svmlight_file(os.path.join(example_dir, 'rank.test'))
    q_train = np.loadtxt(os.path.join(example_dir, 'rank.train.query'))
    q_test = np.loadtxt(os.path.join(example_dir, 'rank.test.query'))
    ranker = lgb.LGBMRanker(n_estimators=50)
    # Learning rate decays linearly from 0.1 but never drops below 0.01.
    decay = lambda x: max(0.01, 0.1 - 0.01 * x)
    ranker.fit(X_train, y_train, group=q_train,
               eval_set=[(X_test, y_test)], eval_group=[q_test],
               eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
               callbacks=[lgb.reset_parameter(learning_rate=decay)])
    self.assertLessEqual(ranker.best_iteration_, 24)
    self.assertGreater(ranker.best_score_['valid_0']['ndcg@1'], 0.6333)
    self.assertGreater(ranker.best_score_['valid_0']['ndcg@3'], 0.6048)
def test_lightgbm_ranking():
    """Train a LightGBM ranker (early stopping + decaying learning rate)
    on the shap example ranking dataset and validate its SHAP values.

    Skips (returns early) when lightgbm is not installed.
    """
    try:
        import lightgbm
    except ImportError:
        # Was a bare ``except:``, which would also swallow unrelated
        # failures such as KeyboardInterrupt/SystemExit; only a missing
        # package should cause the skip.
        print("Skipping test_lightgbm_ranking!")
        return
    import shap

    # train lightgbm ranker model
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    model = lightgbm.LGBMRanker()
    model.fit(x_train, y_train, group=q_train, eval_set=[(x_test, y_test)],
              eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5,
              verbose=False,
              callbacks=[lightgbm.reset_parameter(
                  learning_rate=lambda x: 0.95 ** x * 0.1)])
    _validate_shap_values(model, x_test)
def test_lightgbm_ranking():
    """Train a LightGBM ranker (early stopping + decaying learning rate)
    on the shap example ranking dataset and validate its SHAP values.

    Skips (returns early) when lightgbm is not installed.
    """
    try:
        import lightgbm
    except ImportError:
        # Catch only the missing-package case; the original bare
        # ``except:`` would hide every other error raised here.
        print("Skipping test_lightgbm_ranking!")
        return
    import shap

    # train lightgbm ranker model
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    model = lightgbm.LGBMRanker()
    model.fit(x_train, y_train, group=q_train, eval_set=[(x_test, y_test)],
              eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5,
              verbose=False,
              callbacks=[lightgbm.reset_parameter(
                  learning_rate=lambda x: 0.95 ** x * 0.1)])
    _validate_shap_values(model, x_test)
def test_lambdarank(self):
    """Fit the sklearn-style ranker on the lambdarank example files and
    compare against the native-API booster via the loader's checks.
    """
    loader = FileLoader('../../examples/lambdarank', 'rank')
    X_train, y_train, _ = loader.load_dataset('.train', is_sparse=True)
    X_test, _, X_test_fn = loader.load_dataset('.test', is_sparse=True)
    group_train = loader.load_field('.train.query')
    lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
    ranker = lgb.LGBMRanker(**loader.params)
    ranker.fit(X_train, y_train, group=group_train)
    sk_pred = ranker.predict(X_test)
    # Verify the sklearn wrapper against a natively-trained model and
    # against a model re-loaded from file.
    loader.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    loader.file_load_check(lgb_train, '.train')
# NOTE(review): this span is the interior of a cross-validation fold loop.
# The enclosing ``for``/``if`` headers and many names (``clf``, ``fold_``,
# ``oof``, ``val``, ``val_idx``, ``features``, ``target``, ``t0``, ...) are
# defined outside this view; the original (indentation-stripped) code is
# preserved byte-for-byte — comments only have been added.
feat_importance = clf.feature_importances_
best_iteration = clf.best_iteration_
# best_iteration_ may be None when early stopping did not trigger;
# -1 presumably means "use all iterations" in predict() — TODO confirm.
if best_iteration is None:
best_iteration = -1
oof[val_idx] = clf.predict(val[features],num_iteration=best_iteration)
else:
# NOTE(review): this branch appears to stack a logistic regression on top
# of the GBDT (GBDT_LR wrapper) — its definition is not in view.
gLR = GBDT_LR(clf)
gLR.fit(X_train, Y_train, eval_set=[(X_test, Y_test)],eval_metric="auc", verbose=1000)
feat_importance = gLR.feature_importance()
best_iteration = -1
clf=gLR
oof[val_idx] = clf.predict(train_df.iloc[val_idx][features], y_=target.iloc[val_idx],
num_iteration=best_iteration)
else: #lambda ranker
gbr = lgb.LGBMRanker()
gbr.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
# Collect this fold's feature importances for later aggregation.
fold_importance_df = pd.DataFrame()
fold_importance_df["feature"] = features
fold_importance_df["importance"] = feat_importance
fold_importance_df["fold"] = fold_ + 1
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
# Average test predictions across folds (divisor 5 suggests 5-fold CV —
# confirm against the enclosing loop).
predictions += clf.predict(test_df[features], num_iteration=best_iteration) / 5
# Per-fold validation AUC on the out-of-fold predictions.
fold_score = roc_auc_score(Y_test, oof[val_idx])
print("fold n°{} time={} score={}".format(fold_,time.time()-t0,fold_score))
#break
# Overall out-of-fold AUC once all folds are done.
cv_score = roc_auc_score(target, oof)
print("CV score: {:<8.5f}".format(cv_score))
def train(tmp_dir, output_model):
    """Fit an LGBMRanker on the svmlight-format training file in
    *tmp_dir* (with per-query group sizes from the companion CSV) and
    save the fitted booster to *output_model*.
    """
    data_path = os.path.join(tmp_dir, "train_mt.csv")
    sizes_path = os.path.join(tmp_dir, "train_mt_size.csv")
    features, labels = load_svmlight_file(data_path)
    group_sizes = np.loadtxt(sizes_path)
    ranker = lgb.LGBMRanker(
        boosting_type='gbdt',
        num_leaves=16,
        max_depth=-1,
        learning_rate=0.1,
        n_estimators=100,
        min_child_samples=5,
    )
    ranker.fit(features, labels, group=group_sizes)
    # Persist the underlying booster, not the sklearn wrapper.
    ranker.booster_.save_model(output_model)