Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Compare how two gradient-boosting libraries behave when the same categorical
# columns are declared in different ways. Relies on X, X_test, y, params and
# isMORT being defined earlier in the enclosing scope (not visible here).

# Cast the four feature columns to the pandas 'category' dtype in both frames.
for col in ["A", "B", "C", "D"]:
    X[col] = X[col].astype('category')
    X_test[col] = X_test[col].astype('category')
#trn_data = lgb.Dataset(X, label=y)
if isMORT:
    # Fit LiteMORT four times, varying only the categorical_feature argument:
    # omitted, by position, by a single column name, by all four column names.
    mort0 = LiteMORT(params).fit(X, y)
    pred0 = list(mort0.predict(X_test))
    mort1 = LiteMORT(params).fit(X, y, categorical_feature=[0])
    pred1 = list(mort1.predict(X_test))
    mort2 = LiteMORT(params).fit(X, y, categorical_feature=['A'])
    pred2 = list(mort2.predict(X_test))
    mort3 = LiteMORT(params).fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
    pred3 = list(mort3.predict(X_test))
else:
    # NOTE(review): clf / gbm_ are never read again in the visible code.
    clf = lgb.sklearn.LGBMClassifier()
    gbm_ = clf.fit(X, y)
    # Same four categorical_feature variants as above, using LightGBM.
    gbm0 = lgb.sklearn.LGBMClassifier().fit(X, y)
    pred0 = list(gbm0.predict(X_test))
    gbm1 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=[0])
    pred1 = list(gbm1.predict(X_test))
    gbm2 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A'])
    pred2 = list(gbm2.predict(X_test))
    gbm3 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
    pred3 = list(gbm3.predict(X_test))
    # Round-trip the last model through a file and predict with the reloaded booster.
    gbm3.booster_.save_model('categorical.model')
    gbm4 = lgb.Booster(model_file='categorical.model')
    pred4 = list(gbm4.predict(X_test))
    # The reloaded booster's output is compared against gbm0's positive-class
    # probabilities; gbm0 and pred4 only exist in this branch, so the check
    # must live here.
    pred_prob = list(gbm0.predict_proba(X_test)[:, 1])
    np.testing.assert_almost_equal(pred_prob, pred4)
# NOTE(review): the original indentation was lost, so it is unclear whether this
# pause belonged to the else-branch only; it is placed after both branches here.
# It blocks on stdin, so this script cannot run unattended - confirm intent.
input("...")
#np.testing.assert_almost_equal(pred0, pred1)
mort1 = LiteMORT(params).fit(X, y, categorical_feature=[0])
pred1 = list(mort1.predict(X_test))
mort2 = LiteMORT(params).fit(X, y, categorical_feature=['A'])
pred2 = list(mort2.predict(X_test))
mort3 = LiteMORT(params).fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
pred3 = list(mort3.predict(X_test))
else:
clf=lgb.sklearn.LGBMClassifier()
gbm_ = clf.fit(X, y)
gbm0 = lgb.sklearn.LGBMClassifier().fit(X, y)
pred0 = list(gbm0.predict(X_test))
gbm1 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=[0])
pred1 = list(gbm1.predict(X_test))
gbm2 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A'])
pred2 = list(gbm2.predict(X_test))
gbm3 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
pred3 = list(gbm3.predict(X_test))
gbm3.booster_.save_model('categorical.model')
gbm4 = lgb.Booster(model_file='categorical.model')
pred4 = list(gbm4.predict(X_test))
pred_prob = list(gbm0.predict_proba(X_test)[:, 1])
np.testing.assert_almost_equal(pred_prob, pred4)
input("...")
#np.testing.assert_almost_equal(pred0, pred1)
def test_lightgbm_multiclass():
    """Smoke-test SHAP explanation and plotting for a LightGBM classifier on iris.

    Fits a default ``LGBMClassifier`` on ``shap.datasets.iris()``, computes
    SHAP values with ``shap.TreeExplainer`` and draws a dependence plot for
    feature 0 using the first entry of the returned SHAP values. Nothing is
    asserted; the test passes if no call raises.

    The test is skipped (prints a message and returns ``None``) when
    ``lightgbm`` cannot be imported.
    """
    try:
        import lightgbm
    except Exception:
        # Best-effort skip: any import-time failure skips the test, but unlike
        # a bare ``except:`` this no longer swallows KeyboardInterrupt or
        # SystemExit.
        print("Skipping test_lightgbm_multiclass!")
        return
    import shap

    # train lightgbm model
    X, Y = shap.datasets.iris()
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X, Y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)
def test_sklearn_integration(self):
    """Run the estimator checks against LGBMClassifier and LGBMRegressor.

    For each estimator class this first calls
    ``check_parameters_default_constructible`` on the class itself, then
    builds an instance and runs every check yielded by ``_yield_all_checks``
    (presumably scikit-learn's estimator-check helpers - confirm against the
    file's imports). ``check_estimators_nan_inf`` is skipped, and a
    ``SkipTest`` raised by a check is downgraded to a ``SkipTestWarning``
    instead of aborting the loop.
    """
    # we cannot use `check_estimator` directly since there is no skip test mechanism
    estimator_classes = (
        (lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
        (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor),
    )
    for name, estimator_cls in estimator_classes:
        check_parameters_default_constructible(name, estimator_cls)
        # we cannot leave default params (see https://github.com/microsoft/LightGBM/issues/833)
        estimator = estimator_cls(min_child_samples=1, min_data_in_bin=1)
        for check in _yield_all_checks(name, estimator):
            # Checks may be wrapped objects exposing the real function as `.func`.
            check_name = check.func.__name__ if hasattr(check, 'func') else check.__name__
            if check_name == 'check_estimators_nan_inf':
                continue  # skip test because LightGBM deals with nan
            try:
                check(name, estimator)
            except SkipTest as message:
                warnings.warn(message, SkipTestWarning)
self.model,
out_file=None,
feature_names=self.features,
class_names=classes,
rounded=True,
precision=True,
filled=True,
)
)
display(SVG(graph.pipe(format="svg")))
elif isinstance(self.model, xgb.XGBModel):
return xgb.plot_tree(self.model)
elif isinstance(self.model, lgb.sklearn.LGBMModel):
return lgb.plot_tree(self.model)
elif isinstance(self.model, sklearn.ensemble.BaseEnsemble):
estimator = self.model.estimators_[tree_num]
graph = Source(
sklearn.tree.export_graphviz(
estimator,
out_file=None,
feature_names=self.features,
class_names=classes,
rounded=True,
precision=True,
filled=True,
)
)
def create_estimator(model_params):
    """Build an unfitted ranking estimator for the engine named in ``model_params``.

    Args:
        model_params: Mapping with an ``'engine'`` key. For ``'XGBRanker'``
            the keys ``'learning_rate'``, ``'max_depth'`` and
            ``'n_estimators'`` are also read; for ``'LGBMRanker'`` only
            ``'learning_rate'`` and ``'n_estimators'`` are read.

    Returns:
        An ``xgb.sklearn.XGBRanker`` (objective ``rank:pairwise``) or an
        ``lgb.sklearn.LGBMRanker`` (objective ``lambdarank``), or ``None``
        when the engine is neither of the two names above.

    Raises:
        KeyError: If ``'engine'`` or a key required by the selected engine
            is missing from ``model_params``.
    """
    engine = model_params['engine']

    if engine == 'XGBRanker':
        params = {
            'objective': 'rank:pairwise',
            'learning_rate': model_params['learning_rate'],
            #'gamma': 1.0,
            #'min_child_weight': 0.1,
            'max_depth': model_params['max_depth'],
            'n_estimators': model_params['n_estimators'],
        }
        return xgb.sklearn.XGBRanker(**params)

    if engine == 'LGBMRanker':
        params = {
            'objective': 'lambdarank',
            'learning_rate': model_params['learning_rate'],
            # Fixed at -1 here rather than taken from model_params['max_depth'].
            'max_depth': -1,
            'n_estimators': model_params['n_estimators'],
        }
        return lgb.sklearn.LGBMRanker(**params)

    # Unrecognised engine: keep the original fall-through result, made explicit.
    return None
self.model,
out_file=None,
feature_names=self.features,
class_names=classes,
rounded=True,
precision=True,
filled=True,
)
)
display(SVG(graph.pipe(format="svg")))
elif isinstance(self.model, xgb.XGBModel):
return xgb.plot_tree(self.model)
elif isinstance(self.model, lgb.sklearn.LGBMModel):
return lgb.plot_tree(self.model)
elif isinstance(self.model, cb.CatBoost):
return self.model.plot_tree(tree_idx=tree_num, pool=self.pool)
elif isinstance(self.model, sklearn.ensemble.BaseEnsemble):
estimator = self.model.estimators_[tree_num]
graph = Source(
sklearn.tree.export_graphviz(
estimator,
out_file=None,
feature_names=self.features,
class_names=classes,
rounded=True,
precision=True,