def test_lightgbm():
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm!")
        return
    import numpy as np
    import shap

    # train a LightGBM model
    X, y = shap.datasets.boston()
    model = lightgbm.sklearn.LGBMRegressor(categorical_feature=[8])
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    ex = shap.TreeExplainer(model)
    shap_values = ex.shap_values(X)

    # the per-row SHAP values plus the expected value should reproduce the raw model output
    predicted = model.predict(X, raw_score=True)
    assert np.abs(shap_values.sum(1) + ex.expected_value - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
def test_lightgbm_constant_prediction():
    # note: this test used to fail on TreeExplainer when trying to compute max nodes:
    # max_nodes = np.max([len(t.values) for t in self.trees])
    # The test does not fail with the latest lightgbm 2.2.3, however
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_constant_prediction!")
        return
    import numpy as np
    import shap

    # train a LightGBM model with a constant value for y
    X, y = shap.datasets.boston()
    # use the mean for all target values
    mean = np.mean(y)
    y.fill(mean)
    model = lightgbm.sklearn.LGBMRegressor(n_estimators=1)
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)
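# A hedged follow-up sketch (not part of the original test): the additivity property checked
# in test_lightgbm should also hold for the constant-prediction model, i.e. the per-row SHAP
# values plus the expected value should reproduce the raw model output. The tolerance is an
# assumption; the helper name is hypothetical.
def check_constant_prediction_additivity():
    import lightgbm
    import numpy as np
    import shap

    X, y = shap.datasets.boston()
    y.fill(np.mean(y))  # constant target, as in the test above
    model = lightgbm.sklearn.LGBMRegressor(n_estimators=1)
    model.fit(X, y)

    ex = shap.TreeExplainer(model)
    shap_values = ex.shap_values(X)
    predicted = model.predict(X, raw_score=True)
    assert np.abs(shap_values.sum(1) + ex.expected_value - predicted).max() < 1e-6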
def test_lightgbm_interaction():
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_interaction!")
        return
    import shap

    # train a LightGBM model
    X, y = shap.datasets.boston()
    model = lightgbm.sklearn.LGBMRegressor()
    model.fit(X, y)

    # verify symmetry of the interaction values (this typically breaks if anything is wrong)
    interaction_vals = shap.TreeExplainer(model).shap_interaction_values(X)
    for j in range(len(interaction_vals)):
        for k in range(len(interaction_vals[j])):
            for l in range(len(interaction_vals[j][k])):
                assert abs(interaction_vals[j][k][l] - interaction_vals[j][l][k]) < 1e-6
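# A hedged alternative to the triple loop above (not part of the original test): for a
# single-output regressor the interaction values form a (n_samples, n_features, n_features)
# array, so symmetry can be checked by comparing the array with its transpose over the last
# two axes. The helper name and tolerance are assumptions.
def check_interaction_symmetry(interaction_vals, atol=1e-6):
    import numpy as np
    assert np.allclose(interaction_vals,
                       np.transpose(interaction_vals, (0, 2, 1)),
                       atol=atol), "Interaction values are not symmetric!"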
def test_sklearn_integration(self):
    # we cannot use `check_estimator` directly since there is no skip test mechanism
    # (check_parameters_default_constructible and _yield_all_checks come from
    # sklearn.utils.estimator_checks)
    for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                            (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
        check_parameters_default_constructible(name, estimator)
        # we cannot leave default params (see https://github.com/microsoft/LightGBM/issues/833)
        estimator = estimator(min_child_samples=1, min_data_in_bin=1)
        for check in _yield_all_checks(name, estimator):
            check_name = check.func.__name__ if hasattr(check, 'func') else check.__name__
            if check_name == 'check_estimators_nan_inf':
                continue  # skip this check because LightGBM deals with nan
            try:
                check(name, estimator)
            except SkipTest as message:
                warnings.warn(message, SkipTestWarning)
def train_lightgbm(trn_x, val_x, trn_y, val_y):
    clf = LGBMRegressor(max_depth=50,
                        num_leaves=21,
                        n_estimators=5000,
                        min_child_weight=9,
                        learning_rate=0.01,
                        nthread=24,
                        subsample=0.80,
                        colsample_bytree=0.80,
                        seed=42)
    # note: passing `verbose` and `early_stopping_rounds` to fit() works on LightGBM < 4.0;
    # LightGBM 4.x expects the equivalent callbacks instead
    clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], verbose=True, eval_metric='l2',
            early_stopping_rounds=300)
    return clf
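# A hedged usage sketch for train_lightgbm (not from the original source; the split and
# parameters below are assumptions): hold out a validation fold, fit with early stopping,
# then predict with the best iteration found.
def example_train_lightgbm_usage(X, y):
    from sklearn.model_selection import train_test_split

    trn_x, val_x, trn_y, val_y = train_test_split(X, y, test_size=0.2, random_state=42)
    clf = train_lightgbm(trn_x, val_x, trn_y, val_y)
    # best_iteration_ is populated when early stopping triggers
    return clf.predict(val_x, num_iteration=clf.best_iteration_)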
    sklearn.ensemble.GradientBoostingRegressor: "regression",
    sklearn.ensemble.RandomForestClassifier: "classification",
    sklearn.ensemble.RandomForestRegressor: "regression",
    sklearn.naive_bayes.BernoulliNB: "classification",
    sklearn.naive_bayes.GaussianNB: "classification",
    sklearn.naive_bayes.MultinomialNB: "classification",
    sklearn.tree.DecisionTreeClassifier: "classification",
    sklearn.tree.DecisionTreeRegressor: "regression",
    sklearn.svm.LinearSVC: "classification",
    sklearn.svm.LinearSVR: "regression",
    sklearn.svm.SVC: "classification",
    sklearn.svm.SVR: "regression",
    xgb.XGBClassifier: "classification",
    xgb.XGBRegressor: "regression",
    lgb.sklearn.LGBMClassifier: "classification",
    lgb.sklearn.LGBMRegressor: "regression",
    cb.CatBoostRegressor: "regression",
    cb.CatBoostClassifier: "classification",
}
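# A hedged usage sketch (not from the original source): the truncated mapping above pairs
# estimator classes with a problem-type string. Its variable name is not shown, so the helper
# below takes the mapping as an argument rather than assuming one; the helper name is hypothetical.
def task_for_estimator(estimator, model_task_map):
    # model_task_map: {estimator class: "classification" | "regression"}, as listed above
    return model_task_map[type(estimator)]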
INTERPRET_EXPLAINERS = {
    "problem": {
        "classification": {"roc": ROC, "pr": PR},
        "regression": {"regperf": RegressionPerf},
    },
    "local": {"lime": LimeTabular, "shap": ShapKernel},
    "global": {"morris": MorrisSensitivity, "dependence": PartialDependence},
}
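# A hedged lookup sketch for INTERPRET_EXPLAINERS (not from the original source): constructor
# arguments of the interpret classes vary between releases, so only the selection step is shown;
# the helper name is hypothetical.
def select_explainers(problem_type):
    perf = INTERPRET_EXPLAINERS["problem"][problem_type]  # e.g. {"roc": ROC, "pr": PR}
    local = INTERPRET_EXPLAINERS["local"]                 # {"lime": LimeTabular, "shap": ShapKernel}
    global_ = INTERPRET_EXPLAINERS["global"]              # {"morris": ..., "dependence": ...}
    return perf, local, global_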
CLASS_METRICS_DESC = {
    "Accuracy": "Measures how many observations, both positive and negative, were correctly classified.",
    "Balanced Accuracy": "Balanced accuracy is used in binary and multiclass classification problems to deal with imbalanced datasets. It is defined as the average of the recall obtained on each class.",
def create_model(self):
    # TODO: if learning rates are identical throughout - create a regular Classifier
    self.model_params['n_estimators'] = self.best_n_iterations
    self.model_params["learning_rate"] = self.learning_rates[0]  # TODO change
    final_model = LGBMRegressor(**self.model_params)
    return final_model