Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_sum_match_extra_trees():
    """Check SHAP additivity for an ``ExtraTreesRegressor``.

    Fits a 10-tree, depth-10 regressor on an 80/20 split of the data returned
    by ``shap.datasets.adult()`` and asserts that, for every held-out row, the
    per-feature SHAP values plus the explainer's expected value match the
    model's prediction to within 1e-6.
    """
    # Function-local imports: these dependencies are only needed when the
    # test body actually runs.
    import numpy as np
    import shap
    from sklearn.ensemble import ExtraTreesRegressor
    from sklearn.model_selection import train_test_split

    # The held-out labels are never consulted, hence the throwaway binding.
    X_train, X_test, Y_train, _ = train_test_split(
        *shap.datasets.adult(), test_size=0.2, random_state=0
    )

    model = ExtraTreesRegressor(random_state=202, n_estimators=10, max_depth=10)
    model.fit(X_train, Y_train)
    predicted = model.predict(X_test)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Sum the attributions across features (axis 1) and add the base value.
    reconstructed = shap_values.sum(1) + explainer.expected_value
    assert np.abs(reconstructed - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
def test_sum_match_random_forest():
    """Check SHAP additivity for class 0 of a ``RandomForestClassifier``.

    Fits a 10-tree, depth-10 classifier on an 80/20 split of the data returned
    by ``shap.datasets.adult()`` and asserts that, for every held-out row, the
    class-0 SHAP values plus the class-0 expected value match the class-0
    probability from ``predict_proba`` to within 1e-6.
    """
    # NOTE(review): a function with this same name is defined again later in
    # this file; within one module the later definition replaces this one.

    # Function-local imports: these dependencies are only needed when the
    # test body actually runs.
    import numpy as np
    import shap
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    # The held-out labels are never consulted, hence the throwaway binding.
    X_train, X_test, Y_train, _ = train_test_split(
        *shap.datasets.adult(), test_size=0.2, random_state=0
    )

    model = RandomForestClassifier(random_state=202, n_estimators=10, max_depth=10)
    model.fit(X_train, Y_train)
    predicted = model.predict_proba(X_test)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Per the shap documentation, releases before 0.45 return a list holding
    # one (samples, features) array per class, while newer releases return a
    # single (samples, features, outputs) array. Pick class 0 in either layout
    # instead of assuming the list form.
    if isinstance(shap_values, list):
        class0_values = shap_values[0]
    else:
        class0_values = np.asarray(shap_values)[..., 0]

    # Sum the attributions across features (axis 1) and add the base value.
    reconstructed = class0_values.sum(1) + explainer.expected_value[0]
    assert np.abs(reconstructed - predicted[:, 0]).max() < 1e-6, \
        "SHAP values don't sum to model output!"
def test_sum_match_gradient_boosting_regressor():
    """Check SHAP additivity for a ``GradientBoostingRegressor``.

    Fits a 10-stage, depth-10 regressor on an 80/20 split of the data returned
    by ``shap.datasets.adult()`` and asserts that, for every held-out row, the
    per-feature SHAP values plus the explainer's expected value match the
    model's prediction to within 1e-6.
    """
    # Function-local imports: these dependencies are only needed when the
    # test body actually runs.
    import numpy as np
    import shap
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.model_selection import train_test_split

    # The held-out labels are never consulted, hence the throwaway binding.
    X_train, X_test, Y_train, _ = train_test_split(
        *shap.datasets.adult(), test_size=0.2, random_state=0
    )

    model = GradientBoostingRegressor(random_state=202, n_estimators=10, max_depth=10)
    model.fit(X_train, Y_train)
    predicted = model.predict(X_test)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Sum the attributions across features (axis 1) and add the base value.
    reconstructed = shap_values.sum(1) + explainer.expected_value
    assert np.abs(reconstructed - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
def test_sum_match_random_forest():
    """Check SHAP additivity for class 0 of a ``RandomForestClassifier``.

    Fits a 10-tree, depth-10 classifier on an 80/20 split of the data returned
    by ``shap.datasets.adult()`` and asserts that, for every held-out row, the
    class-0 SHAP values plus the class-0 expected value match the class-0
    probability from ``predict_proba`` to within 1e-6.
    """
    # NOTE(review): a function with this same name is also defined earlier in
    # this file; within one module this later definition replaces the earlier
    # one, so only one of the two would be collected. Confirm whether they
    # are meant to live in the same module.

    # Function-local imports: these dependencies are only needed when the
    # test body actually runs.
    import numpy as np
    import shap
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    # The held-out labels are never consulted, hence the throwaway binding.
    X_train, X_test, Y_train, _ = train_test_split(
        *shap.datasets.adult(), test_size=0.2, random_state=0
    )

    model = RandomForestClassifier(random_state=202, n_estimators=10, max_depth=10)
    model.fit(X_train, Y_train)
    predicted = model.predict_proba(X_test)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Per the shap documentation, releases before 0.45 return a list holding
    # one (samples, features) array per class, while newer releases return a
    # single (samples, features, outputs) array. Pick class 0 in either layout
    # instead of assuming the list form.
    if isinstance(shap_values, list):
        class0_values = shap_values[0]
    else:
        class0_values = np.asarray(shap_values)[..., 0]

    # Sum the attributions across features (axis 1) and add the base value.
    reconstructed = class0_values.sum(1) + explainer.expected_value[0]
    assert np.abs(reconstructed - predicted[:, 0]).max() < 1e-6, \
        "SHAP values don't sum to model output!"
def test_explain_model_keras(self, tabular_explainer):
    """Run the shared DNN explanation routine on a Keras-built classifier.

    Splits the data returned by ``shap.datasets.adult()`` 80/20 with a fixed
    seed, builds a model via ``create_keras_classifier`` from the training
    portion, and hands everything to ``self._explain_model_dnn_common``.

    :param tabular_explainer: forwarded unchanged as the first argument of
        ``self._explain_model_dnn_common``.
    """
    features, labels = shap.datasets.adult()
    # The held-out labels are not needed by anything below.
    x_train, x_test, y_train, _ = train_test_split(
        features, labels, test_size=0.2, random_state=7
    )

    # Fit a keras DNN model
    keras_model = create_keras_classifier(x_train.values, y_train)
    test_logger.info('Running explain global for test_explain_model_keras')

    # Column labels of the full (unsplit) feature table.
    feature_names = features.columns.values
    self._explain_model_dnn_common(
        tabular_explainer, keras_model, x_train, x_test, y_train, feature_names
    )