import sklearn.svm
import shap
from sklearn.model_selection import train_test_split
# print the JS visualization code to the notebook
shap.initjs()
# train an SVM classifier
X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
svm = sklearn.svm.SVC(kernel='rbf', probability=True)
svm.fit(X_train, Y_train)
# use Kernel SHAP to explain test set predictions
# (nsamples and l1_reg are arguments of shap_values(), not of the KernelExplainer constructor)
explainer = shap.KernelExplainer(svm.predict_proba, X_train, link="logit")
shap_values = explainer.shap_values(X_test, nsamples=100, l1_reg="rank(3)")
# plot the SHAP values for the Setosa output of the first instance
shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")
import sklearn.svm
import shap
from sklearn.model_selection import train_test_split
# print the JS visualization code to the notebook
shap.initjs()
# train an SVM classifier
X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
svm = sklearn.svm.SVC(kernel='rbf', probability=True)
svm.fit(X_train, Y_train)
# use Kernel SHAP to explain test set predictions (nsamples belongs to shap_values(), not the constructor)
explainer = shap.KernelExplainer(svm.predict_proba, X_train, link="logit")
shap_values = explainer.shap_values(X_test, nsamples=100)
# plot the SHAP values for the Setosa output of the first instance
shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")
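# A follow-on sketch in the spirit of the shap README: stack the force plots for every test instance
# to see the Setosa-class explanation across the whole test set. `explainer` and `shap_values` are
# the objects computed above.
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test, link="logit")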
import xgboost
import shap
# load JS visualization code to notebook
shap.initjs()
# train XGBoost model
X, y = shap.datasets.boston()
model = xgboost.train({"learning_rate": 0.01, "silent": 1}, xgboost.DMatrix(X, label=y), 100)
# explain the model's predictions using SHAP values
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# visualize the first prediction's explanation
shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])
# visualize the training set predictions
shap.force_plot(explainer.expected_value, shap_values, X)
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
shap.dependence_plot(5, shap_values, X, show=False)
shap.dependence_plot("RM", shap_values, X, show=False)
# summarize the effects of all the features
shap.summary_plot(shap_values, X, show=False)
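# A hedged extension of the snippet above: for tree models, TreeExplainer can also compute SHAP
# interaction values, and summary_plot breaks them down into pairwise interaction effects.
shap_interaction_values = explainer.shap_interaction_values(X)
shap.summary_plot(shap_interaction_values, X, show=False)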
Plots a SHAP force plot.
"""
import shap
shap_values = forceplot_kwargs.pop("shap_values", self.shap_values)
if sample_no is not None:
    # validate the type before comparing, so a non-numeric sample_no raises a clear error
    if not isinstance(sample_no, int) or sample_no < 1:
        raise ValueError("Sample number must be an integer greater than or equal to 1.")
    samples = slice(sample_no - 1, sample_no)
else:
    samples = slice(0, len(shap_values))
s = shap.force_plot(
    self.expected_value,
    shap_values[samples],
    self.x_train.columns,
    **forceplot_kwargs,
)
if output_file:  # pragma: no cover
    pl.savefig(os.path.join(IMAGE_DIR, self.model_name, output_file))
return s
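# A hedged usage note for the method above: shap.force_plot returns an interactive JS object by
# default, so pl.savefig only captures a figure when the plot is rendered through matplotlib
# (pass matplotlib=True, show=False in forceplot_kwargs; this works for single-instance plots only).
# The interactive JS version can instead be written out with shap.save_html("force_plot.html", s).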
EXPLANATION_TARGET = 1
# get the actual explanation
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(training_df)
# show plot
shap.summary_plot(shap_values, training_df)
shap.summary_plot(shap_values, training_df, plot_type="bar")
# TODO not yet working
shap.force_plot(explainer.expected_value, shap_values[EXPLANATION_TARGET, :],
training_df.iloc[EXPLANATION_TARGET, :])
shap.force_plot(explainer.expected_value, shap_values, training_df)
shap.dependence_plot("RM", shap_values, training_df)
shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], test_df.iloc[0, :],
link="logit") # TODO subst with EXPLANATION_TARGET
shap.force_plot(explainer.expected_value[0], shap_values[0], test_df, link="logit")
print('done')
return Response(shap_values, status=200)
def many_force_plot(self, loc, sample=10000, plot_cmap=["#00cc00", "#002266"]):
    return shap.force_plot(self.explainer.expected_value, self.shap_values[:loc, :], features=self.features, plot_cmap=plot_cmap)
#2D partial dependence (interaction) plot with the PDPBox library
from pdpbox import pdp
import matplotlib.pyplot as plt
inter1 = pdp.pdp_interact(model=model, dataset=X_val, model_features=X_val.columns, features=features_to_plot)
pdp.pdp_interact_plot(pdp_interact_out=inter1, feature_names=features_to_plot, plot_type='contour')
plt.show()
#ALE Plots: a faster and unbiased alternative to partial dependence plots (PDPs), which have a serious problem when features are correlated.
#Computing a partial dependence plot for a feature that is strongly correlated with other features involves averaging predictions over artificial data instances that are unlikely in reality, which can greatly bias the estimated feature effect. ALE plots avoid this by averaging local prediction differences within small intervals of the feature.
#https://github.com/blent-ai/ALEPython
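# A rough, self-contained sketch of first-order ALE using only numpy/pandas, if you prefer not to add
# the ALEPython dependency above; `model` and `X_val` are the objects from this snippet, and the
# feature name is a placeholder. For a classifier, pass a probability, e.g. lambda d: model.predict_proba(d)[:, 1].
import numpy as np

def ale_1d(predict_fn, X, feature, n_bins=20):
    """Rough accumulated local effects for one numeric feature (X is a pandas DataFrame)."""
    # bin edges from quantiles so every bin contains observed data
    edges = np.unique(np.quantile(X[feature], np.linspace(0, 1, n_bins + 1)))
    bin_idx = np.digitize(X[feature], edges[1:-1])
    effects, counts = [], []
    for k in range(len(edges) - 1):
        in_bin = X[bin_idx == k]
        if len(in_bin) == 0:
            effects.append(0.0)
            counts.append(0)
            continue
        lo, hi = in_bin.copy(), in_bin.copy()
        lo[feature] = edges[k]
        hi[feature] = edges[k + 1]
        # local effect: prediction change as the feature crosses the bin, other features held fixed
        effects.append(float(np.mean(predict_fn(hi) - predict_fn(lo))))
        counts.append(len(in_bin))
    ale = np.cumsum(effects)
    ale = ale - np.average(ale, weights=np.maximum(counts, 1))  # center the curve
    return edges[1:], ale

# e.g.: x_right, ale = ale_1d(model.predict, X_val, "some_feature")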
#SHAP Values: Understand how each feature affects every individual prediction
import shap
data_for_prediction = X_val.iloc[row_num]
explainer = shap.TreeExplainer(model) #Use DeepExplainer for Deep Learning models, KernelExplainer for all other models
shap_vals = explainer.shap_values(data_for_prediction)
shap.initjs()
shap.force_plot(explainer.expected_value[1], shap_vals[1], data_for_prediction)
#We can also do a SHAP plot of the whole dataset
shap_vals = explainer.shap_values(X_val)
shap.summary_plot(shap_vals[1], X_val)
#SHAP Dependence plot
shap.dependence_plot('feature_for_x', shap_vals[1], X_val, interaction_index="feature_for_color")
#Local interpretable model-agnostic explanations (LIME)
#Surrogate models are trained to approximate the predictions of the underlying black box model. Instead of training a global surrogate model, LIME focuses on training local surrogate models to explain individual predictions.
#https://github.com/marcotcr/lime
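# A minimal LIME sketch for the tabular case using the lime package linked above; `model` and `X_val`
# are the objects from this snippet, assumed here to be a fitted classifier (with predict_proba) and a
# pandas DataFrame, and `row_num` is the row explained earlier with SHAP.
import numpy as np
from lime.lime_tabular import LimeTabularExplainer

lime_explainer = LimeTabularExplainer(
    training_data=np.array(X_val),
    feature_names=list(X_val.columns),
    mode="classification",
)
# fit a local surrogate around one instance and list the most influential features
exp = lime_explainer.explain_instance(
    data_row=X_val.iloc[row_num].values,
    predict_fn=model.predict_proba,
    num_features=5,
)
exp.show_in_notebook(show_table=True)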
#Dimensionality reduction
#SVD: Find the percentage of variance explained by each principal component
#First scale (or at least center) the data; the variance explained by a component is proportional to its squared singular value
import numpy as np
U, S, V = np.linalg.svd(df, full_matrices=False)
importance = S**2 / (S**2).sum()
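# A short sketch of the "scale first" step and a cross-check against scikit-learn's PCA: on centered
# data, PCA's explained_variance_ratio_ should match the squared-singular-value ratio computed above.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

df_scaled = StandardScaler().fit_transform(df)            # center (and scale) each column
U, S, V = np.linalg.svd(df_scaled, full_matrices=False)
print(S**2 / np.sum(S**2))                                # variance explained per component
print(PCA().fit(df_scaled).explained_variance_ratio_)     # should agree with the line above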