# explain the model's predictions using SHAP values
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# visualize the first prediction's explanation
shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])
# visualize the training set predictions
shap.force_plot(explainer.expected_value, shap_values, X)
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
shap.dependence_plot(5, shap_values, X, show=False)
shap.dependence_plot("RM", shap_values, X, show=False)
# summarize the effects of all the features
shap.summary_plot(shap_values, X, show=False)
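# A minimal, hypothetical setup for the snippet above (not part of the original): any tree
# ensemble and pandas DataFrame will do. This dataset has no "RM" column, so in the
# dependence-plot call substitute one of its features, e.g. "MedInc".
import shap
import xgboost
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing(as_frame=True)
X, y = housing.data, housing.target
model = xgboost.XGBRegressor(n_estimators=100, max_depth=4).fit(X, y)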
# explain the model's predictions using SHAP values (LightGBM can also produce these directly via pred_contrib; see the sketch after this snippet)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# visualize the first prediction's explanation
shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])
# visualize the training set predictions
shap.force_plot(explainer.expected_value, shap_values, X)
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
shap.dependence_plot(5, shap_values, X, show=False)
shap.dependence_plot("RM", shap_values, X, show=False)
# summarize the effects of all the features
shap.summary_plot(shap_values, X, show=False)
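# The pred_contrib note above refers to LightGBM's built-in contribution output, which
# matches TreeExplainer's SHAP values. A hedged sketch, assuming `model` is a trained
# lightgbm.Booster (regression or binary classification) and X is a 2-D array/DataFrame;
# the last column of the result holds the expected (base) value.
contribs = model.predict(X, pred_contrib=True)
shap_values_lgb = contribs[:, :-1]  # per-feature contributions
base_value_lgb = contribs[0, -1]    # expected value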
#ALE plots: a faster and unbiased alternative to partial dependence plots (PDPs). PDPs have a serious problem when features are correlated:
#computing a partial dependence plot for a feature that is strongly correlated with other features involves averaging predictions over artificial data instances that are unlikely in reality, which can greatly bias the estimated feature effect. ALE plots avoid this by averaging effects over the conditional distribution instead.
#https://github.com/blent-ai/ALEPython
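# A hedged ALEPython usage sketch; the import path and ale_plot signature below are
# assumptions based on the project README linked above, so verify against the repo.
from alepython import ale_plot

ale_plot(model, X, 'some_feature', monte_carlo=True)  # 'some_feature' is a placeholder column of X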
#SHAP Values: understand how each feature affects every individual prediction
import shap
data_for_prediction = X_val.iloc[row_num]  # row_num: index of the single row to explain
explainer = shap.TreeExplainer(model) #Use DeepExplainer for Deep Learning models, KernelExplainer for all other models
shap_vals = explainer.shap_values(data_for_prediction)
shap.initjs()
shap.force_plot(explainer.expected_value[1], shap_vals[1], data_for_prediction)
#We can also do a SHAP plot of the whole dataset
shap_vals = explainer.shap_values(X_val)
shap.summary_plot(shap_vals[1], X_val)
#SHAP Dependence plot
shap.dependence_plot('feature_for_x', shap_vals[1], X_val, interaction_index="feature_for_color")
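# A hedged sketch of the KernelExplainer route mentioned above, for models that are not
# tree ensembles (assumptions: `model` exposes predict_proba and X_train is a DataFrame).
# KernelExplainer is slow, so it is given a small background sample for the expectations.
background = shap.sample(X_train, 100)
kernel_explainer = shap.KernelExplainer(model.predict_proba, background)
kernel_shap_vals = kernel_explainer.shap_values(data_for_prediction)
shap.force_plot(kernel_explainer.expected_value[1], kernel_shap_vals[1], data_for_prediction)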
#Local interpretable model-agnostic explanations (LIME)
#Surrogate models are trained to approximate the predictions of the underlying black box model. Instead of training a global surrogate model, LIME focuses on training local surrogate models to explain individual predictions.
#https://github.com/marcotcr/lime
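# A hedged LIME sketch (assumptions: a classifier with predict_proba, a pandas DataFrame
# X_train, and the same data_for_prediction row used in the SHAP example above).
from lime.lime_tabular import LimeTabularExplainer

lime_explainer = LimeTabularExplainer(
    X_train.values,
    feature_names=X_train.columns.tolist(),
    mode="classification",
)
lime_exp = lime_explainer.explain_instance(
    data_for_prediction.values, model.predict_proba, num_features=5
)
lime_exp.show_in_notebook()  # or lime_exp.as_list() outside a notebook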
#Dimensionality reduction
#SVD: find the percentage of variance explained by each principal component
#Center (and optionally scale) the data first, otherwise the leading component mostly captures the column means
import numpy as np
U, S, Vt = np.linalg.svd(df - df.mean(), full_matrices=False)
importance = S**2 / (S**2).sum()  # variance explained is proportional to the squared singular values
variance_explained = importance.cumsum() * 100
#PCA: decompose the data into a defined number of components while keeping as much variance as possible.
from sklearn.decomposition import PCA
pca = PCA(n_components=2, svd_solver='full')
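# A hedged continuation (assumption: `df` is an all-numeric pandas DataFrame): standardize,
# fit the PCA defined above, and check how much variance the two components retain.
from sklearn.preprocessing import StandardScaler

df_scaled = StandardScaler().fit_transform(df)
components = pca.fit_transform(df_scaled)
print(pca.explained_variance_ratio_)        # fraction of variance kept per component
print(pca.explained_variance_ratio_.sum())  # total variance retained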
feature_names = self.get_human_readable_feature_names()
if self.calculate_importance and len(feature_names):
    explainer = shap.TreeExplainer(self.clf)
    shap_values = explainer.shap_values(X_train)

    # In the binary case, sometimes shap returns a single shap values matrix.
    if is_binary and not isinstance(shap_values, list):
        shap_values = [-shap_values, shap_values]
        summary_plot_value = shap_values[1]
        summary_plot_type = "layered_violin"
    else:
        summary_plot_value = shap_values
        summary_plot_type = None

    shap.summary_plot(
        summary_plot_value,
        to_array(X_train),
        feature_names=feature_names,
        class_names=self.class_names,
        plot_type=summary_plot_type,
        show=False,
    )

    # Label the axis before saving so the label appears in the exported image.
    matplotlib.pyplot.xlabel("Impact on model output")
    matplotlib.pyplot.savefig("feature_importance.png", bbox_inches="tight")
    matplotlib.pyplot.clf()

    important_features = self.get_important_features(
        importance_cutoff, shap_values
    )
def plot_shap_values(self, shap_dict=None):
    """
    Calculates and plots the distribution of Shapley values of each feature, for each treatment group.
    Skips the calculation step if shap_dict is given.
    """
    if shap_dict is None:
        shap_dict = self.get_shap_values()

    for group, values in shap_dict.items():
        plt.title(group)
        shap.summary_plot(values, feature_names=self.features)
def summary_plot(self, plot_type="violin", alpha=0.3):
    """Plot type options: violin, layered_violin, dot."""
    return shap.summary_plot(self.shap_values, self.df, alpha=alpha, plot_type=plot_type)
def visualize_model(model, X, idx, configuration, namespace, name):
    if configuration['enabled'] and idx % configuration['n_iterations'] == 0:
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)
        shap.summary_plot(shap_values, X, plot_type="bar", show=False)
        save_fig(namespace, name, idx, importance_type='shap')

        if name == 'XGBOOST':
            for i in ['weight', 'cover', 'gain']:
                if i == 'gain':
                    xgb.plot_importance(model.get_score(fmap='', importance_type=i), importance_type=i, max_num_features=20)
                else:
                    xgb.plot_importance(model, importance_type=i, max_num_features=20)
                save_fig(namespace, name, idx, importance_type=i)
        elif name == 'LIGHTGBM':
            for i in ['split', 'gain']:
                lgb.plot_importance(model, importance_type=i, max_num_features=20)
                save_fig(namespace, name, idx, importance_type=i)
        else:
            pass
def summary_plot(self, output_file="", **summaryplot_kwargs):
    """
    Plots a SHAP summary plot.

    Parameters
    ----------
    output_file: str
        Output file name including extension (.png, .jpg, etc.) to save image as.
    """
    import shap

    shap.summary_plot(
        self.shap_values,
        self.x_test_array,
        feature_names=self.x_train.columns,
        show=False,
        **summaryplot_kwargs,
    )

    if output_file:  # pragma: no cover
        pl.savefig(os.path.join(IMAGE_DIR, self.model_name, output_file))
def importance_plot(self):
    return shap.summary_plot(self.shap_values, self.df, plot_type="bar")