How to use shap - 10 common examples

To help you get started, we've selected a few shap examples based on popular ways it is used in public projects.

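Most of the examples below follow the same pattern: fit a model, wrap it in a shap explainer, compute SHAP values, and plot them. Here is a minimal sketch of that pattern, condensed from the tree-model examples further down; it assumes a shap version in which shap.datasets.boston() is still available, as it is in those tests.

import sklearn.ensemble
import shap

# train any model shap can explain (a random forest regressor here, matching the tree examples below)
X, y = shap.datasets.boston()
model = sklearn.ensemble.RandomForestRegressor(n_estimators=100)
model.fit(X, y)

# build an explainer for the trained model and compute SHAP values for every row of X
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# load the JS plotting code and visualize the first prediction's explanation
shap.initjs()
shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

The examples then vary this pattern: KernelExplainer explains any predict function (the SVM and logistic regression tests), while TreeExplainer covers tree ensembles such as isolation forests, random forests, XGBoost, and LightGBM.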

From slundberg/shap: tests/explainers/test_tree.py (view on GitHub)

def test_isolation_forest():
    import shap
    import numpy as np
    from sklearn.ensemble import IsolationForest
    from sklearn.ensemble.iforest import _average_path_length

    # fit an isolation forest to the Boston housing data
    X, y = shap.datasets.boston()
    iso = IsolationForest(behaviour='new', contamination='auto')
    iso.fit(X)

    # explain the anomaly scores with TreeExplainer
    explainer = shap.TreeExplainer(iso)
    shap_values = explainer.shap_values(X)

    # the SHAP values plus the expected value recover the average path length,
    # which maps back onto the model's anomaly score
    score_from_shap = - 2**(
        - (np.sum(shap_values, axis=1) + explainer.expected_value) /
        _average_path_length(np.array([iso.max_samples_]))[0]
        )
    assert np.allclose(iso.score_samples(X), score_from_shap, atol=1e-7)

From slundberg/shap: tests/explainers/test_kernel.py (view on GitHub)

def test_front_page_model_agnostic():
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train an SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, nsamples=100, link="logit")
    shap_values = explainer.shap_values(X_test)

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")

From slundberg/shap: tests/explainers/test_kernel.py (view on GitHub)

def test_kernel_sparse_vs_dense_multirow_background():
    import sklearn
    import shap
    import numpy as np
    import scipy as sp
    import scipy.sparse  # makes sp.sparse available below
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    # train a logistic regression classifier
    X_train, X_test, Y_train, _ = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    lr = LogisticRegression(solver='lbfgs')
    lr.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions with dense data
    explainer = shap.KernelExplainer(lr.predict_proba, X_train, nsamples=100, link="logit", l1_reg="rank(3)")
    shap_values = explainer.shap_values(X_test)

    X_sparse_train = sp.sparse.csr_matrix(X_train)
    X_sparse_test = sp.sparse.csr_matrix(X_test)

    lr_sparse = LogisticRegression(solver='lbfgs')
    lr_sparse.fit(X_sparse_train, Y_train)

    # use Kernel SHAP again but with sparse data
    sparse_explainer = shap.KernelExplainer(lr.predict_proba, X_sparse_train, nsamples=100, link="logit", l1_reg="rank(3)")
    sparse_shap_values = sparse_explainer.shap_values(X_sparse_test)

    assert(np.allclose(shap_values, sparse_shap_values, rtol=1e-05, atol=1e-05))

From slundberg/shap: tests/explainers/test_kernel.py (view on GitHub)

def test_kernel_shap_with_a1a_sparse_nonzero_background():
    import shap
    import numpy as np
    import scipy as sp
    import scipy.sparse  # makes sp.sparse available below
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.utils.sparsefuncs import csc_median_axis_0

    np.set_printoptions(threshold=100000)
    np.random.seed(0)

    X, y = shap.datasets.a1a() # pylint: disable=unbalanced-tuple-unpacking
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=0)
    linear_model = LinearRegression()
    linear_model.fit(x_train, y_train)
    # Calculate median of background data
    median_dense = csc_median_axis_0(x_train.tocsc())
    median = sp.sparse.csr_matrix(median_dense)
    explainer = shap.KernelExplainer(linear_model.predict, median)
    shap_values = explainer.shap_values(x_test)

    def dense_to_sparse_predict(data):
        sparse_data = sp.sparse.csr_matrix(data)
        return linear_model.predict(sparse_data)

    explainer_dense = shap.KernelExplainer(dense_to_sparse_predict, median_dense.reshape((1, len(median_dense))))
    x_test_dense = x_test.toarray()
    shap_values_dense = explainer_dense.shap_values(x_test_dense)
    # Validate sparse and dense result is the same
    assert(np.allclose(shap_values, shap_values_dense, rtol=1e-02, atol=1e-01))

From slundberg/shap: tests/explainers/test_kernel.py (view on GitHub)

def test_front_page_model_agnostic_rank():  # function name inferred (snippet truncated in the source)
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train an SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, nsamples=100, link="logit", l1_reg="rank(3)")
    shap_values = explainer.shap_values(X_test)

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")

From slundberg/shap: tests/test_basic.py (view on GitHub)

def test_front_page_model_agnostic():  # function name inferred (snippet truncated in the source)
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train an SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, nsamples=100, link="logit")
    shap_values = explainer.shap_values(X_test)

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(shap_values[0][0, :], X_test.iloc[0, :], link="logit")

From slundberg/shap: tests/explainers/test_tree.py (view on GitHub)

def test_front_page_sklearn():  # function name and setup inferred (snippet truncated in the source)
    import sklearn.ensemble
    import shap

    X, y = shap.datasets.boston()
    models = [sklearn.ensemble.RandomForestRegressor(n_estimators=100)]

    for model in models:
        model.fit(X, y)

        # explain the model's predictions using SHAP values
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)

        # visualize the first prediction's explanation
        shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

        # visualize the training set predictions
        shap.force_plot(explainer.expected_value, shap_values, X)

        # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
        shap.dependence_plot(5, shap_values, X, show=False)
        shap.dependence_plot("RM", shap_values, X, show=False)

        # summarize the effects of all the features
        shap.summary_plot(shap_values, X, show=False)

From slundberg/shap: tests/explainers/test_tree.py (view on GitHub)

def test_lightgbm_multiclass():
    try:
        import lightgbm
    except:
        print("Skipping test_lightgbm_multiclass!")
        return
    import shap

    # train lightgbm model
    X, Y = shap.datasets.iris()
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X, Y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)

From slundberg/shap: tests/explainers/test_tree.py (view on GitHub)

def test_xgboost_multiclass():
    try:
        import xgboost
    except Exception as e:
        print("Skipping test_xgboost_multiclass!")
        return
    import shap

    # train XGBoost model
    X, Y = shap.datasets.iris()
    model = xgboost.XGBClassifier(objective="binary:logistic", max_depth=4)
    model.fit(X, Y)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)

From slundberg/shap: tests/test_basic.py (view on GitHub)

def test_front_page_sklearn():  # function name inferred (snippet truncated in the source)
    import sklearn.ensemble
    import shap

    # train model
    X, y = shap.datasets.boston()
    model = sklearn.ensemble.RandomForestRegressor(n_estimators=100)
    model.fit(X, y)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)