Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from sklearn.linear_model import Ridge
# Seed NumPy's global RNG so the synthetic data set is reproducible.
np.random.seed(0)
coef = np.array([1, 2]).T

# Synthetic linear data: Gaussian features, intercept of 1, small Gaussian noise.
X = np.random.normal(loc=1, scale=10, size=(1000, len(coef)))
noise = np.random.normal(scale=0.1, size=1000)
y = np.dot(X, coef) + 1 + noise

# Fit a ridge regression on the generated data.
model = Ridge(alpha=0.1)
model.fit(X, y)

# Compute SHAP values for every row with the linear explainer.
explainer = shap.LinearExplainer(model, X)
values = explainer.shap_values(X)
assert values.shape == (1000, 2)

# Reference attribution: generating coefficient times the feature's offset
# from its column mean; the explainer must agree within 0.01.
expected = (X - X.mean(axis=0)) * coef
np.testing.assert_allclose(expected - values, 0, atol=0.01)
from scipy.special import expit
# Seed NumPy's global RNG before generating the data set.
np.random.seed(0)
n_features = 20
X, y = make_multilabel_classification(
    n_samples=100,
    sparse=True,
    n_features=n_features,
    n_classes=1,
    n_labels=2,
)

# Fit a logistic regression on the sparse design matrix.
model = sklearn.linear_model.LogisticRegression()
model.fit(X, y)

# Compute SHAP values with the linear explainer.
explainer = shap.LinearExplainer(model, X)
shap_values = explainer.shap_values(X)

# The expected value plus the row-wise sum of attributions, passed through the
# logistic function, must match the predicted probability of class 1.
predicted = expit(explainer.expected_value + shap_values[0].sum(1))
max_gap = np.max(np.abs(predicted - model.predict_proba(X)[:, 1]))
assert max_gap < 1e-6
def test_perfect_colinear():
    """Check that LinearExplainer stays additive on degenerate feature columns.

    The data set is altered so that it contains a duplicated column and two
    all-zero columns; the SHAP values of each row must then still sum to
    the prediction minus the mean prediction.
    """
    import shap
    from sklearn.linear_model import LinearRegression
    import numpy as np

    X, y = shap.datasets.boston()
    X.iloc[:, 0] = X.iloc[:, 4]  # column 0 becomes a copy of column 4
    X.iloc[:, 5] = X.iloc[:, 6] - X.iloc[:, 6]  # column 6 minus itself (multiple colinear features)
    X.iloc[:, 3] = 0  # null feature

    model = LinearRegression()
    model.fit(X, y)

    explainer = shap.LinearExplainer(model, X, feature_dependence="correlation")
    shap_values = explainer.shap_values(X)

    # Additivity: row sums of SHAP values versus centred predictions.
    predictions = model.predict(X)
    residual = shap_values.sum(1) - predictions + predictions.mean()
    assert np.abs(residual).sum() < 1e-7
def test_tied_pair():
    """Check credit sharing between two almost perfectly correlated features.

    Only the first feature has a non-zero coefficient, but it is correlated
    0.999999 with the second one; the test expects SHAP values close to
    [0.5, 0.5, 0] (tolerance 0.05) for an all-ones sample.
    """
    np.random.seed(0)

    weights = np.array([1, 0, 0])
    mean = np.zeros(3)
    covariance = np.array([
        [1, 0.999999, 0],
        [0.999999, 1, 0],
        [0, 0, 1],
    ])
    sample = np.ones((1, 3))

    explainer = shap.LinearExplainer(
        (weights, 0), (mean, covariance), feature_dependence="correlation"
    )

    target = np.array([0.5, 0.5, 0])
    deviation = np.abs(explainer.shap_values(sample) - target)
    assert deviation.max() < 0.05
# NOTE(review): this is a fragment. The branch header it belongs to lies above the
# visible text and the original indentation was lost, so the nesting implied by the
# comments below is inferred from statement order -- confirm against the full file.
# Cross-validate `model` on the training frame with the scoring configured on `alg`.
scores = cross_val_score(model, dataframe_train.values, dataframe_label.values.ravel(),
cv=int(variables.get("N_SPLITS")), scoring=alg.scoring)
# Loss is one minus the mean cross-validation score.
loss = 1 - np.mean(scores)
# Algorithms whose name starts with "TPOT" or "AutoSklearn" get no KernelExplainer here.
if (not alg.name.startswith("TPOT") and not alg.name.startswith("AutoSklearn")):
model_explainer = shap.KernelExplainer(model.predict_proba, dataframe_train) # feature importance
# Anomaly algorithms: same scoring and loss formula, explainer wraps model.predict.
if alg.type == 'anomaly':
scores = cross_val_score(model, dataframe_train.values, dataframe_label.values.ravel(),
cv=int(variables.get("N_SPLITS")), scoring=alg.scoring)
loss = 1 - np.mean(scores)
model_explainer = shap.KernelExplainer(model.predict, dataframe_train) # feature importance
# Regression algorithms: loss is the absolute value of the mean cross-validation score.
if alg.type == 'regression':
scores = cross_val_score(model, dataframe_train.values, dataframe_label.values.ravel(),
cv=int(variables.get("N_SPLITS")), scoring=alg.scoring)
loss = np.abs(np.mean(scores))
# The two named linear models use LinearExplainer; the other branch builds a
# KernelExplainer unless the algorithm name starts with "TPOT" or "AutoSklearn".
if alg.name == 'BayesianRidgeRegression' or alg.name == 'LinearRegression':
model_explainer = shap.LinearExplainer(model, dataframe_train)
else:
if (not alg.name.startswith("TPOT") and not alg.name.startswith("AutoSklearn")):
model_explainer = shap.KernelExplainer(model.predict, dataframe_train)
# -------------------------------------------------------------
# Check if sampling is enabled for AutoSklearn
#
# When alg.sampling is set, refit the model on copies of the training data and labels.
if alg.sampling:
model.refit(dataframe_train.values.copy(), dataframe_label.values.ravel().copy())
# -------------------------------------------------------------
# Get the fitted model from TPOT
#
# For the two TPOT algorithm names, replace `model` with its fitted_pipeline_ attribute.
if alg.name == 'TPOT_Regressor' or alg.name == 'TPOT_Classifier':
model = model.fitted_pipeline_
else:
# NOTE(review): the body of this branch continues past the visible text.
# -------------------------------------------------------------
# Non-supervised algorithms
:param seed: Random number seed.
:type seed: int
"""
# NOTE(review): body of a function whose signature is above the visible text;
# `model`, `multiclass`, `mean`, `covariance` and `seed` come from there.
# Seed NumPy's global RNG with the supplied seed.
np.random.seed(seed)
if multiclass:
# Multiclass: build one shap.LinearExplainer per (coefficient row, intercept) pair
# and return them as a list.
explainers = []
coefs = model.coef_
intercepts = model.intercept_
# Normalise an ndarray of intercepts via tolist() so the list check below can match it
# (tolist() on a 0-d array yields a scalar, which then takes the else branch).
if isinstance(intercepts, np.ndarray):
intercepts = intercepts.tolist()
if isinstance(intercepts, list):
# List of intercepts: pair each coefficient row with the intercept at the same position.
coef_intercept_list = zip(coefs, intercepts)
else:
# Non-list intercept: reuse the same value for every coefficient row.
coef_intercept_list = [(coef, intercepts) for coef in coefs]
for class_coef, intercept in coef_intercept_list:
linear_explainer = shap.LinearExplainer((class_coef, intercept), (mean, covariance),
feature_dependence=SHAPDefaults.INDEPENDENT)
explainers.append(linear_explainer)
return explainers
else:
# Otherwise: a single explainer built from the model's coef_ and intercept_ as they are.
model_coef = model.coef_
model_intercept = model.intercept_
return shap.LinearExplainer((model_coef, model_intercept), (mean, covariance),
feature_dependence=SHAPDefaults.INDEPENDENT)
]
This would not work since it is hard to make out whether my_own_transformer gives a many to many or one to many
mapping when taking a sequence of columns.
:type transformations: sklearn.compose.ColumnTransformer or list[tuple]
:param allow_all_transformations: Allow many to many and many to one transformations
:type allow_all_transformations: bool
"""
# NOTE(review): body of an initializer whose signature is above the visible text.
# No data mapper unless transformations are supplied.
self._datamapper = None
if transformations is not None:
# The helper returns a data mapper plus a replacement for initialization_examples
# (presumably the transformed examples, going by the helper's name -- confirm).
self._datamapper, initialization_examples = get_datamapper_and_transformed_data(
examples=initialization_examples, transformations=transformations,
allow_all_transformations=allow_all_transformations)
# Initialise the parent class with the (possibly replaced) initialization examples.
super(LinearExplainer, self).__init__(model, initialization_examples, **kwargs)
self._logger.debug('Initializing LinearExplainer')
self._method = 'shap.linear'
# NOTE(review): self.model and self.initialization_examples are presumably set by the
# parent initializer above -- confirm.
self.explainer = shap.LinearExplainer(self.model, self.initialization_examples,
feature_dependence=SHAPDefaults.INDEPENDENT)
# Keep the remaining constructor arguments on the instance.
self.explain_subset = explain_subset
self.features = features
self.classes = classes
self.transformations = transformations
self._allow_all_transformations = allow_all_transformations
# NOTE(review): fragment starting mid-function; `coefs`, `explainers`, `mean`,
# `covariance` and the `if` that the `else:` below belongs to are above the visible text.
intercepts = model.intercept_
# Normalise an ndarray of intercepts via tolist() so the list check below can match it
# (tolist() on a 0-d array yields a scalar, which then takes the else branch).
if isinstance(intercepts, np.ndarray):
intercepts = intercepts.tolist()
if isinstance(intercepts, list):
# List of intercepts: pair each coefficient row with the intercept at the same position.
coef_intercept_list = zip(coefs, intercepts)
else:
# Non-list intercept: reuse the same value for every coefficient row.
coef_intercept_list = [(coef, intercepts) for coef in coefs]
# Build one shap.LinearExplainer per (coefficient row, intercept) pair.
for class_coef, intercept in coef_intercept_list:
linear_explainer = shap.LinearExplainer((class_coef, intercept), (mean, covariance),
feature_dependence=SHAPDefaults.INDEPENDENT)
explainers.append(linear_explainer)
return explainers
else:
# Otherwise: a single explainer built from the model's coef_ and intercept_ as they are.
model_coef = model.coef_
model_intercept = model.intercept_
return shap.LinearExplainer((model_coef, model_intercept), (mean, covariance),
feature_dependence=SHAPDefaults.INDEPENDENT)
def __init__(self, *argv, **kwargs):
    """
    Initialize the wrapper around a shap LinearExplainer object.

    All positional and keyword arguments are forwarded unchanged, first to
    the parent class initializer and then to ``shap.LinearExplainer``. The
    resulting shap explainer is stored on ``self.explainer``.

    :param argv: Positional arguments passed through to both initializers.
    :param kwargs: Keyword arguments passed through to both initializers.
    """
    super(LinearExplainer, self).__init__(*argv, **kwargs)
    self.explainer = shap.LinearExplainer(*argv, **kwargs)
def linear_shap_ind(model, data):
    """ Linear SHAP (ind)

    Builds a LinearExplainer for ``model`` and ``data`` with
    feature_dependence="independent" and returns its ``shap_values``
    attribute itself (it is not called here).
    """
    explainer = LinearExplainer(model, data, feature_dependence="independent")
    return explainer.shap_values