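# Assumed import header for these snippets (not part of the original excerpt).
# The scikit-learn, numpy, pandas, pytest, LightGBM, Keras and eli5 imports
# below are standard; helpers such as format_as_all, get_all_features,
# check_targets_scores, check_explain_linear_binary, _without_weighted_spans,
# _set_defaults and the newsgroups_* / reg fixtures come from eli5's own test
# utilities, so their exact import paths are assumptions here.
from pprint import pprint
from typing import List

import numpy as np
import pandas as pd
import pytest
from keras.models import Sequential
from lightgbm import LGBMClassifier
from sklearn.base import BaseEstimator
from sklearn.datasets import make_regression
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, HashingVectorizer
from sklearn.linear_model import LinearRegression, LogisticRegression

import eli5
from eli5 import explain_prediction, explain_weights
from eli5.base import Explanation
from eli5.formatters.as_dataframe import format_as_dataframe
from eli5.sklearn import InvertableHashingVectorizer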
def test_explain_linear_dense():
    clf = LogisticRegression(random_state=42)
    data = [{'day': 'mon', 'moon': 'full'},
            {'day': 'tue', 'moon': 'rising'},
            {'day': 'tue', 'moon': 'rising'},
            {'day': 'mon', 'moon': 'rising'}]
    vec = DictVectorizer(sparse=False)
    X = vec.fit_transform(data)
    clf.fit(X, [0, 1, 1, 0])
    test_day = {'day': 'tue', 'moon': 'full'}
    target_names = ['sunny', 'shady']
    res1 = explain_prediction(clf, test_day, vec=vec, target_names=target_names)
    expl_text, expl_html = format_as_all(res1, clf)
    assert 'day=tue' in expl_text
    assert 'day=tue' in expl_html
    [test_day_vec] = vec.transform(test_day)
    res2 = explain_prediction(
        clf, test_day_vec, target_names=target_names,
        vectorized=True, feature_names=vec.get_feature_names())
    assert res1 == res2


def test_explain_prediction_clf_multitarget(newsgroups_train):
    docs, ys, target_names = newsgroups_train
    vec = CountVectorizer(stop_words='english', dtype=np.float64)
    xs = vec.fit_transform(docs)
    clf = LGBMClassifier(n_estimators=100, max_depth=2,
                         min_child_samples=1, min_child_weight=1)
    clf.fit(xs, ys)
    doc = 'computer graphics in space: a new religion'
    res = explain_prediction(clf, doc, vec=vec, target_names=target_names)
    format_as_all(res, clf)
    check_targets_scores(res)
    graphics_weights = res.targets[1].feature_weights
    assert 'computer' in get_all_features(graphics_weights.pos)
    religion_weights = res.targets[3].feature_weights
    assert 'religion' in get_all_features(religion_weights.pos)

    top_target_res = explain_prediction(clf, doc, vec=vec, top_targets=2)
    assert len(top_target_res.targets) == 2
    assert sorted(t.proba for t in top_target_res.targets) == sorted(
        t.proba for t in res.targets)[-2:]


def test_explain_regression_hashing_vectorizer(newsgroups_train_binary):
    docs, y, target_names = newsgroups_train_binary
    vec = HashingVectorizer(norm=None)
    clf = LinearRegression()
    clf.fit(vec.fit_transform(docs), y)

    # Set a large "top" in order to compare the result with CountVectorizer
    # below: due to small differences in the coefficients, the two might have
    # cutoffs at different points.
    res = explain_prediction(
        clf, docs[0], vec=vec, target_names=[target_names[1]], top=1000)
    expl, _ = format_as_all(res, clf)
    assert len(res.targets) == 1
    e = res.targets[0]
    assert e.target == 'comp.graphics'
    neg = get_all_features(e.feature_weights.neg)
    assert 'objective' in neg
    assert 'that' in neg
    assert 'comp.graphics' in expl
    assert 'objective' in expl
    assert 'that' in expl

    # HashingVectorizer with norm=None is "the same" as CountVectorizer,
    # so we can compare the two and check that the explanations are almost
    # the same.
    count_vec = CountVectorizer()
    count_clf = LinearRegression()
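    # A hedged sketch of how the truncated comparison might continue (assumed,
    # not the original code): fit the CountVectorizer-based model and check
    # that its negative features include the same words as the hashing-based
    # explanation above.
    count_clf.fit(count_vec.fit_transform(docs), y)
    count_res = explain_prediction(
        count_clf, docs[0], vec=count_vec, target_names=[target_names[1]],
        top=1000)
    count_neg = get_all_features(count_res.targets[0].feature_weights.neg)
    assert 'objective' in count_neg
    assert 'that' in count_neg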


def test_explain_hashing_vectorizer(newsgroups_train_binary):
    # test that we can pass InvertableHashingVectorizer explicitly
    vec = HashingVectorizer(n_features=1000)
    ivec = InvertableHashingVectorizer(vec)
    clf = LogisticRegression(random_state=42)
    docs, y, target_names = newsgroups_train_binary
    ivec.fit([docs[0]])
    X = vec.fit_transform(docs)
    clf.fit(X, y)

    get_res = lambda **kwargs: explain_prediction(
        clf, docs[0], vec=ivec, target_names=target_names, top=20, **kwargs)
    res = get_res()
    check_explain_linear_binary(res, clf)
    assert res == get_res()
    res_vectorized = explain_prediction(
        clf, vec.transform([docs[0]])[0], vec=ivec, target_names=target_names,
        top=20, vectorized=True)
    pprint(res_vectorized)
    assert res_vectorized == _without_weighted_spans(res)
    assert res == get_res(
        feature_names=ivec.get_feature_names(always_signed=False))


def test_explain_tree_regressor_multitarget(reg):
    X, y = make_regression(n_samples=100, n_targets=3, n_features=10,
                           random_state=42)
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    for expl in format_as_all(res, reg):
        for target in ['y0', 'y1', 'y2']:
            assert target in expl
        assert 'BIAS' in expl
        assert any('x%d' % i in expl for i in range(10))
    check_targets_scores(res)

    top_targets_res = explain_prediction(reg, X[0], top_targets=1)
    assert len(top_targets_res.targets) == 1


def test_explain_prediction_not_supported():
    res = eli5.explain_prediction(Sequential(), np.zeros((0,)))
    assert 'supported' in res.error


def assert_class_used(clf, X, y, **explain_kwargs):
    # type: (...) -> List[Explanation]
    """ Check that classes y are used for explanations of X predictions """
    explanations = []
    for x, pred_target in zip(X, y):
        res = explain_prediction(clf, x, **explain_kwargs)  # type: Explanation
        explanations.append(res)
        assert len(res.targets) == 1
        if res.targets[0].score != 0:
            assert res.targets[0].target == pred_target
    return explanations
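

# A hedged usage sketch for assert_class_used (assumed setup, not from the
# original suite): with top_targets=1 each explanation keeps only the
# highest-scoring class, which for a fitted classifier should match its
# prediction, so the checks above should pass. Assumes eli5 supports
# explain_prediction for DecisionTreeClassifier.
def test_assert_class_used_sketch():
    from sklearn.tree import DecisionTreeClassifier
    X = np.array([[0, 0], [1, 1], [0, 1], [1, 0], [2, 2], [2, 0]])
    y = [0, 1, 1, 0, 1, 0]
    clf = DecisionTreeClassifier(random_state=42).fit(X, y)
    assert_class_used(clf, X, clf.predict(X), top_targets=1)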


def test_explain_weights_unsupported():
    clf = BaseEstimator()
    res = explain_weights(clf)
    assert 'BaseEstimator' in res.error
    with pytest.raises(TypeError):
        explain_prediction(clf, unknown_argument=True)


def test_unsupported():
    vec = CountVectorizer()
    clf = BaseEstimator()
    doc = 'doc'
    res = explain_prediction(clf, doc, vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
    with pytest.raises(TypeError):
        explain_prediction(clf, doc, unknown_argument=True)


def explain_prediction_df(estimator, doc, **kwargs):
    # type: (...) -> pd.DataFrame
    """ Explain prediction and export explanation to ``pandas.DataFrame``.

    All keyword arguments are passed to :func:`eli5.explain_prediction`.
    Weights of all features are exported by default.
    """
    kwargs = _set_defaults(kwargs)
    return format_as_dataframe(
        eli5.explain_prediction(estimator, doc, **kwargs))
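

# A minimal usage sketch for explain_prediction_df (assumed setup, not from
# the original module): fit a simple regressor and export the explanation of
# one prediction as a DataFrame, typically one row per feature with its weight.
def _example_explain_prediction_df():
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    X, y = make_regression(n_samples=50, n_features=5, random_state=0)
    reg = LinearRegression().fit(X, y)
    df = explain_prediction_df(reg, X[0])
    print(df.head())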