def test_explain_linear_tuple_top(newsgroups_train):
    docs, y, target_names = newsgroups_train
    vec = TfidfVectorizer()
    clf = LogisticRegression(random_state=42)
    X = vec.fit_transform(docs)
    clf.fit(X, y)

    res_neg = explain_weights(clf, vec=vec, target_names=target_names, top=(0, 10))
    expl_neg, _ = format_as_all(res_neg, clf)

    for target in res_neg.targets:
        assert len(target.feature_weights.pos) == 0
        assert len(target.feature_weights.neg) == 10

    assert "+0." not in expl_neg

    res_pos = explain_weights(clf, vec=vec, target_names=target_names, top=(10, 2))
    format_as_all(res_pos, clf)

    for target in res_pos.targets:
        assert len(target.feature_weights.pos) == 10
        assert len(target.feature_weights.neg) == 2
def assert_explained_weights_linear_regressor(boston_train, reg, has_bias=True):
    X, y, feature_names = boston_train
    reg.fit(X, y)
    res = explain_weights(reg)
    expl_text, expl_html = format_as_all(res, reg)
    for expl in [expl_text, expl_html]:
        assert 'x12' in expl
        assert 'x5' in expl
    if has_bias:
        assert '<BIAS>' in expl_text
        assert '&lt;BIAS&gt;' in expl_html
    pos, neg = top_pos_neg(res, 'y')
    assert 'x12' in pos or 'x12' in neg
    assert 'x5' in neg or 'x5' in pos
    if has_bias:
        assert '<BIAS>' in neg or '<BIAS>' in pos

    assert res == explain_weights(reg)
def test_feature_importances_no_remaining(clf):
    """ Check that the number of remaining features is not shown if it is zero,
    and that features with zero importance are not shown either.
    """
    n = 100
    clf.fit(np.array([[i % 2 + 0.1 * np.random.random(), 0] for i in range(n)]),
            np.array([i % 2 for i in range(n)]))
    res = explain_weights(clf)
    for expl in format_as_all(res, clf):
        assert 'more features' not in expl and 'more …' not in expl
        assert 'x1' not in expl  # it has zero importance
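# Hedged standalone sketch of the behaviour checked above, with an illustrative
# RandomForestClassifier standing in for the `clf` fixture: eli5 reports feature
# importances for tree ensembles, the constant second column gets zero
# importance, so 'x1' should be absent from the formatted output.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from eli5 import explain_weights
from eli5.formatters import format_as_text

X = np.array([[i % 2 + 0.1 * np.random.random(), 0] for i in range(100)])
y = np.array([i % 2 for i in range(100)])
forest = RandomForestClassifier(n_estimators=10, random_state=42).fit(X, y)
print(format_as_text(explain_weights(forest)))  # no 'x1', no 'more features' line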
docs, y, target_names = newsgroups_train
X = vec.fit_transform(docs)
clf.fit(X, y)

if isinstance(vec, HashingVectorizer):
    vec = InvertableHashingVectorizer(vec)
    vec.fit(docs)

res = explain_weights(clf, vec=vec, feature_re='^ath')
text_expl, _ = expls = format_as_all(res, clf)
for expl in expls:
    assert 'atheists' in expl
    assert 'atheism' in expl
    assert 'space' not in expl
    assert 'BIAS' not in expl

res = explain_weights(
    clf, vec=vec,
    feature_filter=lambda name: name.startswith('ath') or name == '<BIAS>')
text_expl, _ = expls = format_as_all(res, clf)
for expl in expls:
    assert 'atheists' in expl
    assert 'atheism' in expl
    assert 'space' not in expl
    assert 'BIAS' in expl
assert '<BIAS>' in text_expl
def explain_weights_dfs(estimator, **kwargs):
    # type: (...) -> Dict[str, pd.DataFrame]
    """ Explain weights and export them to a dict with ``pandas.DataFrame``
    values (as :func:`eli5.formatters.as_dataframe.format_as_dataframes` does).
    All keyword arguments are passed to :func:`eli5.explain_weights`.
    Weights of all features are exported by default.
    """
    kwargs = _set_defaults(kwargs)
    return format_as_dataframes(
        eli5.explain_weights(estimator, **kwargs))
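# Minimal usage sketch for the dict-of-DataFrames export above (assumes pandas
# is installed); the dict is keyed by explanation part, e.g. 'targets' for a
# linear model or 'feature_importances' for a tree ensemble. The iris data and
# model choice here are illustrative, not part of the function above.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
import eli5

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)
dfs = eli5.explain_weights_dfs(clf)
for name, df in dfs.items():  # e.g. 'targets' for a linear classifier
    print(name, df.shape)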
def __explain_model(dataset, round_id, pipe_model, model, feature_names):
    """
    explain the weights and the prediction of the model
    :param dataset: dataset
    :param round_id: round id
    :param pipe_model: the pipeline including the model
    :param model: the model only
    :param feature_names: feature names
    :return:
    """
    try:
        exp = eli5.explain_weights(model, feature_names=list(feature_names))
        with open(get_dataset_folder(dataset.dataset_id) + '/predict/eli5_model_%s.html' % round_id, 'w') as f:
            f.write(eli5.format_as_html(exp))
    except:
        # eli5 does not support every estimator; skip the report silently
        return
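# The same report-writing pattern in isolation, without the project-specific
# helpers used above: explain the weights, render them with
# eli5.format_as_html, and save the result to a file. The model, data and
# output path here are illustrative assumptions.
from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier
import eli5

X, y = load_iris(return_X_y=True)
model = GradientBoostingClassifier().fit(X, y)
exp = eli5.explain_weights(model, feature_names=['f0', 'f1', 'f2', 'f3'])
with open('eli5_model.html', 'w') as f:
    f.write(eli5.format_as_html(exp))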
def explain_weights_df(estimator, **kwargs):
    # type: (...) -> pd.DataFrame
    """ Explain weights and export them to ``pandas.DataFrame``.
    All keyword arguments are passed to :func:`eli5.explain_weights`.
    Weights of all features are exported by default.
    """
    kwargs = _set_defaults(kwargs)
    return format_as_dataframe(
        eli5.explain_weights(estimator, **kwargs))
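# Usage sketch for the single-DataFrame export above (assumes pandas); for a
# linear regressor the frame typically has 'target', 'feature' and 'weight'
# columns, one row per weight. The diabetes data and Ridge model are
# illustrative choices, not part of the function above.
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
import eli5

X, y = load_diabetes(return_X_y=True)
reg = Ridge().fit(X, y)
df = eli5.explain_weights_df(reg)
print(df.head())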
def explain_weights(self, **kwargs):
    """
    Call :func:`eli5.explain_weights` for the locally-fit
    classification pipeline. Keyword arguments are passed
    to :func:`eli5.explain_weights`.

    :func:`fit` must be called before using this method.
    """
    self._fix_target_names(kwargs)
    return eli5.explain_weights(self.clf_, vec=self.vec_, **kwargs)
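# The method above has the shape of eli5.lime.TextExplainer.explain_weights
# (an assumption based on the `clf_`/`vec_` attributes). If so, a typical use,
# with an illustrative black-box text pipeline, would be:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from eli5.lime import TextExplainer

train = fetch_20newsgroups(subset='train',
                           categories=['alt.atheism', 'sci.space'])
pipe = make_pipeline(TfidfVectorizer(), LogisticRegression(random_state=42))
pipe.fit(train.data, train.target)

te = TextExplainer(random_state=42)
te.fit(train.data[0], pipe.predict_proba)  # fit a local white-box model
te.explain_weights(target_names=train.target_names)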