def test_explain_prediction_clf_interval():
    # Only the first (integer) component determines the label: an interval
    # around x0 == 1, so feature x0 should dominate the explanation.
    true_xs = [[np.random.randint(3), np.random.randint(10)] for _ in range(1000)]
    xs = np.array([[np.random.normal(x, 0.2), np.random.normal(y, 0.2)]
                   for x, y in true_xs])
    ys = np.array([x == 1 for x, _ in true_xs])
    clf = XGBClassifier(n_estimators=100, max_depth=2)
    clf.fit(xs, ys)
    res = explain_prediction(clf, np.array([1.23, 1.45]))
    for expl in format_as_all(res, clf, show_feature_values=True):
        assert 'x0' in expl
        assert '1.23' in expl
    for x in [[0, 1], [1, 1], [2, 1], [0.8, 5], [1.2, 5]]:
        res = explain_prediction(clf, np.array(x))
        print(x)
        print(format_as_text(res, show=fields.WEIGHTS))
        check_targets_scores(res)


# Test FeatureUnion handling and missing features in dense matrix
transformer = lambda key: FunctionTransformer(
    lambda xs: np.array([[x.get(key, np.nan)] for x in xs]),
    validate=False)
vec = FeatureUnion([('x', transformer('x')), ('y', transformer('y'))])
gauss = np.random.normal
data = [(gauss(1), 2 + 10 * gauss(1)) for _ in range(200)]
ys = [-3 * x + y for x, y in data]
xs = [{'x': gauss(x), 'y': gauss(y)} for x, y in data]
for x in xs[:50]:
    del x['x']
for x in xs[-50:]:
    del x['y']
reg = XGBRegressor()
reg.fit(vec.transform(xs), ys)
res = explain_prediction(reg, xs[0], vec=vec, feature_names=['_x_', '_y_'])
check_targets_scores(res)
for expl in format_as_all(res, reg, show_feature_values=True):
    assert 'Missing' in expl
    assert '_y_' in expl
    assert '_x_' in expl


def test_explain_prediction_pandas_dot_in_feature_name(boston_train):
    pd = pytest.importorskip('pandas')
    X, y, feature_names = boston_train
    feature_names = ["%s.%s" % (name, idx)
                     for idx, name in enumerate(feature_names)]
    df = pd.DataFrame(X, columns=feature_names)
    reg = XGBRegressor()
    reg.fit(df, y)
    res = explain_prediction(reg, df.iloc[0])
    for expl in format_as_all(res, reg):
        assert 'PTRATIO.1' in expl


# Multiclass test body: ``use_booster`` switches between the low-level
# xgboost.train() Booster API and the XGBClassifier wrapper, and
# ``filter_missing`` drops NaN-valued features from the explanation.
docs, ys, target_names = newsgroups_train
vec = CountVectorizer(stop_words='english')
xs = vec.fit_transform(docs)
if use_booster:
    clf = xgboost.train(
        params={'objective': 'multi:softprob', 'num_class': len(target_names),
                'silent': True, 'max_depth': 2},
        dtrain=xgboost.DMatrix(xs, label=ys, missing=np.nan),
        num_boost_round=100,
    )
else:
    clf = XGBClassifier(n_estimators=100, max_depth=2)
    clf.fit(xs, ys)
feature_filter = (lambda _, v: not np.isnan(v)) if filter_missing else None
doc = 'computer graphics in space: a new religion'
res = explain_prediction(clf, doc, vec=vec, target_names=target_names,
                         feature_filter=feature_filter)
format_as_all(res, clf)
if not filter_missing:
    check_targets_scores(res)
graphics_weights = res.targets[1].feature_weights
assert 'computer' in get_all_features(graphics_weights.pos)
religion_weights = res.targets[3].feature_weights
assert 'religion' in get_all_features(religion_weights.pos)
top_target_res = explain_prediction(clf, doc, vec=vec, top_targets=2)
assert len(top_target_res.targets) == 2
assert sorted(t.proba for t in top_target_res.targets) == sorted(
    t.proba for t in res.targets)[-2:]


@explain_prediction.register(lightgbm.LGBMRegressor)
def explain_prediction_lightgbm(
        lgb, doc,
        vec=None,
        top=None,
        top_targets=None,
        target_names=None,
        targets=None,
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        vectorized=False,
        ):
    """ Return an explanation of LightGBM prediction (via scikit-learn wrapper
    LGBMClassifier or LGBMRegressor) as feature weights.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.
    """
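
# A minimal usage sketch, not part of the eli5 source: assuming lightgbm,
# numpy and eli5 are installed, the handler registered above is reached
# through the generic eli5.explain_prediction entry point; the data and
# feature names below are arbitrary.
import numpy as np
import lightgbm
import eli5

X = np.random.normal(size=(500, 3))
y = 2.0 * X[:, 0] - X[:, 1] + np.random.normal(scale=0.1, size=500)
reg = lightgbm.LGBMRegressor(n_estimators=50)
reg.fit(X, y)

# Dispatch on type(reg) selects explain_prediction_lightgbm.
expl = eli5.explain_prediction(reg, X[0], feature_names=['a', 'b', 'c'])
print(eli5.format_as_text(expl))
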

@explain_prediction.register(BaseEstimator)
def explain_prediction_sklearn_not_supported(
        estimator, doc,
        vec=None,
        top=None,
        top_targets=None,
        target_names=None,
        targets=None,
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        vectorized=False):
    return Explanation(
        estimator=repr(estimator),
        error="estimator %r is not supported" % estimator,
    )
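
# Illustrative sketch (an assumption, not taken from the source): an estimator
# without a more specific handler falls through to the BaseEstimator fallback
# above, which returns an Explanation carrying an ``error`` message instead of
# raising. DummyClassifier is assumed to have no dedicated handler registered.
from sklearn.dummy import DummyClassifier

unsupported = DummyClassifier(strategy='most_frequent')
expl = explain_prediction(unsupported, [0.0, 1.0])
assert 'not supported' in expl.error
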

def explain_prediction_ovr(clf, doc, **kwargs):
    # Delegate a one-vs-rest wrapper to the handler registered for its base
    # estimator class; note the wrapper itself is passed through as ``clf``.
    estimator = clf.estimator
    func = explain_prediction.dispatch(estimator.__class__)
    return func(clf, doc, **kwargs)
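
# Sketch of the dispatch step above (illustration only; the sklearn imports
# are assumptions): ``explain_prediction`` behaves like a singledispatch
# generic function, so ``.dispatch()`` returns the handler registered for a
# class without calling it; that handler is then invoked with the OvR wrapper
# rather than the inner estimator.
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

ovr = OneVsRestClassifier(LogisticRegression())
handler = explain_prediction.dispatch(ovr.estimator.__class__)
# ``handler`` is whatever was registered for LogisticRegression;
# explain_prediction_ovr(ovr, doc) forwards to it as handler(ovr, doc).
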