Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_explain_weights(iris_train):
    """Check ``eli5.explain_weights`` for ``PermutationImportance`` on iris.

    Three wrappers are exercised: one around an already fitted ``SVC``, one
    built with ``cv=None`` and one built with ``cv=3``. For each of them the
    explanation description must mention "generalization", exactly four
    feature importances must be reported with the first two being petal
    features and the last two sepal features, and every rendering returned
    by ``format_as_all`` must contain "petal width (cm)".
    """
    X, y, feature_names, target_names = iris_train
    kwargs = dict(n_iter=20, random_state=42)
    for perm in [
        PermutationImportance(SVC(C=10).fit(X, y), **kwargs),
        PermutationImportance(SVC(C=10), cv=None, **kwargs),
        PermutationImportance(SVC(C=10), cv=3, **kwargs),
    ]:
        perm.fit(X, y)
        print(perm.score(X, y))
        expl = eli5.explain_weights(perm, target_names=target_names,
                                    feature_names=feature_names)
        assert "generalization" in expl.description
        imp = expl.feature_importances.importances
        assert len(imp) == 4
        # all() is required here: asserting a bare list only checks that the
        # list is non-empty, so the per-feature checks would never fail.
        assert all(n.feature.startswith("petal") for n in imp[:2])
        assert all(n.feature.startswith("sepal") for n in imp[2:])

        res = format_as_all(expl, perm.wrapped_estimator_)
        for _expl in res:
            assert "petal width (cm)" in _expl
def test_explain_weights(iris_train):
    """Check ``eli5.explain_weights`` for ``PermutationImportance`` on iris.

    Three wrappers are exercised: one around an already fitted ``SVC``, one
    built with ``cv=None`` and one built with ``cv=3``. For each of them the
    explanation description must mention "generalization", exactly four
    feature importances must be reported with the first two being petal
    features and the last two sepal features, and every rendering returned
    by ``format_as_all`` must contain "petal width (cm)".
    """
    X, y, feature_names, target_names = iris_train
    kwargs = dict(n_iter=20, random_state=42)
    for perm in [
        PermutationImportance(SVC(C=10).fit(X, y), **kwargs),
        PermutationImportance(SVC(C=10), cv=None, **kwargs),
        PermutationImportance(SVC(C=10), cv=3, **kwargs),
    ]:
        perm.fit(X, y)
        print(perm.score(X, y))
        expl = eli5.explain_weights(perm, target_names=target_names,
                                    feature_names=feature_names)
        assert "generalization" in expl.description
        imp = expl.feature_importances.importances
        assert len(imp) == 4
        # all() is required here: asserting a bare list only checks that the
        # list is non-empty, so the per-feature checks would never fail.
        assert all(n.feature.startswith("petal") for n in imp[:2])
        assert all(n.feature.startswith("sepal") for n in imp[2:])

        res = format_as_all(expl, perm.wrapped_estimator_)
        for _expl in res:
            assert "petal width (cm)" in _expl
def test_classifier(iris_train):
    """Verify that a fitted ``PermutationImportance`` mirrors its classifier.

    The wrapper must be recognised as a classifier, expose ``classes_`` equal
    to ``[0, 1, 2]`` and return the same values as the wrapped
    ``LogisticRegression`` from each prediction method.
    """
    X, y, _feature_names, _target_names = iris_train

    base = LogisticRegression().fit(X, y)
    assert is_classifier(base)

    wrapped = PermutationImportance(base, random_state=42).fit(X, y)
    assert is_classifier(wrapped)
    assert (wrapped.classes_ == [0, 1, 2]).all()

    # Each delegated method has to agree numerically with the base estimator.
    for method in ("predict", "predict_proba",
                   "predict_log_proba", "decision_function"):
        assert np.allclose(getattr(base, method)(X),
                           getattr(wrapped, method)(X))
def test_cv(boston_train):
# Visible part: fits PermutationImportance around an SVR with cv=None on the
# "test" split returned by _boston_with_leak, checks that the wrapper and its
# estimator_ both score above zero, and passes the result to
# _assert_importances_good, keeping its return value as imp_nocv.
# noise feature can be important if no cv is used, but not if cv is used
# X_train, y_train are almost empty; we're using test part of the dataset
X_train, X_test, y_train, y_test, feat_names = _boston_with_leak(
*boston_train, noise_ratio=0.99)
reg = PermutationImportance(
SVR(C=100, gamma='auto'),
random_state=42,
cv=None,
n_iter=50, # use the same number of experiments as with cv=10
).fit(X_test, y_test)
assert reg.score(X_test, y_test) > 0
assert reg.estimator_.score(X_test, y_test) > 0
print(reg.score(X_test, y_test))
imp_nocv = _assert_importances_good(reg, feat_names)
# CV feature importances
reg = PermutationImportance(
SVR(C=100, gamma='auto'),
random_state=42,
cv=10,
# NOTE(review): the snippet ends here; this second PermutationImportance(...)
# call is never closed and the remainder of the test is not visible.
PermutationImportance(
LogisticRegression(solver='liblinear', random_state=42),
cv=5, random_state=42, refit=False,
),
threshold=0.1,
),
['', '']),
(RFE(LogisticRegression(solver='liblinear', random_state=42, multi_class='ovr'), 2),
['', '']),
(RFECV(LogisticRegression(solver='liblinear', random_state=42, multi_class='ovr'), cv=3),
['', '', '', '']),
] + _additional_test_cases)
def test_transform_feature_names_iris(transformer, expected, iris_train):
# Fits the supplied transformer on the iris data, then calls
# transform_feature_names.
# NOTE(review): `expected` is presumably the expected output feature names;
# its use is not visible in this snippet, so confirm against the full test.
X, y, _, _ = iris_train
transformer.fit(X, y)
# Test in_names being provided
res = transform_feature_names(
# NOTE(review): the snippet is cut off inside this call; its arguments and
# any assertions that follow are not visible.
def test_estimator_type():
    """Unfitted ``PermutationImportance`` reports its wrapped estimator's type.

    A wrapper around ``LogisticRegression`` must satisfy ``is_classifier`` and
    a wrapper around ``RandomForestRegressor`` must satisfy ``is_regressor``.
    """
    classifier_perm = PermutationImportance(LogisticRegression(), cv=3)
    assert is_classifier(classifier_perm)

    regressor_perm = PermutationImportance(RandomForestRegressor(), cv=3)
    assert is_regressor(regressor_perm)
def compute_imp_score(model, model_name, training_features, training_classes, random_state):
    """Return feature importance scores of one pipeline step, divided by their sum.

    The step ``model.named_steps[model_name]`` is inspected in this order:

    1. if it has ``coef_``, the absolute values of the flattened coefficients
       are used;
    2. otherwise its ``feature_importances_`` attribute is used when present
       and not ``None``;
    3. otherwise permutation importances are computed by fitting
       ``PermutationImportance(estimator=model, n_iter=5, refit=False)`` on
       ``training_features`` / ``training_classes``.

    ``random_state`` is only consumed by the permutation fallback.
    """
    step = model.named_steps[model_name]

    if hasattr(step, 'coef_'):
        scores = np.abs(step.coef_.flatten())
    else:
        scores = getattr(step, 'feature_importances_', None)
        if scores is None:
            # Neither attribute is available: fall back to permutation
            # importances measured on the whole model.
            permuter = PermutationImportance(
                estimator=model,
                n_iter=5,
                random_state=random_state,
                refit=False,
            )
            permuter.fit(training_features, training_classes)
            scores = permuter.feature_importances_

    total = np.sum(scores)
    return scores / total
```
If ``node_params=None``, it will be constructed from ``retention_config`` variable, so that:
```
{
'positive_target_event': 'nice_target',
'negative_target_event': 'bad_target',
'source_event': 'source',
}
```
Default: ``None``
"""
self.show_quality_metrics(test_sample, test_target)
if hasattr(self.mod, 'coef_'):
self._plot_perm_imp(__LogRegWrapper__(self.mod.coef_[0]), test_sample, node_params, **kwargs)
return
perm = PermutationImportance(self.mod, random_state=0).fit(test_sample, test_target)
eli5.show_weights(perm, feature_names=[' '.join(i) if type(i) == tuple else i for i in test_sample.columns])
self._plot_perm_imp(perm, test_sample, node_params, **kwargs)
def perm_importance(self):
    """
    Compute permutation-based feature importances for every treatment group.

    For each ``(group, idx)`` pair in ``self.classes`` a
    ``PermutationImportance`` is fitted on ``self.X`` against column ``idx``
    of ``self.tau``. When ``self.r_learners`` is ``None`` the shared
    ``self.model_tau`` is used with ``cv=3``; otherwise the group's own
    entry of ``self.r_learners`` is used with ``cv='prefit'``.

    Returns a dict mapping each group to the fitted ``feature_importances_``.
    """
    importances = {}
    for group, column in self.classes.items():
        if self.r_learners is not None:
            estimator, cv_mode = self.r_learners[group], 'prefit'
        else:
            estimator, cv_mode = self.model_tau, 3

        fitter = PermutationImportance(estimator, cv=cv_mode)
        fitter.fit(self.X, self.tau[:, column])
        importances[group] = fitter.feature_importances_

    return importances
# Explainability section: permutation importance table followed by partial
# dependence plots rendered as inline base64 <img> tags.
# NOTE(review): indentation is lost in this view, so how many of the following
# statements belong to the `if` body cannot be determined here.
if is_labeled_data:
# Comma-separated feature lists read from `variables`; entries are stripped.
# NOTE(review): assumes both keys are present — a missing key would make
# .get() return None (if `variables` is dict-like) and .split fail; confirm.
feature_partial = variables.get("FEATURE_PARTIAL_PLOTS")
feature_partial_plots = [x.strip() for x in feature_partial.split(',')]
features_to_plot = variables.get("FEATURE_PARTIAL2D_PLOTS")
features_to_plot2d = [x.strip() for x in features_to_plot.split(',')]
shap_row_to_show = int(variables.get("SHAP_ROW_SHOW"))
# Split the frame into feature columns (label dropped) and the label column.
columns = [LABEL_COLUMN]
dataframe_test = dataframe.drop(columns, axis=1, inplace=False)
dataframe_label = dataframe.filter(columns, axis=1)
feature_names = dataframe_test.columns.values
# -------------------------------------------------------------
# PERMUTATION IMPORTANCE
# Fit on the raw feature values against the flattened label values, then
# render the top 50 weights with eli5.
perm = PermutationImportance(loaded_model, random_state=1).fit(dataframe_test.values,
dataframe_label.values.ravel())
html_table = eli5.show_weights(perm, feature_names=dataframe_test.columns.tolist(), top=50)
# -------------------------------------------------------------
# PARTIAL DEPENDENCE PLOTS
# Only plot requested features that actually exist in the feature columns.
partial_feature_find = [i for i in feature_partial_plots if i in feature_names]
html_partial_plot = ''
for i in partial_feature_find:
pdp_feature = pdp.pdp_isolate(model=loaded_model, dataset=dataframe_test,
model_features=feature_names, feature=i)
pdp_plot_feature = pdp.pdp_plot(pdp_feature, i)
# Random 3-character suffix drawn from uppercase letters and digits.
graph_name = ''.join(random.sample((string.ascii_uppercase + string.digits), 3))
# NOTE(review): this value of html_pdp is overwritten two statements below
# before it is read, so within the visible code it (and graph_name) is unused.
html_pdp = 'html_pdp_plot' + graph_name + ' + '
encoded = fig_to_base64(pdp_plot_feature)
# Embed the encoded figure as an inline PNG and append it to the output.
html_pdp = '<img src="data:image/png;base64, {}" class="img-fluid">'.format(encoded.decode('utf-8'))
html_partial_plot += html_pdp