Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_binary_macro_error(self):
    """
    Scoring a binary-decision ROCAUC built with macro=True must raise
    """
    # A linear model is used to force the binary decision path
    estimator = LinearSVC(random_state=42)
    oz = ROCAUC(estimator, macro=True)
    oz.fit(self.binary.X.train, self.binary.y.train)

    # Macro curves aren't defined for binary decisions, so score must fail
    with pytest.raises(ModelError):
        oz.score(self.binary.X.test, self.binary.y.test)
def test_classes_greater_than_indices(self):
    """
    Expect a ModelError when one of the specified classes has zero
    values in both y and y_pred
    """
    # Three class names are supplied to the visualizer below
    labels = ["unoccupied", "occupied", "partytime"]

    dataset = load_occupancy(return_dataset=True)
    X, y = dataset.to_numpy()

    # Fit the estimator up front; the visualizer receives the fitted model
    estimator = LinearSVC(random_state=42)
    estimator.fit(X, y)

    with pytest.raises(ModelError):
        oz = ClassPredictionError(estimator, classes=labels)
        oz.score(X, y)
def test_binary_micro_error(self):
    """
    Scoring a binary-decision ROCAUC built with micro=True must raise
    """
    # A linear model is used to force the binary decision path
    estimator = LinearSVC(random_state=42)
    oz = ROCAUC(estimator, micro=True)
    oz.fit(self.binary.X.train, self.binary.y.train)

    # Micro curves aren't defined for binary decisions, so score must fail
    with pytest.raises(ModelError):
        oz.score(self.binary.X.test, self.binary.y.test)
def test_extra_classes(self):
    """
    Fitting with extra classes specified must raise a ModelError
    """
    extra_classes = [0, 1, 2, 11]
    oz = ConfusionMatrix(
        LogisticRegression(random_state=93), classes=extra_classes
    )

    # The error message is expected to mention the decoding failure
    with pytest.raises(ModelError, match="could not decode"):
        oz.fit(self.digits.X.train, self.digits.y.train)
X : array-like of shape (n, m)
A matrix or data frame with n instances and m features
y : array-like of shape (n,), optional
A vector or series with target values for each instance in X. This
vector is used to determine the color of the points in X.
Returns
-------
self : Manifold
Returns the visualizer object.
"""
if not hasattr(self.manifold, 'transform'):
name = self.manifold.__class__.__name__
raise ModelError((
"{} requires data to be simultaneously fit and transformed, "
"use fit_transform instead").format(name)
)
# Call super to compute features, classes, colors, etc.
super(Manifold, self).fit(X, y)
self.manifold.fit(X)
return self
Note
----
This method does not work with MDS, TSNE and SpectralEmbedding because
it is yet to be implemented in sklearn.
"""
# Because some manifolds do not have transform we cannot call super
try:
Xp = self.manifold.transform(X)
self.draw(Xp, y)
return Xp
except NotFittedError:
raise NotFitted.from_estimator(self, 'transform')
except AttributeError:
name = self.manifold.__class__.__name__
raise ModelError((
"{} requires data to be simultaneously fit and transformed, "
"use fit_transform instead").format(name)
)
return Xp
# Return the first resolved function
for attr in attrs:
try:
method = getattr(self.estimator, attr, None)
if method:
return method(X)
except AttributeError:
# Some Scikit-Learn estimators have both probability and
# decision functions but override __getattr__ and raise an
# AttributeError on access.
# Note that because of the ordering of our attrs above,
# estimators with both will *only* ever use probability.
continue
# If we've gotten this far, raise an error
raise ModelError(
"ROCAUC requires estimators with predict_proba or "
"decision_function methods."
# Compute the predictions for the test data
y_pred = self._get_y_scores(X)
# Note: In the above, _get_y_scores calls either a decision_function or
# predict_proba, which should return a 2D array. But in a binary
# classification using an estimator with only a decision_function, y_pred
# will instead be 1D, meaning only one curve can be plotted. In this case,
# we set the _binary_decision attribute to True to ensure only one curve is
# computed and plotted later on.
if y_pred.ndim == 1:
self._binary_decision = True
# Raise an error if it's a binary decision and user has set micro,
# macro, or per_class to True
if self.micro or self.macro or self.per_class:
raise ModelError(
"Micro, macro, and per-class scores are not defined for "
"binary classification for estimators with only "
"decision_function methods; set micro, macro, and "
"per-class params to False."
)
else:
self._binary_decision = False
# If it's not a binary decision, at least one of micro, macro, or
# per_class must be True
if not self.micro and not self.macro and not self.per_class:
raise YellowbrickValueError(
"no curves will be drawn; specify micro, macro, or per_class"
)
# Classes may be label encoded so only use what's in y to compute.
# The self.classes_ attribute will be used as names for labels.
if hasattr(self.encoder, "inverse_transform"):
try:
return self.encoder.inverse_transform(y)
except ValueError:
y_labels = np.unique(y)
raise ModelError(
"could not decode {} y values to {} labels".format(
y_labels, self._labels()
)
)
# Otherwise, treat as a dictionary
try:
return np.asarray([self.encoder[yi] for yi in y])
except KeyError as e:
raise ModelError(
(
"cannot decode class {} to label, "
"key not specified by encoder"
).format(e)
)
if self.classes is not None:
# Determine indices to perform class mappings on
yp = np.asarray(y)
if yp.dtype.kind in {"i", "u"}:
idx = yp
else:
# Use label encoder to get indices by sorted class names
idx = LabelEncoder().fit_transform(yp)
# Use index mapping for classes