Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
y : ndarray or Series of length n
An array or series of target or class values
kwargs : dict
Pass generic arguments to the drawing method
Returns
-------
self : instance
Returns the instance of the transformer/visualizer
"""
# Determine the features, classes, and colors
super(ParallelCoordinates, self).fit(X, y)
# Convert from pandas data types
if is_dataframe(X):
X = X.values
if is_series(y):
y = y.values
# Ticks for each feature specified
self._increments = np.arange(len(self.features_))
# Subsample instances
X, y = self._subsample(X, y)
# Normalize instances
if self.normalize is not None:
X = self.NORMALIZERS[self.normalize].fit_transform(X)
self.draw(X, y, **kwargs)
return self
if self.features is not None:
# Use the user-specified features with some checking
# TODO: allow the user specified features to filter the dataset
if len(self.features) != n_columns:
raise YellowbrickValueError(
(
"number of supplied feature names does not match the number "
"of columns in the training data."
)
)
self.features_ = np.array(self.features)
else:
# Attempt to determine the feature names from the input data
if is_dataframe(X):
self.features_ = np.array(X.columns)
# Otherwise create numeric labels for each column.
else:
self.features_ = np.arange(0, n_columns)
# Ensure super is called and fit is returned
super(MultiFeatureVisualizer, self).fit(X, y)
return self
-------
ranks : ndarray
An n-dimensional, symmetric array of rank scores, where n is the
number of features. E.g. for 1D ranking, it is (n,), for a
2D ranking it is (n,n) and so forth.
"""
algorithm = algorithm or self.ranking_
algorithm = algorithm.lower()
if algorithm not in self.ranking_methods:
raise YellowbrickValueError(
"'{}' is unrecognized ranking method".format(algorithm)
)
# Extract matrix from dataframe if necessary
if is_dataframe(X):
X = X.values
return self.ranking_methods[algorithm](X)
# Apply absolute value filter before normalization
if self.absolute:
self.feature_importances_ = np.abs(self.feature_importances_)
# Normalize features relative to the maximum
if self.relative:
maxv = np.abs(self.feature_importances_).max()
self.feature_importances_ /= maxv
self.feature_importances_ *= 100.0
# Create labels for the feature importances
# NOTE: this code is duplicated from MultiFeatureVisualizer
if self.labels is None:
# Use column names if a dataframe
if is_dataframe(X):
self.features_ = np.array(X.columns)
# Otherwise use the column index as the labels
else:
_, ncols = X.shape
self.features_ = np.arange(0, ncols)
else:
self.features_ = np.array(self.labels)
# Sort the features and their importances
if self.stack:
sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
self.features_ = self.features_[sort_idx]
self.feature_importances_ = self.feature_importances_[:, sort_idx]
else:
sort_idx = np.argsort(self.feature_importances_)
def _create_labels_for_features(self, X):
    """
    Populate ``self.features_`` with one label per feature.

    The label source is chosen in this order:

    1. ``self.labels`` when it is not None (converted to an array).
    2. The column names of ``X`` when ``is_dataframe(X)`` is true.
    3. The positional column indices ``0 .. ncols - 1`` taken from
       ``X.shape`` otherwise.

    NOTE: this code is duplicated from MultiFeatureVisualizer

    Parameters
    ----------
    X : dataframe or object exposing a two-element ``shape``
        The data whose columns are being labeled. It is only inspected
        when ``self.labels`` is None.
    """
    # Explicitly supplied labels take precedence over anything in X.
    if self.labels is not None:
        self.features_ = np.array(self.labels)
        return

    # A dataframe carries its own column names.
    if is_dataframe(X):
        self.features_ = np.array(X.columns)
        return

    # Fall back to the column index as the label; the two-target
    # unpacking means X.shape must have exactly two entries.
    _n_rows, n_columns = X.shape
    self.features_ = np.arange(0, n_columns)
""" """
if len(X.shape) == 1:
X_flat = X.copy().view(np.float64).reshape(len(X), -1)
else:
X_flat = X
_, ncols = X_flat.shape
if ncols == 2:
X_two_cols = X
if self.features_ is None:
self.features_ = ["Feature One", "Feature Two"]
# Handle the feature names if they're None.
elif self.features_ is not None and is_dataframe(X):
X_two_cols = X[self.features_].as_matrix()
# Handle numpy named/structured arrays
elif self.features_ is not None and is_structured_array(X):
X_selected = X[self.features_]
X_two_cols = X_selected.copy().view(np.float64).reshape(len(X_selected), -1)
# handle features that are numeric columns in ndarray matrix
elif self.features_ is not None and has_ndarray_int_columns(self.features_, X):
f_one, f_two = self.features_
X_two_cols = X[:, [int(f_one), int(f_two)]]
else:
raise YellowbrickValueError("""
ScatterVisualizer only accepts two features, please
explicitly set these two features in the init kwargs or
def draw(self, X, y, **kwargs):
"""
Called from the fit method, this method creates the radviz canvas and
draws each instance as a class or target colored point, whose location
is determined by the feature data set.

NOTE(review): only the setup portion of this method is visible in this
excerpt (input conversion, NaN handling, axes limits and the creation
of the per-class ``to_plot`` container); the code that fills and
renders ``to_plot`` is not shown here.
"""
# Convert from dataframe to its underlying values
if is_dataframe(X):
X = X.values
# Clean out nans and warn the user that they aren't plotted
nan_warnings.warn_if_nans_exist(X)
X, y = nan_warnings.filter_missing(X, y)
# Get the shape of the data; the two-target unpacking requires X.shape
# to have exactly two entries
nrows, ncols = X.shape
# Set the axes limits to [-1, 1] on both the x and y axes
self.ax.set_xlim([-1, 1])
self.ax.set_ylim([-1, 1])
# Create a data structure to hold scatter plot representations: each
# label in self.classes_ maps to a pair of empty lists (presumably the
# x and y coordinates of that class's points - TODO confirm)
to_plot = {label: [[], []] for label in self.classes_}
kwargs : dict
Pass generic arguments to the drawing method
Returns
-------
self : instance
Returns the instance of the transformer/visualizer
"""
# Do not call super here - the data visualizer has been refactored
# to provide increased functionality that is not yet compatible with
# the current implementation. This mimics the previous functionality.
# TODO: Refactor MissingDataVisualizer to make use of new features.
self.features_ = self.features
if is_dataframe(X):
self.X = X.values
if self.features_ is None:
self.features_ = X.columns
else:
self.X = X
self.y = y
self.draw(X, y, **kwargs)
return self