Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_pandas_integration(self):
    """
    ROCAUC should work when fit and scored with pandas input
    """
    features, target = load_occupancy(return_dataset=True).to_pandas()

    # Hold out 20% of the rows for scoring, with a fixed seed
    X_train, X_test, y_train, y_test = tts(
        features, target, test_size=0.2, random_state=4512
    )

    oz = ROCAUC(GaussianNB())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)

    # Finalize the drawing, then run the image comparison
    oz.finalize()
    self.assert_images_similar(oz)
def test_pandas_integration(self):
    """
    ConfusionMatrix should work with Pandas DataFrame and Series input
    """
    _, axes = plt.subplots()

    # Occupancy fixture data, converted to pandas objects
    features, target = load_occupancy(return_dataset=True).to_pandas()

    # Hold out 20% of the rows for scoring, with a fixed seed
    X_train, X_test, y_train, y_test = tts(
        features, target, test_size=0.2, random_state=8873
    )

    # Build the confusion matrix visualizer around a naive Bayes model
    matrix = ConfusionMatrix(GaussianNB(), ax=axes, classes=None)
    matrix.fit(X_train, y_train)
    matrix.score(X_test, y_test)

    self.assert_images_similar(matrix, tol=0.1)

    # The counts stored on the visualizer must match these exact values
    expected = np.array([[3012, 114], [1, 985]])
    npt.assert_array_equal(matrix.confusion_matrix_, expected)
def test_pandas_integration(self):
    """
    Test RFECV on a real dataset with pandas DataFrame and Series
    """
    data = load_occupancy(return_dataset=True)
    X, y = data.to_pandas()

    # Use only the first 100 samples so the test will run faster
    X_t = X[:100]
    y_t = y[:100]

    # Slicing must not have changed the container types
    assert isinstance(X_t, pd.DataFrame)
    assert isinstance(y_t, pd.Series)

    # No random_state is passed: shuffle is left at its default (False), so
    # a seed has no effect on the folds, and recent scikit-learn releases
    # raise ValueError when random_state is set without shuffle=True.
    cv = StratifiedKFold(n_splits=4)
    oz = RFECV(RandomForestClassifier(random_state=83), cv=cv)
    oz.fit(X_t, y_t)
    oz.finalize()

    self.assert_images_similar(oz, remove_legend=True)
def test_parallel_coordinates_quickmethod(self):
    """
    The parallel_coordinates quick method should produce a valid visualization
    """
    dataset = load_occupancy(return_dataset=True)
    X, y = dataset.to_numpy()

    # sample=100 keeps the test fast
    oz = parallel_coordinates(X, y, sample=100, show=False)

    # Run the image comparison on the returned visualizer
    self.assert_images_similar(oz)
def test_integrated_radviz_numpy_classes_features(self):
    """
    RadViz with explicit classes and features, using numpy input
    """
    # Load the occupancy fixture and convert it to numpy arrays
    data = load_occupancy(return_dataset=True)
    X, y = data.to_numpy()

    # First three feature names from the dataset metadata
    features = data.meta["features"][0:3]

    # Label names ordered by the value each one maps to
    label_items = sorted(data.meta["labels"].items(), key=lambda pair: pair[1])
    classes = [name for name, _ in label_items]

    assert isinstance(X, np.ndarray)
    assert isinstance(y, np.ndarray)

    # Keep three columns to match the feature names, and use integer targets
    # so the visualizer is not simply handed the class names
    X = X[:, :3]
    y = y.astype(int)

    # NOTE(review): the visualizer is constructed but never fit or compared
    # in the visible code -- the rest of the test may be missing; confirm.
    visualizer = RadViz(features=features, classes=classes)
def test_integrated_scatter_with_pandas(self):
    """
    ScatterViz on the real occupancy data set, supplied as pandas objects
    """
    # Load the occupancy fixture as pandas objects
    dataset = load_occupancy(return_dataset=True)
    X, y = dataset.to_pandas()

    # Plot two named columns against each other
    # NOTE(review): newer yellowbrick releases appear to have renamed the
    # poof() family to show(); confirm fit_transform_poof exists in the
    # pinned version.
    oz = ScatterViz(features=["temperature", "relative humidity"])
    oz.fit_transform_poof(X, y)
def rocauc(dataset):
    """
    Draw a ROCAUC visualizer for the named dataset and hand it to savefig.

    Parameters
    ----------
    dataset : str
        "binary" uses the occupancy data with a GaussianNB model;
        "multiclass" uses the game data, ordinal-encoded, with a
        RidgeClassifier model.

    Raises
    ------
    ValueError
        If ``dataset`` is neither "binary" nor "multiclass".
    """
    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        # Encode the features before they are split and passed to the model
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        # Message spelling fixed ("uknown" -> "unknown") to match manifold()
        raise ValueError("unknown dataset")

    # Hold out 20% of the rows for scoring (no seed is set here)
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)

    savefig(oz, "rocauc_{}".format(dataset))
def radviz():
    """
    Fit a RadViz visualizer on the occupancy data and pass it to savefig
    under the name "radviz".
    """
    features, target = load_occupancy()

    viz = RadViz(ax=newfig())
    viz.fit_transform(features, target)

    savefig(viz, "radviz")
def scatter():
    """
    Fit a ScatterVisualizer of "light" against "CO2" on the occupancy data
    and pass it to savefig under the name "scatter".
    """
    features, target = load_occupancy()

    viz = ScatterVisualizer(x="light", y="CO2", ax=newfig())
    viz.fit_transform(features, target)

    savefig(viz, "scatter")
def manifold(dataset, manifold):
    """
    Fit a Manifold visualizer on the named dataset and pass it to savefig
    under the name "<dataset>_<manifold>_manifold".

    ``dataset`` must be "concrete" or "occupancy"; any other value raises
    ValueError. ``manifold`` is forwarded to the Manifold visualizer.
    """
    use_concrete = dataset == "concrete"

    # Reject unsupported names before loading anything
    if not use_concrete and dataset != "occupancy":
        raise ValueError("unknown dataset")

    X, y = load_concrete() if use_concrete else load_occupancy()

    viz = Manifold(manifold=manifold, ax=newfig())
    viz.fit_transform(X, y)

    savefig(viz, "{}_{}_manifold".format(dataset, manifold))