Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_locate_elbow(self):
"""
Test the addition of locate_elbow to an image
"""
X, y = make_blobs(
n_samples=1000, n_features=5, centers=3, shuffle=True, random_state=42
)
visualizer = KElbowVisualizer(
KMeans(random_state=0),
k=6,
metric="calinski_harabasz",
timings=False,
locate_elbow=True,
)
visualizer.fit(X)
assert len(visualizer.k_scores_) == 5
assert visualizer.elbow_value_ == 3
expected = np.array(
[
4286.4798481306625,
12463.383743070379,
8763.75791732466,
6942.167328461612,
5859.608884917707,
def test_no_knee(self):
    """
    Fitting on data with no detectable knee should emit a YellowbrickWarning
    """
    X, _ = make_blobs(
        n_samples=1000, centers=3, n_features=12, random_state=12
    )

    # Text the emitted warning is expected to contain; pytest.warns applies
    # ``match`` as a regular-expression search against the warning message.
    expected_warning = (
        "No 'knee' or 'elbow point' detected "
        "This could be due to bad clustering, no "
        "actual clusters being formed etc."
    )

    with pytest.warns(YellowbrickWarning, match=expected_warning):
        # Both construction and fitting happen inside the context so that a
        # matching warning from either step satisfies the check.
        oz = KElbowVisualizer(
            KMeans(random_state=12), k=(4, 12), locate_elbow=True
        )
        oz.fit(X)
def test_integrated_mini_batch_kmeans_elbow(self):
    """
    The k-elbow visualizer should run end to end with MiniBatchKMeans
    """
    # NOTE #182: cannot use occupancy dataset because of memory usage
    # A synthetic blobs data set is generated instead
    X, _ = make_blobs(
        n_samples=1000,
        n_features=12,
        centers=6,
        shuffle=True,
        random_state=42,
    )

    try:
        ax = plt.subplots()[1]
        oz = KElbowVisualizer(MiniBatchKMeans(random_state=42), k=4, ax=ax)
        oz.fit(X)
        oz.finalize()
        self.assert_images_similar(oz)
    except Exception as err:
        # Any exception raised above is reported as a test failure whose
        # message carries the original error text.
        pytest.fail("error during k-elbow: {}".format(err))
def test_timings(self):
"""
Test the twinx double axes with k-elbow timings
"""
visualizer = KElbowVisualizer(
KMeans(random_state=0), k=5, timings=True, locate_elbow=False
)
visualizer.fit(self.clusters.X)
# Check that we kept track of time
assert len(visualizer.k_timers_) == 4
assert all([t > 0 for t in visualizer.k_timers_])
# Check that we plotted time on a twinx
assert hasattr(visualizer, "axes")
assert len(visualizer.axes) == 2
# delete the timings axes and
# overwrite k_timers_, k_values_ for image similarity Tests
visualizer.axes[1].remove()
visualizer.k_timers_ = [
def test_silhouette_metric(self):
    """
    Fit the k-elbow visualizer using the silhouette metric
    """
    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="silhouette",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    # Reference values, one per k tried.
    # NOTE(review): nothing in the visible code compares these against the
    # fitted visualizer -- presumably the assertions were lost; confirm.
    reference_scores = [
        0.6916363804000003,
        0.456645663683503,
        0.26918583373704463,
        0.25523298106687914,
    ]
    expected = np.array(reference_scores)
def test_distortion_metric(self):
    """
    Fit the k-elbow visualizer using the distortion metric
    """
    oz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="distortion",
        timings=False,
        locate_elbow=False,
    )
    oz.fit(self.clusters.X)

    # Reference values, one per k tried.
    # NOTE(review): nothing in the visible code compares these against the
    # fitted visualizer -- presumably the assertions were lost; confirm.
    reference_scores = [
        69.10006514142941,
        54.081571290449936,
        44.491830981793605,
        33.99887993254433,
    ]
    expected = np.array(reference_scores)
def test_bad_metric(self):
    """
    Constructing KElbow with an unknown metric name must raise
    YellowbrickValueError
    """
    unknown_metric = "foo"
    with pytest.raises(YellowbrickValueError):
        KElbowVisualizer(KMeans(), k=5, metric=unknown_metric)
def test_invalid_k(self):
    """
    Each invalid value of K must raise YellowbrickValueError at construction
    """
    # A sequence containing a non-integer entry, then a bare string.
    invalid_ks = ((1, 2, 3, "foo", 5), "foo")

    for bad_k in invalid_ks:
        with pytest.raises(YellowbrickValueError):
            KElbowVisualizer(KMeans(), k=bad_k)
Assert that valid values of K generate correct k_values_
"""
# if k is an int, k_values_ = range(2, k+1)
# if k is a tuple of 2 ints, k_values_ = range(k[0], k[1])
# if k is an iterable, k_values_ = list(k)
visualizer = KElbowVisualizer(KMeans(), k=8)
assert visualizer.k_values_ == list(np.arange(2, 8 + 1))
visualizer = KElbowVisualizer(KMeans(), k=(4, 12))
assert visualizer.k_values_ == list(np.arange(4, 12))
visualizer = KElbowVisualizer(KMeans(), k=np.arange(10, 100, 10))
assert visualizer.k_values_ == list(np.arange(10, 100, 10))
visualizer = KElbowVisualizer(KMeans(), k=[10, 20, 30, 40, 50, 60, 70, 80, 90])
assert visualizer.k_values_ == list(np.arange(10, 100, 10))
Automatically find the "elbow" or "knee" which likely corresponds to the optimal
value of k using the "knee point detection algorithm". The knee point detection
algorithm finds the point of maximum curvature, which in a well-behaved
clustering problem also represents the pivot of the elbow curve. The point is
labeled with a dashed line and annotated with the score and k values.
kwargs : dict
Keyword arguments that are passed to the base class and may influence
the visualization as defined in other Visualizers.
Returns
-------
viz : KElbowVisualizer
The kelbow visualizer, fitted and finalized.
"""
oz = KElbow(
model,
ax=ax,
k=k,
metric=metric,
timings=timings,
locate_elbow=locate_elbow,
**kwargs
)
oz.fit(X, y)
oz.finalize()
return oz