Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Handle an unsuccessful initialization attempt.
# NOTE(review): init_tries, MAX_INIT_TRIES and RAISE_INIT_TRIES are defined
# outside this excerpt; presumably this sits inside a retry loop -- confirm.
if init_tries == MAX_INIT_TRIES:
# Could not get rid of empty clusters. Randomly
# initialize instead.
init = 'random'
elif init_tries == RAISE_INIT_TRIES:
# Give up: the caller has to supply the initial clusters explicitly.
raise ValueError(
"Clustering algorithm could not initialize. "
"Consider assigning the initial clusters manually."
)
# Perform an initial centroid update.
# centroids[0] is filled from the per-cluster attribute sums (nnumattrs
# columns); centroids[1] is filled from the per-cluster attribute frequency
# tables (ncatattrs columns).
for ik in range(n_clusters):
for iattr in range(nnumattrs):
# Attribute sum divided by the cluster's membership sum.
# NOTE(review): nothing here guards against cl_memb_sum[ik] == 0;
# presumably the empty-cluster handling above rules that out -- confirm.
centroids[0][ik, iattr] = cl_attr_sum[ik, iattr] / cl_memb_sum[ik]
for iattr in range(ncatattrs):
# Key selected by get_max_value_key from this cluster's frequency table
# for the attribute (presumably the most frequent value -- confirm).
centroids[1][ik, iattr] = get_max_value_key(cl_attr_freq[ik][iattr])
# _____ ITERATION _____
if verbose:
print("Starting iterations...")
itr = 0
labels = None
converged = False
# Cost of the initial assignment; the first return value of _labels_cost is
# discarded here.
_, cost = _labels_cost(Xnum, Xcat, centroids,
num_dissim, cat_dissim, gamma, membship)
# Cost history, seeded with the cost of the initial assignment.
epoch_costs = [cost]
# NOTE(review): itr starts at 0 and the test is `<=`, so the body can run up to
# max_iter + 1 times before the bound stops it -- confirm whether `<` was
# intended.
while itr <= max_iter and not converged:
itr += 1
centroids, moves = _k_prototypes_iter(Xnum, Xcat, centroids,
cl_attr_sum, cl_memb_sum, cl_attr_freq,
# NOTE(review): the start of this function (and presumably a loop binding
# iattr / curattr / to_attr_counts / from_attr_counts) is outside this excerpt.
# Count of the moved point's value (curattr) in the "to" cluster.
current_attribute_value_freq = to_attr_counts[curattr]
current_centroid_value = centroids[to_clust][iattr]
# Count of the value the "to" centroid currently holds for this attribute.
current_centroid_freq = to_attr_counts[current_centroid_value]
# Strict comparison: on a tie the existing centroid value is kept.
if current_centroid_freq < current_attribute_value_freq:
# We have incremented this value to the new mode. Update the centroid.
centroids[to_clust][iattr] = curattr
# Decrement the attribute count for the old "from" cluster
from_attr_counts[curattr] -= 1
old_centroid_value = centroids[from_clust][iattr]
if old_centroid_value == curattr:
# We have just removed a count from the old centroid value. We need to
# recalculate the centroid as it may no longer be the maximum
centroids[from_clust][iattr] = get_max_value_key(from_attr_counts)
# Hand the frequency tables, membership matrix and centroids back to the
# caller.
return cl_attr_freq, membship, centroids
for _ in range(n_clusters)]
# Assign every point to a cluster and build the per-cluster value counts.
for ipoint, curpoint in enumerate(X):
# Initial assignment to clusters
# np.argmin picks the index of the smallest value returned by dissim for
# this point, i.e. the cluster the point is assigned to.
clust = np.argmin(dissim(centroids, curpoint, X=X, membship=membship))
membship[clust, ipoint] = 1
# Count attribute values per cluster.
for iattr, curattr in enumerate(curpoint):
cl_attr_freq[clust][iattr][curattr] += 1
# Perform an initial centroid update.
for ik in range(n_clusters):
for iattr in range(n_attrs):
# NOTE(review): sum(membship[ik]) does not depend on iattr and membship is
# not modified in this loop, yet it is re-evaluated for every attribute.
if sum(membship[ik]) == 0:
# Empty centroid, choose randomly
# The value is drawn from column iattr of X via random_state.choice.
centroids[ik, iattr] = random_state.choice(X[:, iattr])
else:
# Key selected by get_max_value_key from this cluster's frequency table
# for the attribute (presumably the most frequent value -- confirm).
centroids[ik, iattr] = get_max_value_key(cl_attr_freq[ik][iattr])
# _____ ITERATION _____
if verbose:
print("Starting iterations...")
itr = 0
labels = None
converged = False
# Cost of the initial assignment; the first return value of _labels_cost is
# discarded here.
_, cost = _labels_cost(X, centroids, dissim, membship)
# Cost history, seeded with the cost of the initial assignment.
epoch_costs = [cost]
# NOTE(review): itr starts at 0 and the test is `<=`, so the body can run up to
# max_iter + 1 times before the bound stops it -- confirm whether `<` was
# intended.
while itr <= max_iter and not converged:
itr += 1
centroids, moves = _k_modes_iter(
X,
centroids,