Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python
import numpy as np
from kmodes.kmodes import KModes
# Reproduce results on the small soybean data set.
# x: every column except the last one, parsed as integers.
x = np.genfromtxt('soybean.csv', dtype=int, delimiter=',')[:, :-1]
# y: column 35 parsed as strings (presumably the label column that x drops
# -- TODO confirm against the CSV layout).
y = np.genfromtxt('soybean.csv', dtype=str, delimiter=',', usecols=(35, ))

# Fit a 4-cluster k-modes model using Huang initialisation.
kmodes_huang = KModes(n_clusters=4, init='Huang', verbose=1)
kmodes_huang.fit(x)

# Print cluster centroids of the trained model.
print('k-modes (Huang) centroids:')
print(kmodes_huang.cluster_centroids_)
# Print training statistics
print('Final training cost: {}'.format(kmodes_huang.cost_))
print('Training iterations: {}'.format(kmodes_huang.n_iter_))

# Fit a second 4-cluster model on the same data using Cao initialisation.
kmodes_cao = KModes(n_clusters=4, init='Cao', verbose=1)
kmodes_cao.fit(x)

# Print cluster centroids of the trained model.
print('k-modes (Cao) centroids:')
print(kmodes_cao.cluster_centroids_)
# Print training statistics (same two figures as reported for the Huang run).
print('Final training cost: {}'.format(kmodes_cao.cost_))
print('Training iterations: {}'.format(kmodes_cao.n_iter_))
def _kmodes(k, n_init, n_jobs, seed):
    """Fit a Huang-initialised k-modes model on a leading slice of ``data``.

    ``data`` and ``N_kmodes`` are module-level names defined outside this
    view; only the first ``N_kmodes`` rows of ``data`` are used.

    Args:
        k: Number of clusters, passed as ``n_clusters``.
        n_init: Passed through as ``n_init``.
        n_jobs: Passed through as ``n_jobs``.
        seed: Passed through as ``random_state``.

    Returns:
        None. The fitted model is discarded (presumably this is a
        benchmark body -- confirm with the caller).
    """
    model = KModes(
        n_clusters=k,
        init='Huang',
        n_init=n_init,
        n_jobs=n_jobs,
        random_state=seed,
    )
    leading_rows = data[:N_kmodes, :]
    model.fit(leading_rows)
# Reproduce results on the small soybean data set.
# x: every column except the last one, parsed as integers.
x = np.genfromtxt('soybean.csv', dtype=int, delimiter=',')[:, :-1]
# y: column 35 parsed as strings (presumably the label column that x drops
# -- TODO confirm against the CSV layout).
y = np.genfromtxt('soybean.csv', dtype=str, delimiter=',', usecols=(35, ))

# Fit a 4-cluster k-modes model using Huang initialisation.
kmodes_huang = KModes(n_clusters=4, init='Huang', verbose=1)
kmodes_huang.fit(x)

# Print cluster centroids of the trained model.
print('k-modes (Huang) centroids:')
print(kmodes_huang.cluster_centroids_)
# Print training statistics
print('Final training cost: {}'.format(kmodes_huang.cost_))
print('Training iterations: {}'.format(kmodes_huang.n_iter_))

# Fit a second 4-cluster model on the same data using Cao initialisation.
kmodes_cao = KModes(n_clusters=4, init='Cao', verbose=1)
kmodes_cao.fit(x)

# Print cluster centroids of the trained model.
print('k-modes (Cao) centroids:')
print(kmodes_cao.cluster_centroids_)
# Print training statistics
print('Final training cost: {}'.format(kmodes_cao.cost_))
print('Training iterations: {}'.format(kmodes_cao.n_iter_))

print('Results tables:')
for result in (kmodes_huang, kmodes_cao):
    # Cross-tabulate labels against cluster assignments: the row index is the
    # final character of each label read as a digit, minus one (so labels are
    # expected to end in 1-4); the column index is the assigned cluster.
    classtable = np.zeros((4, 4), dtype=int)
    for label, cluster in zip(y, result.labels_):
        classtable[int(label[-1]) - 1, cluster] += 1

    print("\n")
    # Show the table announced by the header above; previously it was built
    # and then thrown away without being displayed.
    print(classtable)
def cao():
    """Run k-modes with Cao initialisation on the module-level ``data``.

    The cluster count comes from ``K``; both ``K`` and ``data`` are defined
    outside this view. The value returned by ``fit_predict`` is discarded,
    so this function returns ``None``.
    """
    model = KModes(n_clusters=K, init='Cao', verbose=2)
    model.fit_predict(data)
def huang():
    """Run k-modes with Huang initialisation on the module-level ``data``.

    Uses ``K`` clusters and ``n_init=1``; ``K`` and ``data`` are defined
    outside this view. The value returned by ``fit_predict`` is discarded,
    so this function returns ``None``.
    """
    model = KModes(n_clusters=K, init='Huang', n_init=1, verbose=2)
    model.fit_predict(data)
def huang_ng_dissim():
    """Run Huang-initialised k-modes using ``ng_dissim`` as the dissimilarity.

    Identical in setup to a plain Huang run (``K`` clusters, ``n_init=1``)
    except that ``cat_dissim`` is set to ``ng_dissim``. ``K``, ``data`` and
    ``ng_dissim`` are defined outside this view. The value returned by
    ``fit_predict`` is discarded, so this function returns ``None``.
    """
    model = KModes(
        n_clusters=K,
        init='Huang',
        cat_dissim=ng_dissim,
        n_init=1,
        verbose=2,
    )
    model.fit_predict(data)