Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Additional keyword arguments to pass to the distance function.
Returns
-------
indices : array
Indices that sort the computed mean distances in ascending order.
mdist : array
Mean distance characterizing each data sample.
"""
if metric_args is None:
metric_args = {}
# compute distances
D = metrics.pdist(data, metric=metric, **metric_args)
D = metrics.squareform(D)
# compute mean
mdist = np.mean(D, axis=0)
# sort
indices = np.argsort(mdist)
return indices, mdist
.. [EKSX96] M. Ester, H. P. Kriegel, J. Sander, and X. Xu,
“A Density-Based Algorithm for Discovering Clusters in Large Spatial
Databases with Noise”, Proceedings of the 2nd International
Conf. on Knowledge Discovery and Data Mining, pp. 226-231, 1996.
"""
# check inputs
if data is None:
raise TypeError("Please specify input data.")
if metric_args is None:
metric_args = {}
# compute distances
D = metrics.pdist(data, metric=metric, **metric_args)
D = metrics.squareform(D)
# fit
db = skc.DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
labels = db.fit_predict(D)
# get cluster indices
clusters = _extract_clusters(labels)
return utils.ReturnTuple((clusters,), ('clusters',))
def __init__(self, k=3, metric='euclidean', metric_args=None):
    """Store the classifier settings and check the distance metric.

    Parameters
    ----------
    k : int, optional
        Value stored as ``self.k`` (presumably the number of neighbors;
        confirm against the rest of the class).
    metric : str, optional
        Distance metric, forwarded to ``metrics.pdist``.
    metric_args : dict, optional
        Additional keyword arguments to pass to the distance function;
        ``None`` is replaced by an empty dictionary.

    """

    # parent class set-up comes first
    super(KNN, self).__init__()

    # resolve the optional keyword arguments before storing them
    if metric_args is None:
        metric_args = {}

    # algorithm configuration
    self.k = k
    self.metric = metric
    self.metric_args = metric_args

    # try the metric and its arguments once on a dummy 2x2 input;
    # the computed distances themselves are not needed
    metrics.pdist(np.zeros((2, 2)), metric, **metric_args)

    # minimum threshold: ten times the float machine epsilon
    self.min_thr = 10 * np.finfo('float').eps
N = len(data)
if k > N:
raise ValueError("Number of clusters 'k' is higher than the number" \
" of input samples.")
if metric_args is None:
metric_args = {}
if linkage in ['centroid', 'median', 'ward']:
if metric != 'euclidean':
raise TypeError("Linkage '{}' requires the distance metric to be" \
" 'euclidean'.".format(linkage))
Z = sch.linkage(data, method=linkage)
else:
# compute distances
D = metrics.pdist(data, metric=metric, **metric_args)
# build linkage
Z = sch.linkage(D, method=linkage)
if k < 0:
k = 0
# extract clusters
if k == 0:
# life-time
labels = _life_time(Z, N)
else:
labels = sch.fcluster(Z, k, 'maxclust')
# get cluster indices
clusters = _extract_clusters(labels)