Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_radius_dist(self):
    """Fitted cover radii should match hand-computed expected values."""
    expectations = [
        {"cubes": 1, "range": [0, 4], "overlap": 0.4, "radius": 10.0 / 3},
        {"cubes": 1, "range": [0, 4], "overlap": 0.9, "radius": 20.0},
        {"cubes": 2, "range": [-4, 4], "overlap": 0.5, "radius": 4.0},
        {"cubes": 3, "range": [-4, 4], "overlap": 0.5, "radius": 2.666666666},
        {"cubes": 10, "range": [-4, 4], "overlap": 0.5, "radius": 0.8},
        {"cubes": 10, "range": [-4, 4], "overlap": 1.0, "radius": np.inf},
    ]
    for case in expectations:
        # Scale a small toy data set into the requested feature range.
        scaler = preprocessing.MinMaxScaler(feature_range=case["range"])
        scaled = scaler.fit_transform(np.arange(20).reshape(10, 2))
        cover = Cover(n_cubes=case["cubes"], perc_overlap=case["overlap"])
        cover.fit(scaled)
        assert cover.radius_[0] == pytest.approx(case["radius"])
def test_125_replication(self):
    """On uniform data, every pair of consecutive hypercubes should
    share the same number of points in their overlap region."""
    # Uniform 1-D lens, prefixed with an integer ID column so shared
    # members can be identified across cubes.
    lens = np.arange(0, 100)[:, np.newaxis]
    ids = np.array([x for x in range(lens.shape[0])])
    lens = np.c_[ids, lens]
    cover = Cover(10, 0.5)
    centers = cover.fit(lens)
    entries = [cover.transform_single(lens, center) for center in centers]
    # Count shared IDs between each pair of neighbouring cubes.
    shared_counts = [
        len(set(list(a[:, 0])).intersection(set(list(b[:, 0]))))
        for a, b in zip(entries, entries[1:])
    ]
    assert (
        len(set(shared_counts)) == 1
    ), "Each overlap should have the same number of entries. "
def test_diff_overlap_per_dim(self):
    """A per-dimension list of overlaps should be accepted by fit()."""
    sample = np.random.rand(100, 3)
    Cover(perc_overlap=[0.4, 0.2]).fit(sample)
import numpy as np
from kmapper.cover import Cover
# Build a uniform 1-D data set and use it directly as the lens.
# uniform data:
data = np.arange(0, 1000).reshape((1000, 1))
lens = data
# 10 cubes at 50% overlap; verbose=0 keeps fitting silent.
cov = Cover(10, 0.5, verbose=0)
def overlap(c1, c2):
    """Return the fraction of members shared by *c1* and *c2*,
    normalized by the size of the larger collection.

    Duplicates within either collection are ignored (set semantics).
    """
    shared = set(c1) & set(c2)
    return len(shared) / max(len(c1), len(c2))
# Prefix'ing the data with an ID column
ids = np.array([x for x in range(lens.shape[0])])
lens = np.c_[ids, lens]
# fit() returns the cube centers; transform() buckets every row of the
# lens into its covering hypercubes.
bins = cov.fit(lens)
cube_entries = cov.transform(lens, bins)
# NOTE(review): the body of this loop is truncated in this chunk.
for i, hypercube in enumerate(cube_entries):
def test_perc_overlap(self, CoverClass):
    """
    Two cubes with 50% overlap over the range [0, 1] should yield the
    intervals [0, .75] and [.25, 1]; rows must land in the matching cube.
    """
    points = np.array([[0, 0], [1, 0.25], [2, 0.5], [3, 0.75], [4, 1]])
    cover = Cover(n_cubes=2, perc_overlap=0.5)
    centers = list(cover.fit(points))
    memberships = [cover.transform_single(points, center) for center in centers]
    # First cube covers [0, .75] -> rows 0..3; second covers [.25, 1] -> rows 1..4.
    for idx in (0, 1, 2, 3):
        assert points[idx] in memberships[0]
    for idx in (1, 2, 3, 4):
        assert points[idx] in memberships[1]
# NOTE(review): fragment of a transform-style method — the `def` line is
# not visible in this chunk; presumably transform(self, data, centers=None).
# Confirm signature against the full file.
# Fall back to the centers learned during fit() when none are supplied.
centers = centers or self.centers_
hypercubes = [
    self.transform_single(data, cube, i) for i, cube in enumerate(centers)
]
# Clean out any empty cubes (common in high dimensions)
hypercubes = [cube for cube in hypercubes if len(cube)]
return hypercubes
def fit_transform(self, data):
    """Convenience wrapper: fit the cover on *data*, then transform it."""
    _ = self.fit(data)
    return self.transform(data)
class CubicalCover(Cover):
    """
    Explicit cubical cover, provided as a named alias for the default
    behavior of the cover class. Currently identical to :class:`Cover`.
    """

    pass
from kmapper import KeplerMapper
from kmapper.cover import Cover
# NOTE(review): the matching `try:` for this except-clause is not visible
# in this chunk; the imports above are presumably guarded by it. Failures
# are reported as a warning instead of aborting.
except ImportError as e:
print("[warning]", e)
# NOTE(review): fragment of an __init__-style constructor — the `def`
# line and parameter list are not visible in this chunk; parameter
# semantics are inferred from usage and should be confirmed.
# init mapper
self.mapper = KeplerMapper()
self.verbose = verbose
# [1] fit params
# Explicit `is not None` check so a falsy-but-valid projection is kept.
self.projection = projection if projection is not None else PCA(2)
self.scaler = scaler #or MinMaxScaler()
# [2] map params
# `or` replaces any falsy clusterer/cover with the library defaults.
self.clusterer = clusterer or DBSCAN(eps=1, min_samples=2)
self.cover = cover or Cover(10, 0.5)
self.remove_duplicate_nodes = remove_duplicate_nodes
# setup memory
# joblib-style Memory cache for expensive intermediate results.
self.memory = Memory(memory, verbose=verbose)
# NOTE(review): tail of a cover-tuning helper — the enclosing `def` is
# not visible in this chunk; n_cubes, p_overlap, scale_limits and ndim
# come from the missing signature/body. Confirm against the full file.
n_cubes = int(n_cubes)
p_overlap = np.round(p_overlap, 2)
# Define optimized limits
limits = None
if scale_limits is True:
# Pad the unit range by one overlap-width per side and add two extra
# cubes so the data extremes stay covered.
offset = p_overlap / float(n_cubes)
limits = [[-offset, 1+offset] for _ in range(ndim)]
n_cubes += 2 #* ndim
try:
# Initialize Cover with limits
cover = Cover(n_cubes, p_overlap, limits=limits)
except Exception as e:
# Ignore limits, probably using older version
cover = Cover(n_cubes, p_overlap)
print("[warning]", e)
return cover
# NOTE(review): this fragment appears twice in this chunk — it is a
# duplicate tail of the same cover-tuning helper; the enclosing `def`
# is not visible. Confirm against the full file.
# Round final values
n_cubes = int(n_cubes)
p_overlap = np.round(p_overlap, 2)
# Define optimized limits
limits = None
if scale_limits is True:
# Pad the unit range by one overlap-width per side and add two extra
# cubes so the data extremes stay covered.
offset = p_overlap / float(n_cubes)
limits = [[-offset, 1+offset] for _ in range(ndim)]
n_cubes += 2 #* ndim
try:
# Initialize Cover with limits
cover = Cover(n_cubes, p_overlap, limits=limits)
except Exception as e:
# Ignore limits, probably using older version
cover = Cover(n_cubes, p_overlap)
print("[warning]", e)
return cover
>>> # Use HDBSCAN as the clusterer
>>> graph = mapper.map(X_projected, X_inverse,
>>> clusterer=hdbscan.HDBSCAN())
>>> # Parametrize the nerve of the covering
>>> graph = mapper.map(X_projected, X_inverse,
>>> nerve=km.GraphNerve(min_intersection=3))
"""
# NOTE(review): fragment of a KeplerMapper.map(...)-style method — the
# `def` line and most of the docstring are not visible in this chunk.
start = datetime.now()
# `or` replaces any falsy clusterer/cover/nerve with library defaults.
clusterer = clusterer or cluster.DBSCAN(eps=0.5, min_samples=3)
self.cover = cover or Cover(n_cubes=10, perc_overlap=0.1)
nerve = nerve or GraphNerve()
nodes = defaultdict(list)
meta = defaultdict(list)
graph = {}
# If inverse image is not provided, we use the projection as the inverse image (suffer projection loss)
if X is None:
X = lens
if self.verbose > 0:
print(
"Mapping on data shaped %s using lens shaped %s\n"
% (str(X.shape), str(lens.shape))
)