-------
Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
returns a subsampled copy of it (`copy == True`).
"""
np.random.seed(random_state)
old_n_obs = data.n_obs if isinstance(data, AnnData) else data.shape[0]
if n_obs is not None:
new_n_obs = n_obs
elif fraction is not None:
if fraction > 1 or fraction < 0:
raise ValueError(
f'`fraction` needs to be within [0, 1], not {fraction}'
)
new_n_obs = int(fraction * old_n_obs)
logg.debug(f'... subsampled to {new_n_obs} data points')
else:
raise ValueError('Either pass `n_obs` or `fraction`.')
obs_indices = np.random.choice(old_n_obs, size=new_n_obs, replace=False)
if isinstance(data, AnnData):
adata = data.copy() if copy else data
adata._inplace_subset_obs(obs_indices)
return adata if copy else None
else:
X = data
return X[obs_indices], obs_indices
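# ----------------------------------------------------------------------
# Usage sketch (illustrative, not part of the source): assuming the
# function above is exposed as scanpy's `sc.pp.subsample`, both call
# styles would look like this.
import numpy as np
import scanpy as sc
from anndata import AnnData

adata = AnnData(np.random.rand(100, 10))
sc.pp.subsample(adata, fraction=0.5, random_state=0)  # subsamples in place
assert adata.n_obs == 50

X = np.random.rand(100, 10)
Xsub, obs_indices = sc.pp.subsample(X, n_obs=20)  # array input: returns (X[idx], idx)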
pos = sp.stats.kendalltau(a[:i+1], b[:i+1])[0]
neg = sp.stats.kendalltau(a[i+1:], b[i+1:])[0]
if False:
    # The following is much slower than sp.stats.kendalltau and is kept
    # only for debugging: it computes the tau-a version, which does not
    # account for ties, whereas sp.stats.kendalltau computes the tau-b
    # version, which does.
    pos = sp.stats.mstats.kendalltau(a[:i+1], b[:i+1], use_ties=False)[0]
    neg = sp.stats.mstats.kendalltau(a[i+1:], b[i+1:], use_ties=False)[0]
corr_coeff[ii] = pos - neg
iimax = np.argmax(corr_coeff)
imax = min_length + iimax
corr_coeff_max = corr_coeff[iimax]
if corr_coeff_max < 0.3:
logg.debug(' is root itself; never attains a significant correlation')
return imax
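# ----------------------------------------------------------------------
# Standalone sketch (illustrative) of the tau-a vs tau-b distinction the
# debugging branch above refers to: with ties present, the two differ.
import numpy as np
from scipy.stats import kendalltau, mstats

a = np.array([1, 2, 2, 3, 4])
b = np.array([1, 2, 3, 3, 5])
tau_b = kendalltau(a, b)[0]                         # tau-b, corrects for ties
tau_a = mstats.kendalltau(a, b, use_ties=False)[0]  # tau-a, ignores ties
print(tau_b, tau_a)  # the values differ because both arrays contain ties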
"""
if subsample == 1 and seed == 0:
return X, np.arange(X.shape[0], dtype=int)
if seed == 0:
# this sequence is defined simply by skipping rows,
# which is faster than random sampling
rows = np.arange(0, X.shape[0], subsample, dtype=int)
n = rows.size
Xsampled = np.array(X[rows])
else:
if seed < 0:
raise ValueError(f'Invalid seed value < 0: {seed}')
n = int(X.shape[0]/subsample)
np.random.seed(seed)
Xsampled, rows = subsample_n(X, n=n)
logg.debug(f'... subsampled to {n} of {X.shape[0]} data points')
return Xsampled, rows
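# ----------------------------------------------------------------------
# Minimal sketch (illustrative) of the deterministic `seed == 0` path:
# taking every `subsample`-th row avoids the cost of random sampling.
import numpy as np

X = np.arange(20).reshape(10, 2)
subsample = 3
rows = np.arange(0, X.shape[0], subsample, dtype=int)  # array([0, 3, 6, 9])
Xsampled = np.array(X[rows])  # copy of the kept rows, as in the code above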
dseg += Dseg[third_tip]
fourth_tip = np.argmax(dseg)
if fourth_tip != tips[0] and fourth_tip != third_tip:
tips[1] = fourth_tip
dseg -= Dseg[tips[1]]
else:
dseg -= Dseg[third_tip]
tips3 = np.append(tips, third_tip)
# Compute the score as the ratio of the distance added by the third tip
# to the direct distance between the first two tips,
# Dseg[tips3[0], tips3[1]]. Without this normalization, the longest
# segment would simply tend to receive the highest score.
score = dseg[tips3[2]] / Dseg[tips3[0], tips3[1]]
# with `choose_largest_segment`, the score is instead simply the number of points
score = len(seg) if self.choose_largest_segment else score
logg.debug(
    f' group {iseg} score {score} n_points {len(seg)} '
    + ('(too small)' if len(seg) <= self.min_group_size else ''),
)
if len(seg) <= self.min_group_size:
    score = 0
# write result
scores_tips[iseg, 0] = score
scores_tips[iseg, 1:] = tips3
iseg = np.argmax(scores_tips[:, 0])
if scores_tips[iseg, 0] == 0:
    return -1, None
tips3 = scores_tips[iseg, 1:].astype(int)
return iseg, tips3
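# ----------------------------------------------------------------------
# Toy sketch (hypothetical 3-point distance matrix, not the author's code)
# of the normalized tip score: a point exactly on the shortest path
# between the two tips yields a score of 1; larger detours score higher.
import numpy as np

Dseg = np.array([
    [0., 4., 5.],
    [4., 0., 3.],
    [5., 3., 0.],
])
tips = np.array([0, 2])               # the two initial tips
dseg = Dseg[tips[0]] + Dseg[tips[1]]  # summed distance of every point to both tips
third_tip = int(np.argmax(dseg))      # the point adding the most path length
tips3 = np.append(tips, third_tip)
score = dseg[tips3[2]] / Dseg[tips3[0], tips3[1]]  # 7.0 / 5.0 = 1.4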
X_pca = pca_.fit_transform(X)
if X_pca.dtype.descr != np.dtype(dtype).descr:
    X_pca = X_pca.astype(dtype)
if data_is_AnnData:
adata.obsm['X_pca'] = X_pca
if use_highly_variable:
adata.varm['PCs'] = np.zeros(shape=(adata.n_vars, n_comps))
adata.varm['PCs'][adata.var['highly_variable']] = pca_.components_.T
else:
adata.varm['PCs'] = pca_.components_.T
adata.uns['pca'] = {}
adata.uns['pca']['variance'] = pca_.explained_variance_
adata.uns['pca']['variance_ratio'] = pca_.explained_variance_ratio_
logg.info(' finished', time=start)
logg.debug(
    'and added\n'
    '    \'X_pca\', the PCA coordinates (adata.obsm)\n'
    '    \'PCs\', the loadings (adata.varm)\n'
    '    \'pca/variance\', the variance / eigenvalues (adata.uns)\n'
    '    \'pca/variance_ratio\', the variance ratio (adata.uns)'
)
return adata if copy else None
else:
logg.info(' finished', time=start)
if return_info:
return X_pca, pca_.components_, pca_.explained_variance_ratio_, pca_.explained_variance_
else:
return X_pca
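# ----------------------------------------------------------------------
# Usage sketch (illustrative), assuming this code backs scanpy's
# `sc.pp.pca`; it shows where the results land in the AnnData object.
import numpy as np
import scanpy as sc
from anndata import AnnData

adata = AnnData(np.random.rand(100, 30))
sc.pp.pca(adata, n_comps=10)
print(adata.obsm['X_pca'].shape)           # (100, 10): PCA coordinates
print(adata.varm['PCs'].shape)             # (30, 10): loadings
print(adata.uns['pca']['variance_ratio'])  # explained-variance ratios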
max_value : `float` or `None`, optional (default: `None`)
Clip (truncate) to this value after scaling. If `None`, do not clip.
copy : `bool`, optional (default: `False`)
If an :class:`~anndata.AnnData` is passed, determines whether a copy
is returned.
Returns
-------
Depending on `copy` returns or updates `adata` with a scaled `adata.X`.
"""
if isinstance(data, AnnData):
adata = data.copy() if copy else data
view_to_actual(adata)
# need to add the following here to make inplace logic work
if zero_center and issparse(adata.X):
logg.debug(
'... scale_data: as `zero_center=True`, sparse input is '
'densified and may lead to large memory consumption'
)
adata.X = adata.X.toarray()
scale(adata.X, zero_center=zero_center, max_value=max_value, copy=False)
return adata if copy else None
X = data.copy() if copy else data # proceed with the data matrix
zero_center = zero_center if zero_center is not None else not issparse(X)  # default: zero-center dense input, not sparse
if not zero_center and max_value is not None:
logg.debug('... scale_data: be careful when using `max_value` without `zero_center`')
if max_value is not None:
logg.debug(f'... clipping at max_value {max_value}')
if zero_center and issparse(X):
logg.debug(
'... scale_data: as `zero_center=True`, sparse input is '
'densified and may lead to large memory consumption, returning copy'
)
X = X.toarray()
copy = True
_scale(X, zero_center)
if max_value is not None:
    X[X > max_value] = max_value
return X if copy else None
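# ----------------------------------------------------------------------
# Usage sketch (illustrative), assuming the function above is scanpy's
# `sc.pp.scale`: zero-centering a sparse matrix densifies it.
import numpy as np
import scanpy as sc
from anndata import AnnData
from scipy.sparse import csr_matrix

adata = AnnData(csr_matrix(np.random.rand(50, 5)))
sc.pp.scale(adata, zero_center=True, max_value=10)  # densifies adata.X in place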
paths_all = nx.single_source_dijkstra_path(G, source=kseg)
if jseg_min not in paths_all:
segs_adjacency[jseg_min].append(kseg)
segs_connects[jseg_min].append(closest_points_in_kseg[idx])
segs_adjacency[kseg].append(jseg_min)
segs_connects[kseg].append(closest_points_in_jseg[idx])
logg.debug(f' attaching new segment {kseg} at {jseg_min}')
# if we split the cluster, we should not attach kseg
do_not_attach_kseg = True
else:
logg.debug(
f' cannot attach new segment {kseg} at {jseg_min} '
'(would produce cycle)'
)
if kseg != kseg_list[-1]:
logg.debug(' continue')
continue
else:
logg.debug(' do not add another link')
break
if jseg_min in kseg_list and not do_not_attach_kseg:
segs_adjacency[jseg_min].append(kseg)
segs_connects[jseg_min].append(closest_points_in_kseg[idx])
segs_adjacency[kseg].append(jseg_min)
segs_connects[kseg].append(closest_points_in_jseg[idx])
break
segs_undecided += [False] * n_add
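# ----------------------------------------------------------------------
# Sketch (illustrative) of the cycle check above: a new segment kseg is
# only attached to jseg_min if no path already connects them in G.
import networkx as nx

G = nx.Graph()
G.add_edges_from([(0, 1), (1, 2)])
kseg, jseg_min = 0, 2
paths_all = nx.single_source_dijkstra_path(G, source=kseg)
if jseg_min not in paths_all:
    G.add_edge(kseg, jseg_min)  # safe: kseg and jseg_min were disconnected
else:
    print('would produce cycle')  # 0 and 2 are already connected via 1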
if data_is_AnnData:
adata.obsm['X_pca'] = X_pca
adata.uns['pca'] = {}
adata.uns['pca']['params'] = {
'zero_center': zero_center,
'use_highly_variable': use_highly_variable
}
if use_highly_variable:
adata.varm['PCs'] = np.zeros(shape=(adata.n_vars, n_comps))
adata.varm['PCs'][adata.var['highly_variable']] = pca_.components_.T
else:
adata.varm['PCs'] = pca_.components_.T
adata.uns['pca']['variance'] = pca_.explained_variance_
adata.uns['pca']['variance_ratio'] = pca_.explained_variance_ratio_
logg.info(' finished', time=start)
logg.debug(
    'and added\n'
    '    \'X_pca\', the PCA coordinates (adata.obsm)\n'
    '    \'PCs\', the loadings (adata.varm)\n'
    '    \'pca/variance\', the variance / eigenvalues (adata.uns)\n'
    '    \'pca/variance_ratio\', the variance ratio (adata.uns)'
)
return adata if copy else None
else:
logg.info(' finished', time=start)
if return_info:
return X_pca, pca_.components_, pca_.explained_variance_ratio_, pca_.explained_variance_
else:
return X_pca
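# ----------------------------------------------------------------------
# Sketch (illustrative) of the highly-variable masking above: loadings
# computed on the HV subset are scattered back into a full matrix so
# that `adata.varm['PCs']` keeps one row per gene, zeros for non-HV genes.
import numpy as np

n_vars, n_comps = 6, 2
hv_mask = np.array([True, False, True, False, True, False])
components = np.random.rand(n_comps, hv_mask.sum())  # stand-in for pca_.components_
PCs = np.zeros((n_vars, n_comps))
PCs[hv_mask] = components.T  # rows for non-HV genes remain zero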
if n_dcs is not None:
if n_dcs > len(self._eigen_values):
raise ValueError(
    f'Cannot instantiate using `n_dcs`={n_dcs}. '
    'Compute diffmap/spectrum with more components first.'
)
self._eigen_values = self._eigen_values[:n_dcs]
self._eigen_basis = self._eigen_basis[:, :n_dcs]
self.n_dcs = len(self._eigen_values)
info_str += '`.eigen_values` `.eigen_basis` `.distances_dpt`'
else:
self._eigen_values = None
self._eigen_basis = None
self.n_dcs = None
if info_str != '':
logg.debug(f' initialized {info_str}')
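# ----------------------------------------------------------------------
# Sketch (illustrative) of the `n_dcs` truncation above: only the leading
# n_dcs eigenvalues / eigenvectors of the precomputed spectrum are kept.
import numpy as np

eigen_values = np.array([1.0, 0.9, 0.5, 0.1])
eigen_basis = np.random.rand(100, 4)
n_dcs = 2
if n_dcs > len(eigen_values):
    raise ValueError('Compute diffmap/spectrum with more components first.')
eigen_values = eigen_values[:n_dcs]
eigen_basis = eigen_basis[:, :n_dcs]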