Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:param data: Input data matrix (observations x features) or (cells x genes).
:param sample_description: pandas.DataFrame containing sample annotations, can be None.
:return: Assembled sample annotations.
"""
if sample_description is None:
if anndata is not None and isinstance(data, anndata.AnnData):
sample_description = data.obs
else:
raise ValueError(
"Please specify `sample_description` or provide `data` as anndata.AnnData " +
"with corresponding sample annotations"
)
if sample_description is not None:
if anndata is not None and isinstance(data, Raw):
# Raw does not have attribute shape.
assert data.X.shape[0] == sample_description.shape[0], \
"data matrix and sample description must contain same number of cells: %i, %i" % \
(data.X.shape[0], sample_description.shape[0])
elif isinstance(data, glm.typing.InputDataBase):
assert data.x.shape[0] == sample_description.shape[0], \
"data matrix and sample description must contain same number of cells: %i, %i" % \
(data.x.shape[0], sample_description.shape[0])
else:
assert data.shape[0] == sample_description.shape[0], \
"data matrix and sample description must contain same number of cells: %i, %i" % \
(data.shape[0], sample_description.shape[0])
return sample_description
gene_names: Union[np.ndarray, list] = None,
sample_description: pd.DataFrame = None
):
"""
:param data: Array-like or anndata.AnnData object containing observations.
Input data matrix (observations x features) or (cells x genes).
:param parts: str, array
- column in data.obs/sample_description which contains the split of observations into the two groups.
- array of length `num_observations` containing group labels
:param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
:param sample_description: optional pandas.DataFrame containing sample annotations
"""
if isinstance(data, glm.typing.InputDataBase):
self.x = data.x
elif isinstance(data, anndata.AnnData) or isinstance(data, Raw):
self.x = data.X
elif isinstance(data, np.ndarray):
self.x = data
else:
raise ValueError("data type %s not recognized" % type(data))
self.gene_names = parse_gene_names(data, gene_names)
self.sample_description = parse_sample_description(data, sample_description)
self.partition = parse_grouping(data, sample_description, parts)
self.partitions = np.unique(self.partition)
self.partition_idx = [np.where(self.partition == x)[0] for x in self.partitions]
if lazy:
de_test = DifferentialExpressionTestZTestLazy(
model_estim=model,
grouping=grouping,
groups=np.unique(grouping),
correction_type=pval_correction
)
else:
de_test = DifferentialExpressionTestZTest(
model_estim=model,
grouping=grouping,
groups=np.unique(grouping),
correction_type=pval_correction
)
else:
if isinstance(data, anndata.AnnData) or isinstance(data, anndata.Raw):
data = data.X
elif isinstance(data, glm.typing.InputDataBase):
data = data.x
groups = np.unique(grouping)
pvals = np.tile(np.NaN, [len(groups), len(groups), data.shape[1]])
pvals[np.eye(pvals.shape[0]).astype(bool)] = 0
logfc = np.tile(np.NaN, [len(groups), len(groups), data.shape[1]])
logfc[np.eye(logfc.shape[0]).astype(bool)] = 0
if keep_full_test_objs:
tests = np.tile([None], [len(groups), len(groups)])
else:
tests = None
for i, g1 in enumerate(groups):
for j, g2 in enumerate(groups[(i + 1):]):
- array of length `num_observations` containing group labels
:param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
:param sample_description: optional pandas.DataFrame containing sample annotations.
:param dmat_loc: Pre-built location model design matrix.
This overrides formula_loc and sample description information given in
data or sample_description.
:param dmat_scale: Pre-built scale model design matrix.
This overrides formula_scale and sample description information given in
data or sample_description.
:param size_factors: 1D array of transformed library size factors for each cell, in the
same order as in data, or a string naming the column of the sample description that
contains the size factors.
"""
if isinstance(data, glm.typing.InputDataBase):
self.x = data.x
elif isinstance(data, anndata.AnnData) or isinstance(data, Raw):
self.x = data.X
elif isinstance(data, np.ndarray):
self.x = data
else:
raise ValueError("data type %s not recognized" % type(data))
self.gene_names = parse_gene_names(data, gene_names)
self.sample_description = parse_sample_description(data, sample_description)
self.dmat_loc = dmat_loc
self.dmat_scale = dmat_scale
self.size_factors = size_factors
self.partition = parse_grouping(data, sample_description, parts)
self.partitions = np.unique(self.partition)
self.partition_idx = [np.where(self.partition == x)[0] for x in self.partitions]