Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def create_data(data_dict):
    """Load a normalized dataset and build train/validation sets with one group held out.

    The file ``./data/<name>/<name>_normalized.h5ad`` is read, restricted to the
    cells whose condition is one of the source or target conditions, reduced to
    2000 highly variable genes when it has more than 2000 variables, and split
    80/20 with ``train_test_split``. Cells that are both of the first cell type
    in ``spec_cell_types`` and in a target condition are then removed from each
    split.

    Parameters
    ----------
    data_dict : dict
        Dataset description. Entries read here:

        * ``'name'`` (required): dataset name used to build the file path.
        * ``'source_conditions'`` / ``'target_conditions'`` (required):
          condition labels; they are concatenated with ``+``.
        * ``'cell_type_key'`` (required): ``obs`` column with cell types.
        * ``'condition_key'`` (optional, default ``'condition'``): ``obs``
          column with conditions.
        * ``'spec_cell_types'`` (required, non-empty): only the first element
          is used as the held-out cell type.

    Returns
    -------
    tuple
        ``(adata, net_train_adata, net_valid_adata)``: the filtered dataset and
        the two splits with the held-out cells removed.

    Raises
    ------
    KeyError
        If ``'name'`` is missing.
    ValueError
        If a required entry is missing, or ``'spec_cell_types'`` is empty.
    """
    data_name = data_dict['name']
    source_keys = data_dict.get("source_conditions")
    target_keys = data_dict.get("target_conditions")
    cell_type_key = data_dict.get("cell_type_key", None)
    condition_key = data_dict.get('condition_key', 'condition')
    spec_cell_types = data_dict.get("spec_cell_types", None)

    # Validate before touching the disk: previously a missing entry surfaced as
    # an opaque "'NoneType' object is not subscriptable" (or only after the
    # whole dataset had been read).
    required = (
        ("source_conditions", source_keys),
        ("target_conditions", target_keys),
        ("cell_type_key", cell_type_key),
        ("spec_cell_types", spec_cell_types),
    )
    missing = [entry for entry, value in required if value is None]
    if missing:
        raise ValueError(
            f"data_dict is missing required entries: {', '.join(missing)}"
        )
    if len(spec_cell_types) == 0:
        raise ValueError("data_dict['spec_cell_types'] must not be empty")
    spec_cell_type = spec_cell_types[0]

    adata = sc.read(f"./data/{data_name}/{data_name}_normalized.h5ad")
    adata = adata[adata.obs[condition_key].isin(source_keys + target_keys)]

    if adata.shape[1] > 2000:
        sc.pp.highly_variable_genes(adata, n_top_genes=2000)
        adata = adata[:, adata.var['highly_variable']]

    train_adata, valid_adata = train_test_split(adata, 0.80)

    def _without_held_out(split):
        # Slice first, then copy: only the kept cells are duplicated and the
        # result is a standalone object rather than a view of a throwaway copy.
        held_out = ((split.obs[cell_type_key] == spec_cell_type) &
                    (split.obs[condition_key].isin(target_keys)))
        return split[~held_out].copy()

    net_train_adata = _without_held_out(train_adata)
    net_valid_adata = _without_held_out(valid_adata)
    return adata, net_train_adata, net_valid_adata
# Excerpt from a plotting routine whose `def` is outside this view. It labels
# latent representations with columns taken from `data.obs` and writes one UMAP
# figure per object.

# Copy the 'condition' and cell-type columns of `data.obs` onto
# `mmd_latent_with_true_labels`.
mmd_latent_with_true_labels.obs['condition'] = data.obs['condition'].values
mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values
# Wrap `mmd_latent_with_fake_labels` in an AnnData and label it the same way.
# NOTE(review): this object is not plotted anywhere in the visible excerpt.
mmd_latent_with_fake_labels = sc.AnnData(X=mmd_latent_with_fake_labels)
mmd_latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
mmd_latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values
# Every figure below is coloured by these two obs columns.
color = ['condition', cell_type_key]
# Same three steps for each object: neighbours -> UMAP -> plot. The `save`
# suffix is built from `data_name` and `cell_type` (both defined outside this
# excerpt) and the plot call is made with show=False.
sc.pp.neighbors(train_data)
sc.tl.umap(train_data)
sc.pl.umap(train_data, color=color,
save=f'_{data_name}_{cell_type}_train_data',
show=False)
sc.pp.neighbors(latent_with_true_labels)
sc.tl.umap(latent_with_true_labels)
sc.pl.umap(latent_with_true_labels, color=color,
save=f"_{data_name}_{cell_type}_latent_with_true_labels",
show=False)
sc.pp.neighbors(latent_with_fake_labels)
sc.tl.umap(latent_with_fake_labels)
sc.pl.umap(latent_with_fake_labels, color=color,
save=f"_{data_name}_{cell_type}_latent_with_fake_labels",
show=False)
sc.pp.neighbors(mmd_latent_with_true_labels)
sc.tl.umap(mmd_latent_with_true_labels)
sc.pl.umap(mmd_latent_with_true_labels, color=color,
save=f"_{data_name}_{cell_type}_mmd_latent_with_true_labels",
show=False)
# Excerpt from a plotting routine whose `def` is outside this view. Unlike a
# hard-coded 'condition' column, the condition column name comes from
# `condition_key` here.

# Label `mmd_latent_with_true_labels` with the condition and cell-type columns
# of `data.obs`.
mmd_latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values
# Wrap `latent_with_true_labels` in an AnnData and label it the same way.
latent_with_true_labels = sc.AnnData(X=latent_with_true_labels)
latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values
# For each object: neighbours -> UMAP -> plot. `color`, `data_name` and
# `cell_type` are defined outside this excerpt. Each plot call passes
# show=False, wspace=0.15 and frameon=False.
sc.pp.neighbors(train_data)
sc.tl.umap(train_data)
sc.pl.umap(train_data, color=color,
save=f'_{data_name}_{cell_type}_train_data',
show=False,
wspace=0.15,
frameon=False)
sc.pp.neighbors(mmd_latent_with_true_labels)
sc.tl.umap(mmd_latent_with_true_labels)
sc.pl.umap(mmd_latent_with_true_labels, color=color,
save=f"_{data_name}_{cell_type}_mmd_latent_with_true_labels",
show=False,
wspace=0.15,
frameon=False)
sc.pp.neighbors(latent_with_true_labels)
sc.tl.umap(latent_with_true_labels)
sc.pl.umap(latent_with_true_labels, color=color,
save=f"_{data_name}_{cell_type}_latent_with_true_labels",
show=False,
wspace=0.15,
frameon=False)
# NOTE(review): disabled assignment left in place; remove it or restore it.
# mmd_latent_with_true_labels.obs['mmd'] = 'others'
# Excerpt from a plotting routine whose `def` is outside this view. It projects
# `data` through `network.to_latent` and writes UMAP figures for `train_data`
# and for the latent representation.

# Feed the expression matrix of `data` to the network and wrap the result in an
# AnnData that carries the cell-type column of `data.obs`.
feed_data = data.X
latent = network.to_latent(feed_data)
latent = sc.AnnData(X=latent)
latent.obs[cell_type_key] = data.obs[cell_type_key].values
# Figures are coloured by cell type only.
color = [cell_type_key]
# neighbours -> UMAP -> plot for the training data; the `save` suffix ends in
# ".pdf" and the plot call passes show=False.
sc.pp.neighbors(train_data)
sc.tl.umap(train_data)
sc.pl.umap(train_data, color=color,
save=f'_{data_name}_train_data.pdf',
show=False)
# Same three steps for the latent representation.
sc.pp.neighbors(latent)
sc.tl.umap(latent)
sc.pl.umap(latent, color=color,
save=f"_{data_name}_latent.pdf",
show=False)
# Close every open matplotlib figure once both plots have been issued.
plt.close("all")
X_tsne : `np.ndarray` (`adata.obsm`, dtype `float`)
tSNE coordinates of data.
X_umap : `adata.obsm`
UMAP coordinates of data.
'''
# Body of an embedding helper whose `def` and parameter list are outside this
# view: optional PCA, neighbour graph, then tSNE and UMAP on `adata`.

# Work on a copy when requested; otherwise `adata` is used as passed in.
if copy:
adata = adata.copy()
else:
# NOTE(review): bare expression statement, has no effect.
adata
# Optional PCA with `nb_pcs` components before the neighbour graph.
if pp_pca:
sc.pp.pca(adata, n_comps=nb_pcs, svd_solver=svd_solver)
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=nb_pcs, method=method, metric=metric)
#sc.tl.pca(adata, n_comps=nb_pcs)
sc.tl.tsne(adata, n_pcs=nb_pcs, perplexity=perplexity)
# NOTE(review): min_dist, spread and n_components are passed positionally, so
# this relies on the parameter order of sc.tl.umap - confirm against the pinned
# scanpy version or switch to keyword arguments.
sc.tl.umap(adata, min_dist, spread, n_components)
# Only the copy is returned; when `copy` is false nothing is returned here.
if copy:
return(adata)
else:
# NOTE(review): bare `None` expression, has no effect.
None
def recipe_seurat(adata):
    """Run a three-step preprocessing recipe on ``adata`` and return it.

    The steps, applied in order, are:

    1. ``sc.pp.normalize_total`` with ``target_sum=1e4``
    2. ``sc.pp.log1p``
    3. ``sc.pp.scale`` with ``max_value=10`` and ``zero_center=False``

    None of the return values of these calls is captured, so the recipe relies
    on each step modifying ``adata`` in place; the same object that was passed
    in is returned.
    """
    pipeline = (
        (sc.pp.normalize_total, {"target_sum": 1e4}),
        (sc.pp.log1p, {}),
        (sc.pp.scale, {"max_value": 10, "zero_center": False}),
    )
    for step, options in pipeline:
        step(adata, **options)
    return adata
# Excerpt from the middle of a routine whose `def` and preceding `if` branch
# are outside this view. The visible part handles a `scgen.CVAE` network:
# latent projection, optional UMAP, prediction for one cell type and a
# differential-expression ranking.
plt.close("all")
elif isinstance(network, scgen.CVAE):
# Encode the labels of `train`; the second return value is discarded.
true_labels, _ = scgen.label_encoder(train)
# A sparse expression matrix is densified before being handed to the network.
# NOTE(review): `.A` is the dense-array shorthand of scipy sparse matrices;
# newer SciPy releases favour `.toarray()` - confirm the pinned version.
if sparse.issparse(train.X):
latent = network.to_latent(train.X.A, labels=true_labels)
else:
latent = network.to_latent(train.X, labels=true_labels)
# Wrap the latent matrix in an AnnData carrying the condition and cell-type
# columns of `train.obs`.
latent = sc.AnnData(X=latent,
obs={condition_key: train.obs[condition_key].tolist(),
cell_type_key: train.obs[cell_type_key].tolist()})
if plot_umap:
sc.pp.neighbors(latent)
sc.tl.umap(latent)
# NOTE(review): the f-string below has no placeholders.
sc.pl.umap(latent, color=[condition_key, cell_type_key],
save=f"_latent",
show=False)
# Restrict to the cells of `cell_type` and predict them with an all-ones label
# column of shape (n_cells, 1).
cell_type_data = train[train.obs[cell_type_key] == cell_type]
fake_labels = np.ones(shape=(cell_type_data.shape[0], 1))
pred = network.predict(data=cell_type_data, labels=fake_labels)
# Package the prediction as an AnnData whose condition column is "pred" for
# every row and whose variable names match `cell_type_data`.
pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
var={"var_names": cell_type_data.var_names})
# NOTE(review): `all_adata` is not used within the visible excerpt.
all_adata = cell_type_data.concatenate(pred_adata)
# Rank genes per condition (100 genes) and read the names ranked for the group
# `conditions["stim"]`; `conditions` is defined outside this excerpt.
sc.tl.rank_genes_groups(cell_type_data, groupby=condition_key, n_genes=100)
diff_genes = cell_type_data.uns["rank_genes_groups"]["names"][conditions["stim"]]
def run(self):
    """Per-cell normalize and log-transform ``self.data``, then dump it.

    ``sc.pp.normalize_per_cell`` is called with ``copy=True``, so it returns
    the normalized data instead of modifying its argument. The result is stored
    back on ``self.data``; previously it was discarded, which left the
    following ``log1p`` and the dump operating on un-normalized values.

    The output is written via ``self.dump_to_h5ad("lognorm_normalized")``.
    NOTE(review): this presumes ``dump_to_h5ad`` serializes ``self.data`` -
    confirm against its definition.
    """
    # Keep the normalized copy; without the assignment the call is a no-op.
    self.data = sc.pp.normalize_per_cell(self.data, copy=True)
    sc.pp.log1p(self.data)

    # Normalized matrix
    self.dump_to_h5ad("lognorm_normalized")
# Preprocess `adata` according to four boolean switches. The definition is cut
# off after the final `if normalize_input:` line in this view, so the last step
# and the return value are not documented here.
def normalize(adata, filter_min_counts=True, size_factors=True, normalize_input=True, logtrans_input=True):
# Drop genes and cells whose total count is below 1.
if filter_min_counts:
sc.pp.filter_genes(adata, min_counts=1)
sc.pp.filter_cells(adata, min_counts=1)
# If any later step will change the values, store a copy as `adata.raw`;
# otherwise the object itself is assigned.
if size_factors or normalize_input or logtrans_input:
adata.raw = adata.copy()
else:
adata.raw = adata
# Size factor per cell = its count total divided by the median count total.
# NOTE(review): `adata.obs.n_counts` is presumably written by
# normalize_per_cell (or by filter_cells above) - confirm.
if size_factors:
sc.pp.normalize_per_cell(adata)
adata.obs['size_factors'] = adata.obs.n_counts / np.median(adata.obs.n_counts)
else:
adata.obs['size_factors'] = 1.0
if logtrans_input:
sc.pp.log1p(adata)
# The body of this branch lies outside the visible excerpt.
if normalize_input:
data=dict(batch=self.batch_indices.squeeze()),
index=np.arange(self.nb_cells),
).astype("category")
# Excerpt from a method whose `def` is outside this view; `obs` is built by the
# statement that ends just above. Selects highly variable genes with the method
# named by `flavor`.

# Build an AnnData from a CSC-format copy of `self.X`.
counts = sp_sparse.csc_matrix(self.X.copy())
adata = sc.AnnData(X=counts, obs=obs)
# The "batch" column is only used when batch correction is requested and there
# are at least two batches.
batch_key = "batch" if (batch_correction and self.n_batches >= 2) else None
if flavor in ["cell_ranger", "seurat_v2"]:
if flavor == "seurat_v2":
# name expected by scanpy
flavor = "seurat"
# Counts normalization
sc.pp.normalize_total(adata, target_sum=1e4)
# logarithmed data
sc.pp.log1p(adata)
# Finding top genes
sc.pp.highly_variable_genes(
adata=adata,
n_top_genes=n_top_genes,
flavor=flavor,
batch_key=batch_key,
inplace=True, # inplace=False looks buggy
**highly_var_genes_kwargs,
)
# The "seurat_v3" branch calls its helper on `adata` directly; no
# normalize_total/log1p is applied in this branch.
elif flavor == "seurat_v3":
seurat_v3_highly_variable_genes(
adata, n_top_genes=n_top_genes, batch_key=batch_key
)