Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
early_stop_limit=250,
lr_reducer=200,
monitor='val_loss',
shuffle=True,
save=False)
# Restrict the training data to the current cell type.  Subset first and copy
# second: this duplicates only the selected cells and yields an independent
# AnnData rather than a view onto a full copy, so the ranking calls below
# store their results in `.uns` of a real object.
cell_type_adata = train_adata[train_adata.obs[cell_type_key] == cell_type].copy()

# Top 10 genes for the target condition, using the source condition as the
# reference group; results are stored under .uns['up_reg_genes'].
sc.tl.rank_genes_groups(
    cell_type_adata,
    key_added='up_reg_genes',
    groupby=condition_key,
    groups=[target_condition],
    reference=source_condition,
    n_genes=10,
)

# The reverse comparison (source condition against the target condition);
# results are stored under .uns['down_reg_genes'].
sc.tl.rank_genes_groups(
    cell_type_adata,
    key_added='down_reg_genes',
    groupby=condition_key,
    groups=[source_condition],
    reference=target_condition,
    n_genes=10,
)

up_genes = cell_type_adata.uns['up_reg_genes']['names'][target_condition].tolist()
down_genes = cell_type_adata.uns['down_reg_genes']['names'][source_condition].tolist()
top_genes = up_genes + down_genes

# Source-condition cells of this cell type, again as an independent copy.
source_adata = cell_type_adata[cell_type_adata.obs[condition_key] == source_condition].copy()

# Encoded labels for the two conditions.
source_label = condition_encoder[source_condition]
target_label = condition_encoder[target_condition]
early_stop_limit=100,
lr_reducer=80,
monitor='val_loss',
shuffle=True,
save=False)
# Restrict the training data to the current cell type.  Subset first and copy
# second: this duplicates only the selected cells and yields an independent
# AnnData rather than a view onto a full copy, so the ranking calls below
# store their results in `.uns` of a real object.
cell_type_adata = train_adata[train_adata.obs[cell_type_key] == cell_type].copy()

# Top 10 genes for the target condition, using the source condition as the
# reference group; results are stored under .uns['up_reg_genes'].
sc.tl.rank_genes_groups(
    cell_type_adata,
    key_added='up_reg_genes',
    groupby=condition_key,
    groups=[target_condition],
    reference=source_condition,
    n_genes=10,
)

# The reverse comparison (source condition against the target condition);
# results are stored under .uns['down_reg_genes'].
sc.tl.rank_genes_groups(
    cell_type_adata,
    key_added='down_reg_genes',
    groupby=condition_key,
    groups=[source_condition],
    reference=target_condition,
    n_genes=10,
)

up_genes = cell_type_adata.uns['up_reg_genes']['names'][target_condition].tolist()
down_genes = cell_type_adata.uns['down_reg_genes']['names'][source_condition].tolist()
top_genes = up_genes + down_genes

# Source-condition cells of this cell type, again as an independent copy.
source_adata = cell_type_adata[cell_type_adata.obs[condition_key] == source_condition].copy()

# Encoded labels for the two conditions.
source_label = condition_encoder[source_condition]
target_label = condition_encoder[target_condition]
cell_type_key: net_train_data.obs[cell_type_key].tolist()})
# Build the neighbour graph and the UMAP embedding for `mmd_with_fake_labels`,
# then write a UMAP coloured by condition and by cell type to disk without
# displaying it (show=False).
sc.pp.neighbors(mmd_with_fake_labels)
sc.tl.umap(mmd_with_fake_labels)
umap_plot_options = dict(
    color=[condition_key, cell_type_key],
    save=f"_mmd_fake_labels_{z_dim}",
    show=False,
)
sc.pl.umap(mmd_with_fake_labels, **umap_plot_options)

# Run `latent_with_true_labels` through the network, passing `true_labels` as
# both the encoder and the decoder labels, with data_space='latent'.
decoded_latent_with_true_labels = network.predict(
    data=latent_with_true_labels,
    encoder_labels=true_labels,
    decoder_labels=true_labels,
    data_space='latent',
)
# Select the cells of the current cell type, and among them the control cells.
is_cell_type = train.obs[cell_type_key] == cell_type
is_control = train.obs[condition_key] == ctrl_key
cell_type_data = train[is_cell_type]
unperturbed_data = train[is_cell_type & is_control]

# One label row per control cell: zeros are fed to the encoder, ones to the
# decoder in the prediction call below.
n_unperturbed = len(unperturbed_data)
true_labels = np.zeros((n_unperturbed, 1))
fake_labels = np.ones((n_unperturbed, 1))

# Rank genes per condition and keep the names ranked for `stim_key`.
sc.tl.rank_genes_groups(cell_type_data, groupby=condition_key, n_genes=100)
ranked_names = cell_type_data.uns["rank_genes_groups"]["names"]
diff_genes = ranked_names[stim_key]

# Predict from the control cells and wrap the output in an AnnData whose
# condition column is "pred" for every row.
pred = network.predict(
    data=unperturbed_data,
    encoder_labels=true_labels,
    decoder_labels=fake_labels,
)
pred_obs = {condition_key: ["pred"] * len(pred)}
pred_var = {"var_names": cell_type_data.var_names}
pred_adata = anndata.AnnData(pred, obs=pred_obs, var=pred_var)
all_adata = cell_type_data.concatenate(pred_adata)

# Regression plot of the means, written to ./figures.
scgen.plotting.reg_mean_plot(
    all_adata,
    condition_key=condition_key,
    axis_keys={"x": ctrl_key, "y": stim_key, "y1": "pred"},
    gene_list=diff_genes,
    path_to_save=f"./figures/reg_mean_{z_dim}.pdf",
)

# Regression plot of the variances, written to ./figures.
scgen.plotting.reg_var_plot(
    all_adata,
    condition_key=condition_key,
    axis_keys={"x": ctrl_key, "y": stim_key, 'y1': "pred"},
    gene_list=diff_genes,
    path_to_save=f"./figures/reg_var_{z_dim}.pdf",
)
# Compute two per-gene scores (a median-deviation score and an entropy score)
# over the top `n_deg` genes ranked for the stimulated condition, based on the
# per-cell-type difference between stimulated and control mean expression.
# NOTE(review): the end of this function (the `sortby` handling and the return
# value) is not visible here; the comments cover only the lines shown.
# NOTE(review): the `conditions` default is a dict shared between calls; the
# visible lines only read from it.
def score(adata, n_deg=10, condition_key="condition",
conditions={"stim": "stimulated", "ctrl": "control"},
sortby="median_score"):
import scanpy as sc
import numpy as np
from scipy.stats import entropy
import pandas as pd
# Rank genes per condition (Wilcoxon) and keep only the genes ranked for the
# stimulated condition, as an independent copy.
sc.tl.rank_genes_groups(adata, groupby=condition_key, method="wilcoxon", n_genes=n_deg)
adata_deg = adata[:, adata.uns["rank_genes_groups"]["names"][conditions["stim"]].tolist()].copy()
# The cell-type column name "cell_type" is hard-coded and must be categorical
# (`.cat` accessor).
cell_types = adata_deg.obs["cell_type"].cat.categories.tolist()
# One row per cell type, one column per ranked gene.
lfc_temp = np.zeros((len(cell_types), n_deg))
for j , ct in enumerate(cell_types):
# Mean expression per gene for this cell type in each condition.
# NOTE(review): `.A1` presumably relies on X being a scipy sparse matrix so
# that mean(0) returns a numpy matrix -- confirm for dense inputs.
stim = adata_deg[(adata_deg.obs["cell_type"] == ct) &
(adata_deg.obs[condition_key] == conditions["stim"])].X.mean(0).A1
ctrl = adata_deg[(adata_deg.obs["cell_type"] == ct) &
(adata_deg.obs[condition_key] == conditions["ctrl"])].X.mean(0).A1
# Despite the "lfc" name, this stores the absolute difference of the means.
lfc_temp[j] = np.abs((stim - ctrl)[None, :])
# Normalise each gene (column) across cell types; a column summing to zero
# divides by zero here.
norm_lfc = lfc_temp/lfc_temp.sum(0).reshape((1, n_deg))
# scipy's entropy reduces along axis 0 by default, giving one value per gene.
ent_scores = entropy(norm_lfc)
# Largest absolute deviation from the across-cell-type median, per gene.
median = np.median(lfc_temp, axis=0)
med_scores = np.max(np.abs((lfc_temp - median)), axis=0)
df_score = pd.DataFrame({"genes": adata_deg.var_names.tolist(), "median_score": med_scores,
"entropy_score": ent_scores })
if (sortby == "median_score"):
mmd_latent_with_fake_labels = network.to_mmd_layer(feed_data, fake_labels)

# Source-condition cells of this cell type.  Subset first and copy second so
# that only the selected rows are duplicated and the result is an independent
# AnnData rather than a view onto a full copy.
cell_type_ctrl = cell_type_adata[cell_type_adata.obs['condition'] == source_key].copy()
print(cell_type_ctrl.shape, cell_type_adata.shape)

# Predict from the source-condition cells with an all-ones label column, and
# wrap the output in an AnnData labelled 'predicted' that shares the gene
# annotation of the real data.
pred_celltypes = network.predict(cell_type_ctrl, labels=np.ones((cell_type_ctrl.shape[0], 1)))
pred_adata = anndata.AnnData(X=pred_celltypes)
pred_adata.obs['condition'] = ['predicted'] * pred_adata.shape[0]
pred_adata.var = cell_type_adata.var

# The ranking call is the same for every dataset, so it is issued once; only
# the choice of which ranked names to keep depends on `data_name`.
sc.tl.rank_genes_groups(cell_type_adata, groupby="condition", n_genes=100, method="wilcoxon")
ranked_names = cell_type_adata.uns["rank_genes_groups"]["names"]
if data_name == "pbmc":
    top_100_genes = ranked_names[target_key].tolist()
    gene_list = top_100_genes[:10]
else:
    # NOTE(review): with n_genes=100 each of these lists holds up to 100
    # names, not 50, so `top_100_genes` can hold up to 200 -- confirm intent.
    top_50_down_genes = ranked_names[source_key].tolist()
    top_50_up_genes = ranked_names[target_key].tolist()
    top_100_genes = top_50_up_genes + top_50_down_genes
    gene_list = top_50_down_genes[:5] + top_50_up_genes[:5]

# Append the predicted cells to the real cells.
cell_type_adata = cell_type_adata.concatenate(pred_adata)
trvae.plotting.reg_mean_plot(cell_type_adata,
top_100_genes=top_100_genes,
gene_list=gene_list,
condition_key='condition',
axis_keys={"x": 'predicted', 'y': target_key},
labels={'x': 'pred stim', 'y': 'real stim'},
legend=False,
fontsize=20,
textsize=14,
latent_with_fake_labels = network.to_latent(feed_data)

# Source-condition cells of this cell type.  Subset first and copy second so
# that only the selected rows are duplicated and the result is an independent
# AnnData rather than a view onto a full copy.
cell_type_ctrl = cell_type_adata[cell_type_adata.obs['condition'] == source_key].copy()
print(cell_type_ctrl.shape, cell_type_adata.shape)

# Predict from the source-condition cells and wrap the output in an AnnData
# labelled 'predicted' that shares the gene annotation of the real data.
pred_celltypes = network.predict(cell_type_ctrl)
pred_adata = anndata.AnnData(X=pred_celltypes)
pred_adata.obs['condition'] = ['predicted'] * pred_adata.shape[0]
pred_adata.var = cell_type_adata.var

# The ranking call is the same for every dataset, so it is issued once; only
# the choice of which ranked names to keep depends on `data_name`.
sc.tl.rank_genes_groups(cell_type_adata, groupby="condition", n_genes=100, method="wilcoxon")
ranked_names = cell_type_adata.uns["rank_genes_groups"]["names"]
if data_name == "pbmc":
    top_100_genes = ranked_names[target_key].tolist()
    gene_list = top_100_genes[:10]
else:
    # NOTE(review): with n_genes=100 each of these lists holds up to 100
    # names, not 50, so `top_100_genes` can hold up to 200 -- confirm intent.
    top_50_down_genes = ranked_names[source_key].tolist()
    top_50_up_genes = ranked_names[target_key].tolist()
    top_100_genes = top_50_up_genes + top_50_down_genes
    gene_list = top_50_down_genes[:5] + top_50_up_genes[:5]

# Append the predicted cells to the real cells.
cell_type_adata = cell_type_adata.concatenate(pred_adata)
trvae.plotting.reg_mean_plot(cell_type_adata,
top_100_genes=top_100_genes,
gene_list=gene_list,
condition_key='condition',
axis_keys={"x": 'predicted', 'y': target_key},
labels={'x': 'pred stim', 'y': 'real stim'},
legend=False,
fontsize=20,
textsize=14,
# NOTE(review): indentation is not preserved in this view, so the nesting of
# the loops below (and the enclosing loop that supplies `cell_type`) is
# inferred -- confirm against the original file.
# Record "<peak>_<gene>" for every matched peak whose gene is listed as a
# marker of `cell_type`.
for j in range(0,len(matching_peaks)):
if matching_gene_name[j] in cell_type_markers[cell_type]:
cell_type_peaks[cell_type].append(matching_peaks[j]+"_"+matching_gene_name[j])
# make the peaks unique
# (round-tripping through a set does not preserve the original order)
cell_type_peaks[cell_type] = list(set(cell_type_peaks[cell_type]))
# Split each "<peak>_<gene>" string back into its peak and gene parts.
cell_type_peak = []
cell_type_gene = []
for elem in cell_type_peaks[cell_type]:
# The first three "_"-separated fields are rejoined as the peak name and the
# fourth is taken as the gene.
# NOTE(review): presumably peaks look like chrom_start_end; a peak or gene
# name containing extra "_" would be split incorrectly -- verify.
ctype = elem.split("_")
cell_type_peak.append(ctype[0]+"_"+ctype[1]+"_"+ctype[2])
cell_type_gene.append(ctype[3])
# Rank features for each louvain cluster, keeping n_peaks_per_cluster names.
sc.tl.rank_genes_groups(adata, groupby="louvain", n_genes=n_peaks_per_cluster)
# top 100 to 1000 ranked peaks per louvain group
# Keys are the group index as a string; the number of groups is the number of
# distinct values in adata.obs["louvain"].
ATAC_ranking_dict = {}
for group in range(0, len(set(list(adata.obs["louvain"])))):
ATAC_ranking_dict[str(group)] = []
for idx in range(0,n_peaks_per_cluster):
# `names` is indexed as [rank][group].
# NOTE(review): the list(...) conversion is rebuilt on every iteration.
ATAC_ranking_dict[str(group)].append(list(adata.uns["rank_genes_groups"]['names'])[idx][group])
# Keep the ranked peaks that are also marker-associated peaks; `sig_markers`
# additionally tags each one with ":<group>".
sig_markers = []
sig_peak = []
for group in range(0, len(set(list(adata.obs["louvain"])))):
for elem in ATAC_ranking_dict[str(group)]:
if elem in cell_type_peak:
sig_markers.append(elem+":"+str(group))
sig_peak.append(elem)
else:
warn("""Attention: no omic specified. We used default settings of the original Scanpy function\n
When the parameters where not specified in input""")
omic = 'RNA'
if (method!='') and (adata.uns['omic'] != 'methylation'):
method='t-test_overestim_var'
else:
method='t-test'
if omic == 'methylation':
if copy==False:
sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=True, key_added=key_added, copy=False, method='t-test', corr_method='benjamini-hochberg')
else:
adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=True, key_added=key_added, copy=True, method='t-test', corr_method='benjamini-hochberg')
return(adata2)
else:
if copy==False:
sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=rankby_abs, key_added=key_added, copy=False, method=method,
corr_method=corr_method, **kwds)
else:
adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=rankby_ab, key_added=key_added, copy=True, method=method,
corr_method=corr_method, **kwds)
return(adata2)
method='t-test_overestim_var'
else:
method='t-test'
if omic == 'methylation':
if copy==False:
sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=True, key_added=key_added, copy=False, method='t-test', corr_method='benjamini-hochberg')
else:
adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=True, key_added=key_added, copy=True, method='t-test', corr_method='benjamini-hochberg')
return(adata2)
else:
if copy==False:
sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=rankby_abs, key_added=key_added, copy=False, method=method,
corr_method=corr_method, **kwds)
else:
adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
groups=groups, reference=reference, n_genes=n_features,
rankby_abs=rankby_ab, key_added=key_added, copy=True, method=method,
corr_method=corr_method, **kwds)
return(adata2)