How to use the scanpy.tl.rank_genes_groups function in scanpy

To help you get started, we’ve selected a few examples of scanpy.tl.rank_genes_groups, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / trVAE / tests / hyperopt_trVAEMulti.py View on Github external
early_stop_limit=250,
                  lr_reducer=200,
                  monitor='val_loss',
                  shuffle=True,
                  save=False)

    cell_type_adata = train_adata.copy()[train_adata.obs[cell_type_key] == cell_type]

    sc.tl.rank_genes_groups(cell_type_adata,
                            key_added='up_reg_genes',
                            groupby=condition_key,
                            groups=[target_condition],
                            reference=source_condition,
                            n_genes=10)

    sc.tl.rank_genes_groups(cell_type_adata,
                            key_added='down_reg_genes',
                            groupby=condition_key,
                            groups=[source_condition],
                            reference=target_condition,
                            n_genes=10)

    up_genes = cell_type_adata.uns['up_reg_genes']['names'][target_condition].tolist()
    down_genes = cell_type_adata.uns['down_reg_genes']['names'][source_condition].tolist()

    top_genes = up_genes + down_genes

    source_adata = cell_type_adata.copy()[cell_type_adata.obs[condition_key] == source_condition]

    source_label = condition_encoder[source_condition]
    target_label = condition_encoder[target_condition]
github theislab / trVAE / tests / hyperopt_trvae_task_specific.py View on Github external
early_stop_limit=100,
                  lr_reducer=80,
                  monitor='val_loss',
                  shuffle=True,
                  save=False)

    cell_type_adata = train_adata.copy()[train_adata.obs[cell_type_key] == cell_type]

    sc.tl.rank_genes_groups(cell_type_adata,
                            key_added='up_reg_genes',
                            groupby=condition_key,
                            groups=[target_condition],
                            reference=source_condition,
                            n_genes=10)

    sc.tl.rank_genes_groups(cell_type_adata,
                            key_added='down_reg_genes',
                            groupby=condition_key,
                            groups=[source_condition],
                            reference=target_condition,
                            n_genes=10)

    up_genes = cell_type_adata.uns['up_reg_genes']['names'][target_condition].tolist()
    down_genes = cell_type_adata.uns['down_reg_genes']['names'][source_condition].tolist()

    top_genes = up_genes + down_genes

    source_adata = cell_type_adata.copy()[cell_type_adata.obs[condition_key] == source_condition]

    source_label = condition_encoder[source_condition]
    target_label = condition_encoder[target_condition]
github theislab / scgen / tests / test_mmd_cvae.py View on Github external
cell_type_key: net_train_data.obs[cell_type_key].tolist()})
        sc.pp.neighbors(mmd_with_fake_labels)
        sc.tl.umap(mmd_with_fake_labels)
        sc.pl.umap(mmd_with_fake_labels, color=[condition_key, cell_type_key],
                   save=f"_mmd_fake_labels_{z_dim}",
                   show=False)

        decoded_latent_with_true_labels = network.predict(data=latent_with_true_labels, encoder_labels=true_labels,
                                                          decoder_labels=true_labels, data_space='latent')

        cell_type_data = train[train.obs[cell_type_key] == cell_type]
        unperturbed_data = train[((train.obs[cell_type_key] == cell_type) & (train.obs[condition_key] == ctrl_key))]
        true_labels = np.zeros((len(unperturbed_data), 1))
        fake_labels = np.ones((len(unperturbed_data), 1))

        sc.tl.rank_genes_groups(cell_type_data, groupby=condition_key, n_genes=100)
        diff_genes = cell_type_data.uns["rank_genes_groups"]["names"][stim_key]
        # cell_type_data = cell_type_data.copy()[:, diff_genes.tolist()]

        pred = network.predict(data=unperturbed_data, encoder_labels=true_labels, decoder_labels=fake_labels)
        pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
                                     var={"var_names": cell_type_data.var_names})
        all_adata = cell_type_data.concatenate(pred_adata)

        scgen.plotting.reg_mean_plot(all_adata, condition_key=condition_key,
                                     axis_keys={"x": ctrl_key, "y": stim_key, "y1": "pred"},
                                     gene_list=diff_genes,
                                     path_to_save=f"./figures/reg_mean_{z_dim}.pdf")
        scgen.plotting.reg_var_plot(all_adata, condition_key=condition_key,
                                    axis_keys={"x": ctrl_key, "y": stim_key, 'y1': "pred"},
                                    gene_list=diff_genes,
                                    path_to_save=f"./figures/reg_var_{z_dim}.pdf")
github theislab / scgen / tests / test_score.py View on Github external
def score(adata, n_deg=10, condition_key="condition",
                 conditions={"stim": "stimulated", "ctrl": "control"},
         sortby="median_score"):

    import scanpy as sc
    import numpy as np
    from scipy.stats import entropy
    import pandas as pd
    sc.tl.rank_genes_groups(adata, groupby=condition_key, method="wilcoxon", n_genes=n_deg)
    adata_deg = adata[:, adata.uns["rank_genes_groups"]["names"][conditions["stim"]].tolist()].copy()
    cell_types = adata_deg.obs["cell_type"].cat.categories.tolist()
    lfc_temp = np.zeros((len(cell_types), n_deg))
    for j , ct in enumerate(cell_types):
        stim = adata_deg[(adata_deg.obs["cell_type"] == ct) &
                         (adata_deg.obs[condition_key] == conditions["stim"])].X.mean(0).A1
        ctrl = adata_deg[(adata_deg.obs["cell_type"] == ct) &
                         (adata_deg.obs[condition_key] == conditions["ctrl"])].X.mean(0).A1
        lfc_temp[j] = np.abs((stim - ctrl)[None, :])
    norm_lfc = lfc_temp/lfc_temp.sum(0).reshape((1, n_deg))
    ent_scores = entropy(norm_lfc)
    median = np.median(lfc_temp, axis=0)
    med_scores = np.max(np.abs((lfc_temp - median)), axis=0)
    df_score = pd.DataFrame({"genes": adata_deg.var_names.tolist(), "median_score": med_scores,
                             "entropy_score": ent_scores })
    if (sortby == "median_score"):
github theislab / trVAE / tests / test_cvae.py View on Github external
mmd_latent_with_fake_labels = network.to_mmd_layer(feed_data, fake_labels)

        cell_type_ctrl = cell_type_adata.copy()[cell_type_adata.obs['condition'] == source_key]
        print(cell_type_ctrl.shape, cell_type_adata.shape)

        pred_celltypes = network.predict(cell_type_ctrl, labels=np.ones((cell_type_ctrl.shape[0], 1)))
        pred_adata = anndata.AnnData(X=pred_celltypes)
        pred_adata.obs['condition'] = ['predicted'] * pred_adata.shape[0]
        pred_adata.var = cell_type_adata.var

        if data_name == "pbmc":
            sc.tl.rank_genes_groups(cell_type_adata, groupby="condition", n_genes=100, method="wilcoxon")
            top_100_genes = cell_type_adata.uns["rank_genes_groups"]["names"][target_key].tolist()
            gene_list = top_100_genes[:10]
        else:
            sc.tl.rank_genes_groups(cell_type_adata, groupby="condition", n_genes=100, method="wilcoxon")
            top_50_down_genes = cell_type_adata.uns["rank_genes_groups"]["names"][source_key].tolist()
            top_50_up_genes = cell_type_adata.uns["rank_genes_groups"]["names"][target_key].tolist()
            top_100_genes = top_50_up_genes + top_50_down_genes
            gene_list = top_50_down_genes[:5] + top_50_up_genes[:5]

        cell_type_adata = cell_type_adata.concatenate(pred_adata)

        trvae.plotting.reg_mean_plot(cell_type_adata,
                                     top_100_genes=top_100_genes,
                                     gene_list=gene_list,
                                     condition_key='condition',
                                     axis_keys={"x": 'predicted', 'y': target_key},
                                     labels={'x': 'pred stim', 'y': 'real stim'},
                                     legend=False,
                                     fontsize=20,
                                     textsize=14,
github theislab / trVAE / tests / test_trAE.py View on Github external
latent_with_fake_labels = network.to_latent(feed_data)

        cell_type_ctrl = cell_type_adata.copy()[cell_type_adata.obs['condition'] == source_key]
        print(cell_type_ctrl.shape, cell_type_adata.shape)

        pred_celltypes = network.predict(cell_type_ctrl)
        pred_adata = anndata.AnnData(X=pred_celltypes)
        pred_adata.obs['condition'] = ['predicted'] * pred_adata.shape[0]
        pred_adata.var = cell_type_adata.var

        if data_name == "pbmc":
            sc.tl.rank_genes_groups(cell_type_adata, groupby="condition", n_genes=100, method="wilcoxon")
            top_100_genes = cell_type_adata.uns["rank_genes_groups"]["names"][target_key].tolist()
            gene_list = top_100_genes[:10]
        else:
            sc.tl.rank_genes_groups(cell_type_adata, groupby="condition", n_genes=100, method="wilcoxon")
            top_50_down_genes = cell_type_adata.uns["rank_genes_groups"]["names"][source_key].tolist()
            top_50_up_genes = cell_type_adata.uns["rank_genes_groups"]["names"][target_key].tolist()
            top_100_genes = top_50_up_genes + top_50_down_genes
            gene_list = top_50_down_genes[:5] + top_50_up_genes[:5]

        cell_type_adata = cell_type_adata.concatenate(pred_adata)

        trvae.plotting.reg_mean_plot(cell_type_adata,
                                     top_100_genes=top_100_genes,
                                     gene_list=gene_list,
                                     condition_key='condition',
                                     axis_keys={"x": 'predicted', 'y': target_key},
                                     labels={'x': 'pred stim', 'y': 'real stim'},
                                     legend=False,
                                     fontsize=20,
                                     textsize=14,
github colomemaria / epiScanpy / episcanpy / tools / _cell_id.py View on Github external
for j in range(0,len(matching_peaks)):
        if matching_gene_name[j] in cell_type_markers[cell_type]:
            cell_type_peaks[cell_type].append(matching_peaks[j]+"_"+matching_gene_name[j])
            
    # make the peaks unique
    cell_type_peaks[cell_type] = list(set(cell_type_peaks[cell_type]))
        
        
    cell_type_peak = []
    cell_type_gene = []
    for elem in cell_type_peaks[cell_type]:
        ctype = elem.split("_")
        cell_type_peak.append(ctype[0]+"_"+ctype[1]+"_"+ctype[2])
        cell_type_gene.append(ctype[3])
    
    sc.tl.rank_genes_groups(adata, groupby="louvain", n_genes=n_peaks_per_cluster)

    # top 100 to 1000 ranked peaks per louvain group
    ATAC_ranking_dict = {}
    for group in range(0, len(set(list(adata.obs["louvain"])))):
        ATAC_ranking_dict[str(group)] = []
        for idx in range(0,n_peaks_per_cluster):
            ATAC_ranking_dict[str(group)].append(list(adata.uns["rank_genes_groups"]['names'])[idx][group])
        
    sig_markers = []
    sig_peak = []
    for group in range(0, len(set(list(adata.obs["louvain"])))):
        for elem in ATAC_ranking_dict[str(group)]:
            if elem in cell_type_peak:
                sig_markers.append(elem+":"+str(group))
                sig_peak.append(elem)
github colomemaria / epiScanpy / episcanpy / tools / _features_selection.py View on Github external
else:
    		warn("""Attention: no omic specified. We used default settings of the original Scanpy function\n
    			When the parameters where not specified in input""")
    		omic = 'RNA'

    if (method!='') and (adata.uns['omic'] != 'methylation'):
        method='t-test_overestim_var'
    else:
        method='t-test'
    if omic == 'methylation':
    	if copy==False:
    		sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                groups=groups, reference=reference, n_genes=n_features,
    			rankby_abs=True, key_added=key_added, copy=False, method='t-test', corr_method='benjamini-hochberg')
    	else:
    		adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                groups=groups, reference=reference, n_genes=n_features,
    			rankby_abs=True, key_added=key_added, copy=True, method='t-test', corr_method='benjamini-hochberg')
    		return(adata2)
    else:
    	if copy==False:
    		sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                            groups=groups, reference=reference, n_genes=n_features,
                     		rankby_abs=rankby_abs, key_added=key_added, copy=False, method=method,
                     		corr_method=corr_method, **kwds)
    	else:
    		adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                            groups=groups, reference=reference, n_genes=n_features,
                     		rankby_abs=rankby_ab, key_added=key_added, copy=True, method=method,
                            corr_method=corr_method, **kwds)
    		return(adata2)
github colomemaria / epiScanpy / episcanpy / tools / _features_selection.py View on Github external
method='t-test_overestim_var'
    else:
        method='t-test'
    if omic == 'methylation':
    	if copy==False:
    		sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                groups=groups, reference=reference, n_genes=n_features,
    			rankby_abs=True, key_added=key_added, copy=False, method='t-test', corr_method='benjamini-hochberg')
    	else:
    		adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                groups=groups, reference=reference, n_genes=n_features,
    			rankby_abs=True, key_added=key_added, copy=True, method='t-test', corr_method='benjamini-hochberg')
    		return(adata2)
    else:
    	if copy==False:
    		sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                            groups=groups, reference=reference, n_genes=n_features,
                     		rankby_abs=rankby_abs, key_added=key_added, copy=False, method=method,
                     		corr_method=corr_method, **kwds)
    	else:
    		adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                            groups=groups, reference=reference, n_genes=n_features,
                     		rankby_abs=rankby_ab, key_added=key_added, copy=True, method=method,
                            corr_method=corr_method, **kwds)
    		return(adata2)