Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
else:
# Draw N sample indices with the supplied sampler and report how many
# distinct indices came back.
samp_idx = sampling_fn(X_dimred, N)
n_unique = len(set(samp_idx))
log('Found {} entries'.format(n_unique))
log('Visualizing sampled...')
# The expression matrix is only subset to the sampled rows when gene names,
# the expression matrix and the gene list were all provided.
have_gene_info = all(
    item is not None for item in (gene_names, gene_expr, genes)
)
expr = gene_expr[samp_idx, :] if have_gene_info else None
# Marker size shrinks as the sample grows, but never drops below 5.
point_size = max(int(30000 / N), 5)

if viz_type == 'umap':
    # Build a UMAP embedding of the sampled rows only.
    adata = AnnData(X=X_dimred[samp_idx, :])
    sc.pp.neighbors(adata, use_rep='X')
    sc.tl.umap(adata, min_dist=0.5)
    # Clamp coordinates into [-20, 20]. The previous code assigned -20 to
    # values above +20, which moved high outliers to the opposite edge of
    # the plot instead of clamping them.
    embedding = np.clip(np.array(adata.obsm['X_umap']), -20, 20)
    visualize(
        None, cell_labels[samp_idx],
        name + '_umap_{}{}'.format(sample_type, N), cell_types,
        embedding=embedding,
        gene_names=gene_names, gene_expr=expr, genes=genes,
        size=point_size, image_suffix='.png',
    )
else:
    # No precomputed embedding: hand the sampled rows to visualize() along
    # with the embedding parameters (perplexity is at least 50).
    visualize(
        [X_dimred[samp_idx, :]], cell_labels[samp_idx],
        name + '_{}{}'.format(sample_type, N), cell_types,
        gene_names=gene_names, gene_expr=expr, genes=genes,
        perplexity=max(N / 200, 50), n_iter=500,
        size=point_size, image_suffix='.png',
    )
if __name__ == "__main__":
    # Figures saved by the scanpy plotting calls below go to this directory.
    sc.settings.figdir = "../results"
    train(1000, initial_run=True)

    # Predict from the control CD4T expression matrix.
    adata_list = dr.extractor(data, "CD4T")
    ctrl_CD4T = adata_list[1]
    predicted_cells = predict(ctrl_CD4T.X.A)

    # Stack control, real stimulated and predicted cells into one AnnData,
    # labelling each row with the group it came from.
    ctrl_expr = adata_list[1].X.A
    stim_expr = adata_list[2].X.A
    all_Data = sc.AnnData(np.concatenate([ctrl_expr, stim_expr, predicted_cells]))
    condition = []
    for label, rows in (
        ("ctrl", ctrl_expr),
        ("real_stim", stim_expr),
        ("pred_stim", predicted_cells),
    ):
        condition.extend([label] * len(rows))
    all_Data.obs["condition"] = condition
    all_Data.var_names = adata_list[3].var_names

    # Regression plots, a violin plot for ISG15 and a UMAP coloured by
    # condition.
    results_dir = "../results/"
    dr.reg_mean_plot(all_Data, results_dir, "style_trasnfer")
    dr.reg_var_plot(all_Data, results_dir, "style_trasnfer ")
    sc.pl.violin(
        all_Data,
        groupby="condition",
        keys="ISG15",
        save="_ISG15_style_trasnfer.pdf",
        show=False,
    )
    sc.pp.neighbors(all_Data)
    sc.tl.umap(all_Data)
    sc.pl.umap(all_Data, color=["condition"], save="style_trasnfer.pdf", show=False)

    # UMAP of the low-dimensional embedding of the training data, coloured
    # by cell type and condition.
    latent = sc.AnnData(low_embed_stim(train_real.X))
    sc.pp.neighbors(latent)
    sc.tl.umap(latent)
    for column in ("cell_type", "condition"):
        latent.obs[column] = train_real.obs[column]
    sc.pl.umap(
        latent,
        color=["cell_type", "condition"],
        show=False,
        save="_style_transfer_latent.pdf",
    )
# Simulate a lineage tree, sample cells from it, embed them with UMAP and
# write the simulation outputs to disk.

# Topology is a chain of consecutive branch pairs: A-B, B-C, C-D, D-E, E-F.
top = [["A", 'B'], ['B', 'C'], ['C', 'D'], ['D', 'E'], ['E', 'F']]
# Distinct branch names that appear anywhere in the topology.
branches = np.unique(np.array(top).flatten())
# Every branch is assigned the same value, 30
# (presumably its pseudotime length -- confirm against tree.Tree).
time = {b: 30 for b in branches}
# Passed to tree.Tree as G (presumably the number of genes -- TODO confirm).
G = 500
t = tree.Tree(topology=top, G=G, time=time, num_branches=len(branches), branch_points=0, modules=40)
# Ws is not used again in the lines visible here.
uMs, Ws, Hs = sim.simulate_lineage(t, intra_branch_tol=-1, inter_branch_tol=0)
gene_scale = sut.simulate_base_gene_exp(t, uMs)
t.add_genes(uMs, gene_scale)
# alpha and beta each hold t.G values: the exponential of normal draws, so
# both are log-normally distributed; beta is additionally shifted by +1.
# NOTE(review): `random` is presumably numpy.random -- confirm the import.
alpha = np.exp(random.normal(loc=np.log(0.2), scale=np.log(1.5), size=t.G))
beta = np.exp(random.normal(loc=np.log(1), scale=np.log(1.5), size=t.G)) + 1
X1, labs1, brns1, scalings1 = sim.sample_whole_tree(t, 5, alpha=alpha, beta=beta)
# Print the first five entries of labs1 as a quick sanity check.
print(labs1[0:5])
# normalize gene expression by library size
# (divides each row of X1 by the matching entry of scalings1; assumes X1 is
# 2-D with one scalings1 entry per row -- confirm)
X1 = (X1.transpose() / scalings1).transpose()
# log(x + 1) transform before building the AnnData object.
data1 = ad.AnnData(np.log(X1+1))
# Neighbour graph computed directly on X with 700 neighbours, then UMAP.
pp.neighbors(data1, use_rep='X', n_neighbors=700)
umap(data1)
dm1 = data1.obsm["X_umap"]
job_id = "linear"
# NOTE(review): hard-coded, user-specific output directory.
save_dir = "/home/npapado/Desktop"
tu.save_matrices(job_id, save_dir, X1, uMs, Hs)
# NOTE(review): rseed is not defined in the lines visible here.
tu.save_params(job_id, save_dir, t, rseed)
tu.save_cell_params(job_id, save_dir, labs1, brns1, scalings1)
# The UMAP coordinates are written as text to the same hard-coded directory.
np.savetxt("/home/npapado/Desktop/linear_umap.csv", dm1)
def louvain(X, N, resolution=1, seed=None, replace=False):
from anndata import AnnData
import scanpy.api as sc
adata = AnnData(X=X)
sc.pp.neighbors(adata, use_rep='X')
sc.tl.louvain(adata, resolution=resolution, key_added='louvain')
cluster_labels_full = adata.obs['louvain'].tolist()
louv = {}
for i, cluster in enumerate(cluster_labels_full):
if cluster not in louv:
louv[cluster] = []
louv[cluster].append(i)
lv_idx = []
for n in range(N):
louv_cells = list(louv.keys())
louv_cell = louv_cells[np.random.choice(len(louv_cells))]
samples = list(louv[louv_cell])
sample = samples[np.random.choice(len(samples))]
if not replace:
pc_nb=0
for i in adata.uns['pca']['variance']:
if i>pc_cutoff:
pc_nb+=1
pipeLog('%d PCs will be used for tSNE and clustering' % pc_nb)
else:
pc_nb = pcCount
pipeLog("Using %d PCs as configured in config" % pcCount)
# tSNE on the selected number of principal components.
pipeLog('Performing tSNE')
sc.tl.tsne(adata, n_pcs=int(pc_nb), random_state=2, n_jobs=8)

# Louvain clustering parameters come from the config, with defaults.
neighbors = int(conf.get("louvainNeighbors", 6))
# The resolution is real-valued: int() would truncate e.g. 0.5 to 0, so it
# is parsed as a float.
res = float(conf.get("louvainRes", 1.0))
pipeLog('Performing Louvain Clustering, using %d PCs and %d neighbors' % (pc_nb, neighbors))
sc.pp.neighbors(adata, n_pcs=int(pc_nb), n_neighbors=neighbors)
sc.tl.louvain(adata, resolution=res)
# Count the distinct labels in the 'louvain' column. Building a set from the
# one-column DataFrame adata.obs[['louvain']] iterates column names and
# therefore always reported exactly one cluster.
pipeLog("Found %d louvain clusters" % len(adata.obs['louvain'].unique()))
sc.pl.tsne(adata, color='louvain')
#Clustering. Default Resolution: 1
#res = 1.0
#pipeLog('Performing Louvain Clustering, resolution = %f' % res)
#sc.pp.neighbors(adata, n_pcs=int(pc_nb))
#sc.tl.louvain(adata, resolution=res)
#sc.pl.tsne(adata, color='louvain')
if conf.get("doUmap", True):
pipeLog("Performing UMAP")
sc.tl.umap(adata)
if conf.get("doPhate", True):
pc_nb=0
for i in adata.uns['pca']['variance']:
if i>pc_cutoff:
pc_nb+=1
pipeLog('%d PCs will be used for tSNE and clustering' % pc_nb)
else:
pc_nb = pcCount
pipeLog("Using %d PCs as configured in config" % pcCount)
# tSNE on the selected number of principal components.
pipeLog('Performing tSNE')
sc.tl.tsne(adata, n_pcs=int(pc_nb), random_state=2, n_jobs=8)

# Louvain clustering parameters come from the config, with defaults.
neighbors = int(conf.get("louvainNeighbors", 6))
# The resolution is real-valued: int() would truncate e.g. 0.5 to 0, so it
# is parsed as a float.
res = float(conf.get("louvainRes", 1.0))
pipeLog('Performing Louvain Clustering, using %d PCs and %d neighbors' % (pc_nb, neighbors))
sc.pp.neighbors(adata, n_pcs=int(pc_nb), n_neighbors=neighbors)
sc.tl.louvain(adata, resolution=res)
# Report the number of distinct labels in the 'louvain' column.
pipeLog("Found %d louvain clusters" % len(adata.obs['louvain'].unique()))
sc.pl.tsne(adata, color='louvain')
#Clustering. Default Resolution: 1
#res = 1.0
#pipeLog('Performing Louvain Clustering, resolution = %f' % res)
#sc.pp.neighbors(adata, n_pcs=int(pc_nb))
#sc.tl.louvain(adata, resolution=res)
#sc.pl.tsne(adata, color='louvain')
if "umap" in doLayouts:
pipeLog("Performing UMAP")
sc.tl.umap(adata)
if "phate" in doLayouts:
return predicted_cells
if __name__ == "__main__":
    # Figures saved by the scanpy plotting calls below go to this directory.
    sc.settings.figdir = "../results"

    # Project the control CD4T cells with the fitted PCA and predict from
    # the projected training and control matrices.
    adata_list = dr.extractor(data, "CD4T")
    ctrl_CD4T_PCA = pca.transform(adata_list[1].X.A)
    predicted_cells = predict(train_real_cd_PCA, train_real_stimulated_PCA, ctrl_CD4T_PCA)

    # Stack control, real stimulated and predicted cells into one AnnData,
    # labelling each row with the group it came from.
    ctrl_expr = adata_list[1].X.A
    stim_expr = adata_list[2].X.A
    all_Data = sc.AnnData(np.concatenate([ctrl_expr, stim_expr, predicted_cells]))
    condition = []
    for label, rows in (
        ("ctrl", ctrl_expr),
        ("real_stim", stim_expr),
        ("pred_stim", predicted_cells),
    ):
        condition.extend([label] * len(rows))
    all_Data.obs["condition"] = condition
    all_Data.var_names = adata_list[3].var_names

    # Regression plots, then a UMAP and an ISG15 violin plot by condition.
    results_dir = "../results/"
    dr.reg_mean_plot(all_Data, results_dir, "Vec_Arith_PCA")
    dr.reg_var_plot(all_Data, results_dir, "Vec_Arith_PCA ")
    sc.pp.neighbors(all_Data)
    sc.tl.umap(all_Data)
    sc.pl.umap(all_Data, color=["condition"], save="Vec_Arith_PCA.pdf", show=False)
    sc.pl.violin(
        all_Data,
        groupby='condition',
        keys="ISG15",
        save="Vec_Arith_PCA.pdf",
        show=False,
    )