Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
save=f"_latent_true_labels_{z_dim}",
show=False)
# Encode the training cells while conditioning on `fake_labels`, then wrap the
# result in an AnnData that carries the condition / cell-type annotations.
latent_with_fake_labels = network.to_latent(net_train_data.X, fake_labels)
latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels,
obs={condition_key: net_train_data.obs[condition_key].tolist(),
cell_type_key: net_train_data.obs[cell_type_key].tolist()})
# Neighbour graph -> UMAP -> figure written through `save` (not shown, show=False).
sc.pp.neighbors(latent_with_fake_labels)
sc.tl.umap(latent_with_fake_labels)
sc.pl.umap(latent_with_fake_labels, color=[condition_key, cell_type_key],
save=f"_latent_fake_labels_{z_dim}",
show=False)
# MMD-layer representation of the same cells, with feed_fake=False.
mmd_with_true_labels = network.to_mmd_layer(network.cvae_model, net_train_data.X,
encoder_labels=true_labels, feed_fake=False)
mmd_with_true_labels = sc.AnnData(X=mmd_with_true_labels,
obs={condition_key: net_train_data.obs[condition_key].tolist(),
cell_type_key: net_train_data.obs[cell_type_key].tolist()})
sc.pp.neighbors(mmd_with_true_labels)
sc.tl.umap(mmd_with_true_labels)
sc.pl.umap(mmd_with_true_labels, color=[condition_key, cell_type_key],
save=f"_mmd_true_labels_{z_dim}",
show=False)
# Same MMD-layer projection with feed_fake=True; note that `encoder_labels`
# is still `true_labels` in this call.
mmd_with_fake_labels = network.to_mmd_layer(network.cvae_model, net_train_data.X,
encoder_labels=true_labels, feed_fake=True)
mmd_with_fake_labels = sc.AnnData(X=mmd_with_fake_labels,
obs={condition_key: net_train_data.obs[condition_key].tolist(),
cell_type_key: net_train_data.obs[cell_type_key].tolist()})
# Neighbour graph and UMAP coordinates for the fake-label MMD embedding.
sc.pp.neighbors(mmd_with_fake_labels)
sc.tl.umap(mmd_with_fake_labels)
sc.pl.umap(mmd_with_fake_labels, color=[condition_key, cell_type_key],
# Load the training set and drop every cell that is both CD4T and stimulated.
train = sc.read("./data/train.h5ad")
# Alternative subset kept for reference:
# train = train[train.obs["cell_type"] == "CD4T"]
cd4t_cells = train.obs["cell_type"] == "CD4T"
stimulated_cells = train.obs["condition"] == "stimulated"
train = train[~(cd4t_cells & stimulated_cells)]

# Build the CVAE and call restore_model(); the training call stays disabled.
z_dim = 20
network = scgen.CVAE(
    x_dimension=train.X.shape[1],
    z_dimension=z_dim,
    alpha=0.1,
)
network.restore_model()
# network.train(train, n_epochs=100)
labels, _ = scgen.label_encoder(train)

# Latent representation of the training cells, plotted as a UMAP.
latent = network.to_latent(train.X.A, labels=labels)
adata = sc.AnnData(
    X=latent,
    obs={column: train.obs[column].tolist() for column in ("condition", "cell_type")},
)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.umap(adata, color=["condition", "cell_type"], save=f"train_{z_dim}")

# Same plot for the MMD-layer representation.
mmd = network.to_mmd_layer(train.X.A, labels=labels)
adata_mmd = sc.AnnData(
    X=mmd,
    obs={column: train.obs[column].tolist() for column in ("condition", "cell_type")},
)
sc.pp.neighbors(adata_mmd)
sc.tl.umap(adata_mmd)
sc.pl.umap(adata_mmd, color=["condition", "cell_type"], save=f"true_labels_{z_dim}")
# Reload the full dataset and select the CD4T cells, plus their control subset.
train = sc.read("./data/train.h5ad")
cd4t_mask = train.obs["cell_type"] == "CD4T"
control_mask = train.obs["condition"] == "control"
CD4T = train[cd4t_mask]
unperturbed_data = train[cd4t_mask & control_mask]

# Run the prediction with an all-ones label column, one row per control cell.
n_unperturbed = len(unperturbed_data)
fake_labels = np.ones((n_unperturbed, 1))
predicted_cells = network.predict(unperturbed_data, fake_labels)

# Tag the predictions as "pred" and give them the CD4T gene names so they can
# be concatenated with the real CD4T cells.
adata = sc.AnnData(predicted_cells, obs={"condition": ["pred"] * n_unperturbed})
adata.var_names = CD4T.var_names
all_adata = CD4T.concatenate(adata)

# Mean-expression regression plot: predicted vs. real stimulated cells.
scgen.plotting.reg_mean_plot(
    all_adata,
    condition_key="condition",
    axis_keys={"x": "pred", "y": "stimulated"},
    gene_list=["ISG15", "CD3D"],
    path_to_save=f"figures/reg_mean_{z_dim}.pdf",
)
# Work inside the per-dataset / per-cell-type results directory; the relative
# `model_path` passed below is resolved after this chdir.
os.chdir(f"./results/{data_name}/{cell_type}")
# Hold-out variant (disabled): exclude the (cell_type, stim_key) cells from training.
# net_train_data = train[~((train.obs[cell_type_key] == cell_type) & (train.obs[condition_key] == stim_key))]
net_train_data = train
# x_dimension is hard-coded to (256, 256, 3) — presumably image-shaped input; confirm.
network = scgen.MMDCCVAE(x_dimension=(256, 256, 3,), z_dimension=z_dim, alpha=alpha, beta=beta,
batch_mmd=True, kernel=kernel, train_with_fake_labels=False,
model_path=f"./", arch_style=arch_style)
# The model is trained from scratch here; restoring a saved model is disabled.
# network.restore_model()
network.train(net_train_data, n_epochs=n_epochs, batch_size=batch_size, verbose=1)
print(f"network_{cell_type} has been trained!")
# True labels come from the label encoder; fake labels are an all-ones column
# with one row per training observation.
true_labels, _ = scgen.label_encoder(net_train_data)
fake_labels = np.ones(shape=(net_train_data.shape[0], 1))
# Latent embedding conditioned on the true labels, annotated and plotted as a UMAP.
latent_with_true_labels = network.to_latent(net_train_data.X, labels=true_labels)
latent_with_true_labels = sc.AnnData(X=latent_with_true_labels,
obs={condition_key: net_train_data.obs[condition_key].tolist(),
cell_type_key: pd.Categorical(net_train_data.obs[cell_type_key])})
sc.pp.neighbors(latent_with_true_labels)
sc.tl.umap(latent_with_true_labels)
sc.pl.umap(latent_with_true_labels, color=[condition_key, cell_type_key],
save=f"_latent_true_labels_{z_dim}",
show=False)
# Latent embedding conditioned on the fake labels, prepared the same way.
latent_with_fake_labels = network.to_latent(net_train_data.X, fake_labels)
latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels,
obs={condition_key: net_train_data.obs[condition_key].tolist(),
cell_type_key: pd.Categorical(net_train_data.obs[cell_type_key])})
sc.pp.neighbors(latent_with_fake_labels)
sc.tl.umap(latent_with_fake_labels)
sc.pl.umap(latent_with_fake_labels, color=[condition_key, cell_type_key],
save=f"_latent_fake_labels_{z_dim}",
pred_adatas = pred_adata
else:
pred_adatas = pred_adatas.concatenate(pred_adata)
pred_adatas.write_h5ad(filename=f"../data/reconstructed/RCVAEMulti/{data_name}.h5ad")
import matplotlib as mpl

# Reset matplotlib to its default rcParams before producing the figures.
mpl.rcParams.update(mpl.rcParamsDefault)
color = [condition_key, cell_type_key]

# Wrap the true-label latent matrix and attach the annotations from `data`.
latent_with_true_labels = sc.AnnData(X=latent_with_true_labels)
latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

# One fake-label latent matrix per condition index: wrap each one, annotate it,
# and save its UMAP under a file name that carries the condition index.
latent_with_fake_labels = [sc.AnnData(X=latent_with_fake_labels[i]) for i in range(n_conditions)]
for i, fake_label_latent in enumerate(latent_with_fake_labels):
    fake_label_latent.obs[condition_key] = data.obs[condition_key].values
    fake_label_latent.obs[cell_type_key] = data.obs[cell_type_key].values
    sc.pp.neighbors(fake_label_latent)
    sc.tl.umap(fake_label_latent)
    sc.pl.umap(fake_label_latent, color=color,
               save=f"_{data_name}_{cell_type}_latent_with_fake_labels_{i}",
               show=False,
               wspace=0.15,
               frameon=False)

# Wrap the true-label MMD-layer matrix with the same annotations.
mmd_latent_with_true_labels = sc.AnnData(X=mmd_latent_with_true_labels)
mmd_latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values
import matplotlib as mpl

# Reset matplotlib to its default rcParams before plotting.
mpl.rcParams.update(mpl.rcParamsDefault)

# Wrap the four raw embedding matrices in AnnData objects, in a fixed order so
# they can be unpacked back into the original names afterwards.
embeddings = [
    sc.AnnData(X=matrix)
    for matrix in (
        latent_with_true_labels,
        latent_with_fake_labels,
        mmd_latent_with_true_labels,
        mmd_latent_with_fake_labels,
    )
]
for embedding in embeddings:
    # Only the condition annotation is attached; the cell-type column is disabled.
    embedding.obs['condition'] = data.obs['condition'].values
    # embedding.obs[cell_type_key] = data.obs[cell_type_key].values
(
    latent_with_true_labels,
    latent_with_fake_labels,
    mmd_latent_with_true_labels,
    mmd_latent_with_fake_labels,
) = embeddings

color = ['condition']

# UMAP of the input data first, then of the true-label latent space.
for annotated, figure_suffix in (
    (data, f'_{data_name}_train_data'),
    (latent_with_true_labels, f"_{data_name}_latent_with_true_labels"),
):
    sc.pp.neighbors(annotated)
    sc.tl.umap(annotated)
    sc.pl.umap(annotated, color=color, save=figure_suffix, show=False)
# Build the VAE for this dataset / architecture and restore it from `model_path`.
network = trvae.VAE(
    x_dimension=data.shape[1],
    z_dimension=z_dim,
    arch_style=arch_style,
    model_path=f"../models/VAE/{data_name}-{arch_style}/{z_dim}/",
)
network.restore_model()

# Densify sparse input in place so that `to_latent` receives a dense array.
if sparse.issparse(data.X):
    data.X = data.X.A
feed_data = data.X

# Encode the cells and carry the cell-type annotation over to the latent AnnData.
latent = network.to_latent(feed_data)
latent = sc.AnnData(X=latent)
latent.obs[cell_type_key] = data.obs[cell_type_key].values

color = [cell_type_key]

# NOTE(review): `train_data` is not bound anywhere in this excerpt (the model is
# fed `data`) — confirm it is defined upstream and is the intended object.
sc.pp.neighbors(train_data)
sc.tl.umap(train_data)
sc.pl.umap(train_data, color=color,
           save=f'_{data_name}_train_data.pdf',
           show=False)

# UMAP of the latent space, coloured by cell type.
sc.pp.neighbors(latent)
sc.tl.umap(latent)
sc.pl.umap(latent, color=color,
           save=f"_{data_name}_latent.pdf",
           show=False)
top_100_genes=top_100_genes,
gene_list=gene_list,
condition_key='condition',
axis_keys={"x": 'predicted', 'y': target_key},
labels={'x': 'pred stim', 'y': 'real stim'},
legend=False,
fontsize=20,
textsize=14,
title=cell_type,
path_to_save=os.path.join(path_to_save,
f'rcvae_reg_var_{data_name}_{cell_type}.pdf'))
import matplotlib as mpl

# Reset matplotlib to its default rcParams before plotting.
mpl.rcParams.update(mpl.rcParamsDefault)

# Wrap the four raw embedding matrices in AnnData objects, in a fixed order so
# they can be unpacked back into the original names afterwards.
embeddings = [
    sc.AnnData(X=matrix)
    for matrix in (
        latent_with_true_labels,
        latent_with_fake_labels,
        mmd_latent_with_true_labels,
        mmd_latent_with_fake_labels,
    )
]
# Copy the condition and cell-type annotations from `data` onto every embedding.
for embedding in embeddings:
    for obs_key in ('condition', cell_type_key):
        embedding.obs[obs_key] = data.obs[obs_key].values
(
    latent_with_true_labels,
    latent_with_fake_labels,
    mmd_latent_with_true_labels,
    mmd_latent_with_fake_labels,
) = embeddings

color = ['condition', cell_type_key]
sc.pl.violin(all_adata, keys=diff_genes.tolist()[0], groupby=condition_key,
save=f"_{diff_genes.tolist()[0]}",
show=False)
plt.close("all")
elif isinstance(network, scgen.CVAE):
true_labels, _ = scgen.label_encoder(train)
if sparse.issparse(train.X):
latent = network.to_latent(train.X.A, labels=true_labels)
else:
latent = network.to_latent(train.X, labels=true_labels)
latent = sc.AnnData(X=latent,
obs={condition_key: train.obs[condition_key].tolist(),
cell_type_key: train.obs[cell_type_key].tolist()})
if plot_umap:
sc.pp.neighbors(latent)
sc.tl.umap(latent)
sc.pl.umap(latent, color=[condition_key, cell_type_key],
save=f"_latent",
show=False)
cell_type_data = train[train.obs[cell_type_key] == cell_type]
fake_labels = np.ones(shape=(cell_type_data.shape[0], 1))
pred = network.predict(data=cell_type_data, labels=fake_labels)
pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
var={"var_names": cell_type_data.var_names})
"Choose one of the following flavors: 'seurat_v2', 'seurat_v3', 'cell_ranger', 'poisson_zeros'"
)
if flavor == "seurat_v3" and n_top_genes is None:
raise ValueError("n_top_genes must not be None with flavor=='seurat_v3'")
logger.info("extracting highly variable genes using {} flavor".format(flavor))
# Creating AnnData structure
obs = pd.DataFrame(
data=dict(batch=self.batch_indices.squeeze()),
index=np.arange(self.nb_cells),
).astype("category")
counts = sp_sparse.csc_matrix(self.X.copy())
adata = sc.AnnData(X=counts, obs=obs)
batch_key = "batch" if (batch_correction and self.n_batches >= 2) else None
if flavor in ["cell_ranger", "seurat_v2"]:
if flavor == "seurat_v2":
# name expected by scanpy
flavor = "seurat"
# Counts normalization
sc.pp.normalize_total(adata, target_sum=1e4)
# logarithmed data
sc.pp.log1p(adata)
# Finding top genes
sc.pp.highly_variable_genes(
adata=adata,
n_top_genes=n_top_genes,
flavor=flavor,
if issparse(X):
X = np.array(X.todense())
if np.allclose(X, X.astype(np.int)):
X = X.astype(np.int)
else:
raise TypeError(
"Molecular cross-validation requires integer count data.")
if random_seed:
np.random.seed(random_seed)
X1 = np.random.binomial(X, p).astype(np.float)
X2 = X - X1
adata1 = sc.AnnData(X=X1)
adata2 = sc.AnnData(X=X2)
return adata1, adata2