from deepchem.models import GraphConvModel
import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_chembl
# Load ChEMBL dataset
chembl_tasks, datasets, transformers = load_chembl(
shard_size=2000, featurizer="GraphConv", set="5thresh", split="random")
train_dataset, valid_dataset, test_dataset = datasets
# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 128
model = GraphConvModel(
len(chembl_tasks), batch_size=batch_size, mode='regression')
# Fit trained model
model.fit(train_dataset, nb_epoch=20)
print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
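# Report the metric values (mirrors the reporting pattern used by the
# evaluator snippets further below).
print("Train scores")
print(train_scores)
print("Validation scores")
print(valid_scores)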
# Maximum number of atoms per molecule
max_atoms = 26
# Batch size of models
batch_size = 128
layer_structures = [128, 128, 64]
atom_number_cases = [1, 6, 7, 8, 9]
ANItransformer = dc.trans.ANITransformer(
max_atoms=max_atoms, atom_cases=atom_number_cases)
train_dataset = ANItransformer.transform(train_dataset)
valid_dataset = ANItransformer.transform(valid_dataset)
test_dataset = ANItransformer.transform(test_dataset)
n_feat = ANItransformer.get_num_feats() - 1
# Fit models
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
model = dc.models.ANIRegression(
len(tasks),
max_atoms,
n_feat,
layer_structures=layer_structures,
atom_number_cases=atom_number_cases,
batch_size=batch_size,
learning_rate=0.001,
use_queue=False,
mode="regression")
# Fit trained model
model.fit(train_dataset, nb_epoch=300, checkpoint_interval=100)
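# A minimal evaluation sketch (assumed continuation): TensorGraph-style models
# such as ANIRegression expose evaluate(dataset, metrics), so the MAE/R^2
# metrics defined above can score the ANI-transformed splits directly.
train_scores = model.evaluate(train_dataset, metric)
valid_scores = model.evaluate(valid_dataset, metric)
print("Train scores: ", train_scores)
print("Validation scores: ", valid_scores)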
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
# Load QM7 dataset
tasks, datasets, transformers = dc.molnet.load_qm7_from_mat(
featurizer='GraphConv', move_mean=True)
train_dataset, valid_dataset, test_dataset = datasets
# Fit models
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
# Batch size of models
batch_size = 64
model = dc.models.GraphConvModel(
len(tasks), batch_size=batch_size, learning_rate=0.001, mode="regression")
# Fit trained model
model.fit(train_dataset, nb_epoch=50)
print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
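# Assumed extension: score the held-out test split with the same metrics
# before reporting all three.
test_scores = model.evaluate(test_dataset, metric, transformers)
print("Train scores: ", train_scores)
print("Validation scores: ", valid_scores)
print("Test scores: ", test_scores)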
# Support-set composition; n_pos is referenced below but its definition is
# elided in this fragment, so a one-shot value is assumed here.
n_pos = 1
n_neg = 1
# Set batch sizes for network
test_batch_size = 128
support_batch_size = n_pos + n_neg
nb_epochs = 1
n_train_trials = 2000
n_eval_trials = 20
learning_rate = 1e-4
log_every_n_samples = 50
# Number of features on conv-mols
n_feat = 75
# load_tox21_convmol is assumed to live in the local datasets helper module,
# matching the `from datasets import load_delaney` pattern used further below.
from datasets import load_tox21_convmol
tox21_tasks, dataset, transformers = load_tox21_convmol()
# Define metric
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")
task_splitter = dc.splits.TaskSplitter()
K = 4  # number of task folds; K is used below but not defined in this fragment (assumed value)
fold_datasets = task_splitter.k_fold_split(dataset, K)
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]
# Train support model on train
support_model = dc.nn.SequentialSupportGraph(n_feat)
# Add layers
support_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
support_model.add(dc.nn.GraphPool())
support_model.add(dc.nn.GraphConv(128, 64, activation='relu'))
support_model.add(dc.nn.GraphPool())
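# A sketch of the remaining few-shot setup, assuming the hyperparameters
# defined above; it follows the DeepChem low-data example pattern, and the
# exact layer sizes here are illustrative.
support_model.add(dc.nn.Dense(128, 128, activation='tanh'))
# Separate gather layers for the test and support branches
support_model.add_test(dc.nn.GraphGather(test_batch_size, activation='tanh'))
support_model.add_support(
    dc.nn.GraphGather(support_batch_size, activation='tanh'))
model = dc.models.SupportGraphClassifier(
    support_model,
    test_batch_size=test_batch_size,
    support_batch_size=support_batch_size,
    learning_rate=learning_rate)
# Fit on episodes sampled from the training tasks, then evaluate with
# n_eval_trials sampled support sets per held-out task.
model.fit(
    train_dataset,
    nb_epochs=nb_epochs,
    n_episodes_per_epoch=n_train_trials,
    n_pos=n_pos,
    n_neg=n_neg,
    log_every_n_samples=log_every_n_samples)
mean_scores, std_scores = model.evaluate(
    test_dataset, metric, n_pos, n_neg, n_trials=n_eval_trials)
print("Mean test ROC-AUC: ", mean_scores)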
# (keyword arguments continuing a model constructor whose head is elided in
# this fragment; the resulting model is fit below)
bias_init_consts=[0., 0., 0.],
penalty=penalty,
penalty_type=penalty_type,
dropouts=dropouts,
learning_rate=0.002,
momentum=0.8,
optimizer="adam",
batch_size=64,
conv_layers=1,
boxsize=None,
verbose=True,
seed=seed)
model.fit(train_dataset, nb_epoch=100)
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
transformers)
train_scores = train_evaluator.compute_model_performance(
metric,
csv_out="train_predict_ac_stratified.csv",
stats_out="train_stats_ac_stratified.csv")
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(
metric,
csv_out="test_predict_ac_stratified.csv",
stats_out="test_stats_ac_stratified.csv")
print("Test scores")
print(test_scores)
# (keyword arguments continuing a model constructor whose head is elided in
# this fragment; the resulting model is fit below)
bias_init_consts=[0., 0., 0.],
penalty=penalty,
penalty_type=penalty_type,
dropouts=dropouts,
learning_rate=0.002,
momentum=0.8,
optimizer="adam",
batch_size=64,
conv_layers=1,
boxsize=None,
verbose=True,
seed=seed)
model.fit(train_dataset, nb_epoch=100)
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
transformers)
train_scores = train_evaluator.compute_model_performance(
metric,
csv_out="train_predict_ac_scaffold.csv",
stats_out="train_stats_ac_scaffold.csv")
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(
metric,
csv_out="test_predict_ac_scaffold.csv",
stats_out="test_stats_ac_scaffold.csv")
print("Test scores")
print(test_scores)
print("Valid Data - added RP")
new_test_data = generate_new_X(test_dataset.X)
new_test_dataset = dc.data.datasets.DiskDataset.from_numpy(new_test_data, test_dataset.y, test_dataset.w ,test_dataset.ids, data_dir=None)
print("Test Data - added RP")
else :
new_train_data = generate_new_X(train_dataset.X[:3800])
new_train_dataset = dc.data.datasets.DiskDataset.from_numpy(new_train_data, train_dataset.y[:3800], train_dataset.w[:3800] ,train_dataset.ids[:3800], data_dir=None)
print("Train Data - added RP - tox21")
new_valid_data = generate_new_X(train_dataset.X[3800:5000])
new_valid_dataset = dc.data.datasets.DiskDataset.from_numpy(new_valid_data, train_dataset.y[3800:5000], train_dataset.w[3800:5000] ,train_dataset.ids[3800:5000], data_dir=None)
print("Valid Data - added RP - tox21")
new_test_data = generate_new_X(train_dataset.X[5000:])
new_test_dataset = dc.data.datasets.DiskDataset.from_numpy(new_test_data, train_dataset.y[5000:], train_dataset.w[5000:] ,train_dataset.ids[5000:], data_dir=None)
print("Test Data - added RP - tox21")
metric = dc.metrics.Metric(
dc.metrics.roc_auc_score, np.mean, mode="classification")
best_auc_score = 0.0
for i in range(NUM_EPOCHS):
    print("Epoch Num: ", i)
    sys.stdout.flush()
    tg.fit_generator(data_generator(new_train_dataset, epochs=1))
    if TASK != 'tox_21':
        new_train_data = generate_new_X(train_dataset.X)
        new_train_dataset = dc.data.datasets.DiskDataset.from_numpy(
            new_train_data, train_dataset.y, train_dataset.w,
            train_dataset.ids, data_dir=None)
    else:
        new_train_data = generate_new_X(train_dataset.X[:3800])
        new_train_dataset = dc.data.datasets.DiskDataset.from_numpy(
            new_train_data, train_dataset.y[:3800], train_dataset.w[:3800],
            train_dataset.ids[:3800], data_dir=None)
    print("Validation Loss")
    valid_predictions = tg.predict_on_generator(
        data_generator(new_valid_dataset, predict=True))
    valid_predictions = reshape_y_pred(new_valid_dataset.y, valid_predictions)
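    # A minimal scoring sketch (assumed continuation): compare the reshaped
    # validation predictions against the labels with the ROC-AUC metric
    # defined above, tracking the best epoch via best_auc_score.
    valid_score = metric.compute_metric(
        new_valid_dataset.y, valid_predictions, new_valid_dataset.w)
    print("Valid ROC-AUC: ", valid_score)
    if valid_score > best_auc_score:
        best_auc_score = valid_score
print("Best valid ROC-AUC: ", best_auc_score)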
from __future__ import division
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_sampl
# Load SAMPL(FreeSolv) dataset
SAMPL_tasks, SAMPL_datasets, transformers = load_sampl(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = SAMPL_datasets
# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 128
graph_model = dc.nn.SequentialGraph(n_feat)
graph_model.add(dc.nn.GraphConv(128, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
graph_model.add(dc.nn.GraphConv(128, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
graph_model.add(dc.nn.Dense(256, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
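# Assumed continuation (the fragment cuts off mid-model): finish the graph
# with a gather layer, wrap it in the deepchem 1.x MultitaskGraphRegressor,
# and fit. The optimizer settings here are illustrative.
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
model = dc.models.MultitaskGraphRegressor(
    graph_model,
    len(SAMPL_tasks),
    n_feat,
    batch_size=batch_size,
    learning_rate=1e-3,
    optimizer_type="adam")
model.fit(train_dataset, nb_epoch=20)
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Train scores: ", train_scores)
print("Validation scores: ", valid_scores)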
import numpy as np
import pandas as pd
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from datasets import load_delaney
import timeit
# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = load_delaney(
featurizer='GraphConv', split='random')
train_dataset, valid_dataset, test_dataset = delaney_datasets
# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 128
graph_model = dc.nn.SequentialGraph(n_feat)
graph_model.add(dc.nn.GraphConv(128, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
graph_model.add(dc.nn.GraphConv(128, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
graph_model.add(dc.nn.Dense(256, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
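# Assumed continuation: complete the graph with a gather layer, then use the
# timeit import above to time the fit of a multitask graph regressor.
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
model = dc.models.MultitaskGraphRegressor(
    graph_model,
    len(delaney_tasks),
    n_feat,
    batch_size=batch_size)
start = timeit.default_timer()
model.fit(train_dataset, nb_epoch=20)
print("Fit took %.2f s" % (timeit.default_timer() - start))
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Validation scores: ", valid_scores)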