import shutil
import numpy as np
import deepchem as dc
from deepchem.molnet import load_hopv
# Fix the random seed for reproducible runs (debugging aid only)
np.random.seed(123)
# Load HOPV dataset
n_features = 1024
hopv_tasks, hopv_datasets, transformers = load_hopv()
train_dataset, valid_dataset, test_dataset = hopv_datasets
# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]
model = dc.models.ProgressiveMultitaskRegressor(
    len(hopv_tasks),
    n_features,
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50)
# Train the model
model.fit(train_dataset, nb_epoch=25)
print("Evaluating model")
import os
import numpy as np
import tensorflow as tf
# Fix random seeds for reproducible runs
np.random.seed(123)
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_pdbbind_grid
split = "random"
subset = "full"
pdbbind_tasks, pdbbind_datasets, transformers = load_pdbbind_grid(
    split=split, subset=subset)
train_dataset, valid_dataset, test_dataset = pdbbind_datasets
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
current_dir = os.path.dirname(os.path.realpath(__file__))
model_dir = os.path.join(current_dir, "%s_%s_DNN" % (split, subset))
n_features = train_dataset.X.shape[1]
model = dc.models.MultitaskRegressor(
    len(pdbbind_tasks),
    n_features,
    logdir=model_dir,
    dropouts=[.25],
    learning_rate=0.0003,
    weight_init_stddevs=[.1],
    batch_size=64)
# Train the model
model.fit(train_dataset, nb_epoch=100)
print("Number of compounds in test set")
print(len(test_dataset))
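# Evaluation was omitted from this snippet; a minimal sketch using the
# Pearson R^2 metric defined above:
train_scores = model.evaluate(train_dataset, [metric], transformers)
test_scores = model.evaluate(test_dataset, [metric], transformers)
print("Train scores: %s" % str(train_scores))
print("Test scores: %s" % str(test_scores))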
### Create model ###
# (This fragment assumes chembl_tasks and the train/valid/test datasets
# were loaded earlier, e.g. via dc.molnet.load_chembl.)
import timeit
n_layers = 2
nb_epoch = 50
model = dc.models.TensorflowMultiTaskRegressor(
    len(chembl_tasks), train_dataset.get_data_shape()[0],
    layer_sizes=[1000] * n_layers, dropouts=[0.25] * n_layers,
    weight_init_stddevs=[0.02] * n_layers,
    bias_init_consts=[1.] * n_layers, learning_rate=0.0008,
    penalty=0.0005, penalty_type="l2", optimizer="adam", batch_size=128,
    seed=123, verbosity="high")
# Use Pearson R^2 as the regression metric
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
start = timeit.default_timer()
print("Training model")
model.fit(train_dataset, nb_epoch=nb_epoch)
train_time = timeit.default_timer() - start
start = timeit.default_timer()
train_score, train_scores = model.evaluate(
    train_dataset, [metric], transformers, per_task_metrics=True)
valid_score, valid_scores = model.evaluate(
    valid_dataset, [metric], transformers, per_task_metrics=True)
test_score, test_scores = model.evaluate(
    test_dataset, [metric], transformers, per_task_metrics=True)
eval_time = timeit.default_timer() - start
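# Reporting of the scores and timings was cut off here; a minimal sketch
# printing the values gathered above:
print("Train scores: %s" % str(train_score))
print("Valid scores: %s" % str(valid_score))
print("Test scores: %s" % str(test_score))
print("Train time: %.1f s, eval time: %.1f s" % (train_time, eval_time))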
  for idx in ids:
    new_metadata.append(metadata[idx])
  return new_metadata
if __name__ == "__main__":
  max_atoms = 23
  batch_size = 64  # CHANGED FROM 16
  layer_structures = [128, 128, 64]
  atom_number_cases = [1, 6, 7, 8]
  metric = [
      dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
      dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
  ]
  print("Fitting new model...")
  train_valid_dataset, test_dataset, all_groups = load_roiterberg_ANI(
      mode="atomization")
  splitter = dc.splits.RandomGroupSplitter(
      broadcast(train_valid_dataset, all_groups))
  print("Performing train/valid split...")
  # (train_dir and valid_dir are assumed to be defined in the elided
  # portion of this script.)
  train_dataset, valid_dataset = splitter.train_test_split(
      train_valid_dataset, train_dir=train_dir, test_dir=valid_dir)
  transformers = [
      dc.trans.NormalizationTransformer(
          transform_y=True, dataset=train_dataset)  # arguments assumed
  ]
  # The model constructor was truncated in the original snippet; the keyword
  # arguments below belong to it. The class name and the variables dropouts,
  # learning_rate, momentum and seed are assumptions from elided context.
  model = dc.models.ANIRegression(
      dropouts=dropouts,
      learning_rate=learning_rate,
      momentum=momentum,
      optimizer="adam",
      batch_size=batch_size,
      conv_layers=1,
      boxsize=None,
      verbose=True,
      seed=seed)
  # Fit model
  model.fit(train_dataset, nb_epoch=10)
  # Evaluate model
  metric = [
      dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
      dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression")
  ]
  train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
                                                transformers)
  train_scores = train_evaluator.compute_model_performance(metric)
  print("Train scores")
  print(train_scores)
  test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset,
                                               transformers)
  test_scores = test_evaluator.compute_model_performance(metric)
  print("Test scores")
  print(test_scores)
from __future__ import division
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from membrane_permeability_datasets import load_permeability
# Load membrane permeability dataset
permeability_tasks, permeability_datasets, transformers = load_permeability(
    featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = permeability_datasets
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
n_atom_feat = 75
batch_size = 64
max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])
reshard_size = 512
transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
train_dataset.reshard(reshard_size)
train_dataset = transformer.transform(train_dataset)
valid_dataset.reshard(reshard_size)
valid_dataset = transformer.transform(valid_dataset)
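# Model construction and training were cut off here; a minimal sketch,
# assuming dc.models.DAGModel with the variables defined above (the epoch
# count is an assumption):
model = dc.models.DAGModel(
    len(permeability_tasks),
    max_atoms=max_atoms,
    n_atom_feat=n_atom_feat,
    batch_size=batch_size,
    mode="regression")
model.fit(train_dataset, nb_epoch=50)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Validation scores: %s" % str(valid_scores))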
from deepchem.molnet import load_kaggle  # import assumed; fragment lacks it
num_trials = 5
shard_size = 2000  # value assumed; shard_size was undefined in the snippet
print("About to load KAGGLE data.")
KAGGLE_tasks, datasets, transformers = load_kaggle(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets
print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))
num_features = train_dataset.get_data_shape()[0]
print("Num features: %d" % num_features)
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
from sklearn.ensemble import RandomForestRegressor

def task_model_builder(model_dir):
  sklearn_model = RandomForestRegressor(
      #n_estimators=100, max_features=int(num_features/3),
      n_estimators=1,
      max_features=int(num_features / 3),
      min_samples_split=5,
      n_jobs=-1)
  return dc.models.SklearnModel(sklearn_model, model_dir)
all_results = []
for trial in range(num_trials):
  print("Starting trial %d" % trial)
  model = dc.models.SingletaskToMultitask(KAGGLE_tasks, task_model_builder)
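  # The rest of the trial loop was cut off; a minimal sketch that fits the
  # per-task random forests and records the validation score:
  model.fit(train_dataset)
  valid_scores = model.evaluate(valid_dataset, [metric], transformers)
  all_results.append(valid_scores)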
from __future__ import division
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney(
    featurizer='Weave', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets
# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
n_atom_feat = 75
n_pair_feat = 14
# Batch size of models
batch_size = 64
model = dc.models.MPNNModel(
    len(delaney_tasks),
    n_atom_feat=n_atom_feat,
    n_pair_feat=n_pair_feat,
    T=3,
    M=5,
    batch_size=batch_size,
    learning_rate=0.0001,
    use_queue=False,
    mode="regression")
from __future__ import unicode_literals
import os
import deepchem as dc
import numpy as np
from deepchem.molnet import load_qm7_from_mat
from deepchem.models.optimizers import ExponentialDecay
np.random.seed(123)
qm7_tasks, datasets, transformers = load_qm7_from_mat(
    split='stratified', move_mean=True)
train_dataset, valid_dataset, test_dataset = datasets
fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)]
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
rate = 0.001
model = dc.models.MultitaskFitTransformRegressor(
    n_tasks=1,
    n_features=[23, 23],
    learning_rate=rate,
    momentum=.8,
    batch_size=25,
    weight_init_stddevs=[1 / np.sqrt(400), 1 / np.sqrt(100), 1 / np.sqrt(100)],
    bias_init_consts=[0., 0., 0.],
    layer_sizes=[400, 100, 100],
    dropouts=[0.01, 0.01, 0.01],
    fit_transformers=fit_transformers,
    seed=123)
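# Training and evaluation were omitted here; a minimal sketch (the epoch
# count is an assumption):
model.fit(train_dataset, nb_epoch=50)
train_scores = model.evaluate(train_dataset, metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)
print("Train scores: %s" % str(train_scores))
print("Test scores: %s" % str(test_scores))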
# (This fragment assumes load_kinase was imported, e.g. from
# deepchem.molnet import load_kinase, and that shard_size was defined in
# elided code.)
KINASE_tasks, datasets, transformers = load_kinase(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets
print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))
n_layers = 3
n_bypass_layers = 3
nb_epoch = 50
# Use Pearson R^2 as the regression metric
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
all_results = []
for trial in range(num_trials):  # num_trials assumed defined in elided code
  model = dc.models.RobustMultitaskRegressor(
      len(KINASE_tasks),
      train_dataset.get_data_shape()[0],
      layer_sizes=[500] * n_layers,
      bypass_layer_sizes=[50] * n_bypass_layers,
      dropouts=[.25] * n_layers,
      bypass_dropouts=[.25] * n_bypass_layers,
      weight_init_stddevs=[.02] * n_layers,
      bias_init_consts=[.5] * n_layers,
      bypass_weight_init_stddevs=[.02] * n_bypass_layers,
      bypass_bias_init_consts=[.5] * n_bypass_layers,
      learning_rate=.0003,
      weight_decay_penalty=.0001)  # trailing arguments were truncated in
                                   # the original; the call is closed here
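  # The remainder of the trial loop was cut off; a minimal sketch:
  model.fit(train_dataset, nb_epoch=nb_epoch)
  valid_scores = model.evaluate(valid_dataset, [metric], transformers)
  all_results.append(valid_scores)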