# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import os
import shutil
import timeit

import numpy as np
from sklearn.ensemble import RandomForestRegressor

import deepchem as dc
import deepchem.metrics as metrics
from deepchem.metrics import Metric
from deepchem.models import SklearnModel
from deepchem.models.multitask import SingletaskToMultitask
from deepchem.molnet import load_muv
from deepchem.molnet import load_nci
from deepchem.utils.evaluate import Evaluator
# Seed NumPy so splitting/initialization is reproducible across runs.
np.random.seed(123)
# Load MUV data
# load_muv() returns (task names, (train, valid, test) splits, transformers).
muv_tasks, muv_datasets, transformers = load_muv()
train_dataset, valid_dataset, test_dataset = muv_datasets
# Build model
# Mean ROC-AUC across tasks as the classification metric.
metric = dc.metrics.Metric(
dc.metrics.roc_auc_score, np.mean, mode="classification")
# Exponentially decaying learning-rate schedule.
# NOTE(review): args presumably (initial_rate, decay_rate, decay_steps) —
# confirm against the pinned DeepChem version.
rate = dc.models.optimizers.ExponentialDecay(0.001, 0.8, 1000)
# One output head per MUV task over 1024-length feature vectors.
# NOTE(review): `verbosity` is not a documented MultitaskClassifier kwarg in
# newer DeepChem releases — confirm the pinned version accepts it.
model = dc.models.MultitaskClassifier(
len(muv_tasks),
n_features=1024,
dropouts=[.25],
learning_rate=rate,
weight_init_stddevs=[.1],
batch_size=64,
verbosity="high")
# Fit trained model
model.fit(train_dataset)
# Evaluate train/test scores
train_scores = model.evaluate(train_dataset, [metric], transformers)
# Collect (score, per-task-score) tuples for train/valid/test on each trial.
all_results = []
# NOTE(review): `num_trials` and `KINASE_tasks` are not defined anywhere in
# this chunk — they must come from code outside this view. TODO confirm.
for trial in range(num_trials):
  ###Create model###
  # FIX: the loop body below was previously unindented, which is a syntax
  # error; every statement through the append belongs to one trial.
  n_layers = 3
  nb_epoch = 50
  # Legacy multitask regressor: three 1000-unit layers, dropout 0.25,
  # L2 penalty on the weights.
  model = dc.models.TensorflowMultiTaskRegressor(
      len(KINASE_tasks), train_dataset.get_data_shape()[0],
      layer_sizes=[1000]*n_layers, dropouts=[.25]*n_layers,
      weight_init_stddevs=[.02]*n_layers,
      bias_init_consts=[.5]*n_layers, learning_rate=.0003,
      penalty=.0001, penalty_type="l2", optimizer="adam", batch_size=100,
      verbosity="high")
  #Use R2 classification metric
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
  print("Training model")
  model.fit(train_dataset, nb_epoch=nb_epoch, max_checkpoints_to_keep=1)
  print("Evaluating models")
  train_score, train_task_scores = model.evaluate(
      train_dataset, [metric], transformers, per_task_metrics=True)
  valid_score, valid_task_scores = model.evaluate(
      valid_dataset, [metric], transformers, per_task_metrics=True)
  test_score, test_task_scores = model.evaluate(
      test_dataset, [metric], transformers, per_task_metrics=True)
  all_results.append((train_score, train_task_scores,
                      valid_score, valid_task_scores,
                      test_score, test_task_scores))
# Set some global variables up top
verbosity = "high"
base_dir = "/tmp/nci_rf"
model_dir = os.path.join(base_dir, "model")
# Start each run from a clean working directory.
# FIX: the `if` body and the `def` body below were previously unindented
# (syntax errors); names like Metric/SklearnModel/Evaluator resolve via the
# top-of-file imports.
if os.path.exists(base_dir):
  shutil.rmtree(base_dir)
os.makedirs(base_dir)
# NOTE(review): assumes load_nci takes the data directory as its first
# positional argument — confirm against the pinned DeepChem version.
nci_tasks, nci_dataset, transformers = load_nci(
    base_dir)
(train_dataset, valid_dataset, test_dataset) = nci_dataset
# Mean ROC-AUC across tasks.
# NOTE(review): the `verbosity` kwarg is only accepted by older DeepChem
# Metric/Evaluator versions — confirm the pinned release.
classification_metric = Metric(metrics.roc_auc_score, np.mean,
                               verbosity=verbosity,
                               mode="classification")
def model_builder(model_dir):
  """Return a 500-tree random-forest regressor wrapped as a DeepChem model."""
  sklearn_model = RandomForestRegressor(n_estimators=500)
  return SklearnModel(sklearn_model, model_dir)
# Train one single-task model per NCI task, presented as a multitask model.
model = SingletaskToMultitask(nci_tasks, model_builder, model_dir)
# Fit trained model
model.fit(train_dataset)
model.save()
train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([classification_metric])
print("Train scores")
print(train_scores)
from deepchem.data.datasets import DiskDataset
from sklearn.model_selection import KFold
# Load Tox21 dataset
# FIX: bare `load_tox21` is not defined in this chunk; use the dc.molnet
# loader, consistent with the other loader calls in this file.
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = tox21_datasets
X = train_dataset.X
y = train_dataset.y
w = train_dataset.w
# Fit models
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")
# Reproducible 5-fold cross-validation over the training set.
kf = KFold(n_splits=5, shuffle=True, random_state=123)
all_train_scores = []
all_test_scores = []
start = timeit.default_timer()
for train_index, test_index in kf.split(X):
  # FIX: the loop body was previously unindented (syntax error).
  # Materialize each fold as a DiskDataset.
  train_dataset = DiskDataset.from_numpy(X[train_index], y[train_index, :], w[train_index, :], verbose=False)
  test_dataset = DiskDataset.from_numpy(X[test_index], y[test_index, :], w[test_index, :], verbose=False)
# Number of features on conv-mols
n_feat = 75
# Batch size of models
# NOTE(review): this section is truncated — the per-fold model build/fit that
# should consume n_feat is not visible in this chunk.
# NOTE(review): `from __future__` imports are only legal at the very top of a
# module; mid-file placement like this raises SyntaxError when the file runs
# as one script — these lines look like the header of a separately
# concatenated example.
from __future__ import division
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
# TF1-style global graph seed (removed in TF2, which uses tf.random.set_seed).
tf.set_random_seed(123)
import deepchem as dc
# Load tox21 dataset
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(
featurizer='Weave', split='index')
train_dataset, valid_dataset, test_dataset = tox21_datasets
# Fit models
# Mean ROC-AUC across the Tox21 tasks.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)
# NOTE(review): n_atom_feat / n_pair_feat / n_feat are never used in the
# visible lines — presumably consumed by code past this chunk; confirm.
n_atom_feat = 75
n_pair_feat = 14
# Batch size of models
batch_size = 64
n_feat = 128
# Legacy TensorGraph-style Weave model; classification head per task.
model = dc.models.WeaveTensorGraph(
len(tox21_tasks),
batch_size=batch_size,
learning_rate=1e-3,
use_queue=False,
mode='classification')
# Fit trained model
model.fit(train_dataset, nb_epoch=50)
__license__ = "MIT"
import os
import deepchem as dc
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from deepchem.molnet import load_pdbbind
# For stable runs
np.random.seed(123)
# FIX: name the split/subset once so the load call and the model directory
# agree — previously `split` and `subset` were undefined at the
# os.path.join below, which raised NameError at runtime.
split = "random"
subset = "core"
pdbbind_tasks, pdbbind_datasets, transformers = load_pdbbind(
    featurizer="grid", split=split, subset=subset)
train_dataset, valid_dataset, test_dataset = pdbbind_datasets
# Pearson R^2 as the regression metric.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
# Store the model next to this file, named after the split/subset used.
current_dir = os.path.dirname(os.path.realpath(__file__))
model_dir = os.path.join(current_dir, "%s_%s_RF" % (split, subset))
# 500-tree random forest wrapped as a DeepChem model.
sklearn_model = RandomForestRegressor(n_estimators=500)
model = dc.models.SklearnModel(sklearn_model, model_dir=model_dir)
# Fit trained model
print("Fitting model on train dataset")
model.fit(train_dataset)
model.save()
print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
import numpy as np
import pandas as pd
import deepchem as dc
# NOTE(review): `datasets` is a sibling module of the original example, not
# part of this file — confirm it is importable where this script runs.
from datasets import load_delaney
import timeit
# Only for debug!
np.random.seed(123)
# Load Delaney dataset
# ECFP fingerprints with a random train/valid/test split.
delaney_tasks, delaney_datasets, transformers = load_delaney(
featurizer='ECFP', split='random')
train_dataset, valid_dataset, test_dataset = delaney_datasets
# Fit models
# Mean Pearson R^2 across tasks as the regression metric.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
n_layers = 2
nb_epoch = 50
# Legacy multitask regressor: two 1000-unit layers, dropout 0.25, L2 penalty.
# NOTE(review): TensorflowMultiTaskRegressor only exists in old DeepChem
# releases — confirm the pinned version.
model = dc.models.TensorflowMultiTaskRegressor(
len(delaney_tasks), train_dataset.get_data_shape()[0],
layer_sizes=[1000]*n_layers, dropouts=[0.25]*n_layers,
weight_init_stddevs=[0.02]*n_layers,
bias_init_consts=[1.]*n_layers, learning_rate=0.0008,
penalty=0.0005, penalty_type="l2", optimizer="adam", batch_size=128,
seed=123, verbosity="high")
# Timestamp taken right before training so the fit can be timed later.
start = timeit.default_timer()
# Fit trained model
model.fit(train_dataset, nb_epoch=nb_epoch)
# NOTE(review): another mid-file `__future__` import — only legal at the top
# of a module; this looks like the header of a concatenated example script.
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
# TF1-style global graph seed.
tf.set_random_seed(123)
import deepchem as dc
# Load Tox21 dataset
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(
featurizer='Weave')
train_dataset, valid_dataset, test_dataset = tox21_datasets
# Fit models
metric = dc.metrics.Metric(
dc.metrics.roc_auc_score, np.mean, mode="classification")
# Largest molecule across all three splits, passed as max_atoms to the
# Weave graph below so one fixed size covers every dataset.
max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])
# presumably per-atom / per-pair feature widths for Weave — TODO confirm
n_atom_feat = 75
n_pair_feat = 14
# Batch size of models
batch_size = 64
# NOTE(review): n_feat is unused in the visible lines; likely consumed by
# code past this chunk.
n_feat = 128
graph = dc.nn.AlternateSequentialWeaveGraph(
batch_size,
max_atoms=max_atoms,
n_atom_feat=n_atom_feat,
n_pair_feat=n_pair_feat)
# NOTE(review): `load_factors`, `shard_size`, and `num_trials` are not
# defined in this chunk — they must come from surrounding code. TODO confirm.
FACTORS_tasks, datasets, transformers = load_factors(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets
print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))
n_layers = 3
n_bypass_layers = 3
nb_epoch = 125
#Use R2 classification metric
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
all_results = []
# NOTE(review): the loop body below is not indented (syntax error as
# written), and the RobustMultitaskRegressor call is cut off mid-argument
# list — the remainder of this section lies past the visible chunk, so the
# code is left byte-identical here.
for trial in range(num_trials):
model = dc.models.RobustMultitaskRegressor(
len(FACTORS_tasks),
train_dataset.get_data_shape()[0],
# Shared tower: 3 x 1000 units; per-task bypass towers: 3 x 100 units.
layer_sizes=[1000] * n_layers,
bypass_layer_sizes=[100] * n_bypass_layers,
dropouts=[.25] * n_layers,
bypass_dropouts=[.25] * n_bypass_layers,
weight_init_stddevs=[.02] * n_layers,
bias_init_consts=[1.] * n_layers,
bypass_weight_init_stddevs=[.02] * n_bypass_layers,
bypass_bias_init_consts=[1.] * n_bypass_layers,
learning_rate=.0003,
weight_decay_penalty=.0001,
import deepchem as dc
from deepchem.models.graph_models import GraphConvModel
# 4-fold splits
K = 4
# num positive/negative ligands
n_pos = 10
n_neg = 10
# 10 trials on test-set
# NOTE(review): the comment above says 10 but the value is 20 — confirm
# which is intended.
n_trials = 20
# split="task" suggests the folds partition tasks rather than compounds —
# confirm against the loader's documentation.
sider_tasks, fold_datasets, transformers = dc.molnet.load_sider(
featurizer='GraphConv', split="task")
# Define metric
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")
# Hold out the last fold for testing; merge the rest for training.
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]
# Get supports on test-set
# presumably draws n_trials support sets of n_pos/n_neg labelled ligands
# per task — TODO confirm against SupportGenerator's documentation.
support_generator = dc.data.SupportGenerator(test_dataset, n_pos, n_neg,
n_trials)
# Compute accuracies
# One score list per test-set task, to be filled by the trial loop below.
task_scores = {task: [] for task in range(len(test_dataset.get_task_names()))}
# FIX: the loop body was previously unindented (syntax error).
for trial_num, (task, support) in enumerate(support_generator):
  print("Starting trial %d" % trial_num)
  # NOTE(review): the rest of the per-trial logic is past the visible chunk.
# Number of features on conv-mols