# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_load_singleton_csv(self):
  """Featurizing a one-row CSV must yield a dataset with exactly one sample.

  Writes a minimal "smiles,endpoint" CSV (one benzene row) to a temporary
  file, featurizes it with 1024-bit circular fingerprints via CSVLoader,
  and asserts one sample comes back.  The temp file is always removed,
  even when the assertion fails.
  """
  fin = tempfile.NamedTemporaryFile(mode='w', delete=False)
  try:
    fin.write("smiles,endpoint\nc1ccccc1,1")
    fin.close()
    featurizer = dc.feat.CircularFingerprint(size=1024)
    tasks = ["endpoint"]
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    X = loader.featurize(fin.name)
    self.assertEqual(1, len(X))
  finally:
    # delete=False above means this test owns cleanup; try/finally keeps a
    # failing featurization or assertion from leaking the temp file.
    os.remove(fin.name)
# --- Script fragment: GraphConvModel regression benchmark on ChEMBL ---
from deepchem.models import GraphConvModel
# Fix seeds for reproducibility.  tf.set_random_seed is the TF1-era API
# (removed in TF2) -- NOTE(review): confirm the pinned TensorFlow version.
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_chembl
# Load ChEMBL dataset: graph-conv featurization, "5thresh" subset, random
# split, 2000 rows per shard.
chembl_tasks, datasets, transformers = load_chembl(
shard_size=2000, featurizer="GraphConv", set="5thresh", split="random")
train_dataset, valid_dataset, test_dataset = datasets
# Fit models
# Metric: Pearson R^2 averaged over tasks with np.mean.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75  # NOTE(review): unused below -- GraphConvModel is not passed n_feat
# Batch size of models
batch_size = 128
model = GraphConvModel(
len(chembl_tasks), batch_size=batch_size, mode='regression')
# Fit trained model
model.fit(train_dataset, nb_epoch=20)
print("Evaluating model")
# Evaluate on train and validation splits; `transformers` lets evaluate()
# undo any normalization applied by the loader before scoring.
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
# --- Script fragment: ANIRegression training ---
# Batch size of models
max_atoms = 26
batch_size = 128
layer_structures = [128, 128, 64]
# Atomic numbers handled by the ANI symmetry functions: H, C, N, O, F.
atom_number_cases = [1, 6, 7, 8, 9]
ANItransformer = dc.trans.ANITransformer(
max_atoms=max_atoms, atom_cases=atom_number_cases)
# Transform all three splits into ANI symmetry-function features.
train_dataset = ANItransformer.transform(train_dataset)
valid_dataset = ANItransformer.transform(valid_dataset)
test_dataset = ANItransformer.transform(test_dataset)
# NOTE(review): the -1 presumably drops one column (e.g. the atomic-number
# column) from the transformer's feature count -- confirm against
# ANITransformer.get_num_feats().
n_feat = ANItransformer.get_num_feats() - 1
# Fit models
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
# NOTE(review): `tasks` is not defined in this fragment; it must come from
# an earlier part of the original script.  `metric` above is never used here.
model = dc.models.ANIRegression(
len(tasks),
max_atoms,
n_feat,
layer_structures=layer_structures,
atom_number_cases=atom_number_cases,
batch_size=batch_size,
learning_rate=0.001,
use_queue=False,
mode="regression")
# Fit trained model
model.fit(train_dataset, nb_epoch=300, checkpoint_interval=100)
# --- Script fragment: ProgressiveMultitaskRegressor on HOPV ---
import shutil  # NOTE(review): unused in this fragment
import numpy as np
import deepchem as dc
from deepchem.molnet import load_hopv
# Only for debug!
np.random.seed(123)
# Load HOPV dataset
n_features = 1024
hopv_tasks, hopv_datasets, transformers = load_hopv()
train_dataset, valid_dataset, test_dataset = hopv_datasets
# Fit models
# Two task-averaged regression metrics: Pearson R^2 and MAE.
metric = [
dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
dc.metrics.Metric(
dc.metrics.mean_absolute_error, np.mean, mode="regression")
]
# Progressive network: one 1000-unit layer, 25% dropout.
model = dc.models.ProgressiveMultitaskRegressor(
len(hopv_tasks),
n_features,
layer_sizes=[1000],
dropouts=[.25],
learning_rate=0.001,
batch_size=50)
# Fit trained model
model.fit(train_dataset, nb_epoch=25)
print("Evaluating model")
# --- Script fragment: GraphConvModel regression on QM7 ---
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
# TF1-era seeding API -- NOTE(review): confirm TensorFlow version.
tf.set_random_seed(123)
import deepchem as dc
# Load QM7 dataset (loaded from .mat; move_mean=True centers the targets).
tasks, datasets, transformers = dc.molnet.load_qm7_from_mat(
featurizer='GraphConv', move_mean=True)
train_dataset, valid_dataset, test_dataset = datasets
# Fit models
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
# Batch size of models
batch_size = 64
model = dc.models.GraphConvModel(
len(tasks), batch_size=batch_size, learning_rate=0.001, mode="regression")
# Fit trained model
model.fit(train_dataset, nb_epoch=50)
print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
# --- Script fragment: few-shot support-graph setup on Tox21 ---
n_neg = 1
# Set batch sizes for network
test_batch_size = 128
# NOTE(review): `n_pos` is not defined in this fragment; it must come from
# earlier in the original script (number of positive support examples).
support_batch_size = n_pos + n_neg
nb_epochs = 1
n_train_trials = 2000
n_eval_trials = 20
learning_rate = 1e-4
log_every_n_samples = 50
# Number of features on conv-mols
n_feat = 75
tox21_tasks, dataset, transformers = load_tox21_convmol()
# Define metric
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")
# Split by TASK (not by sample): train on K-1 task folds, hold out the last.
# NOTE(review): `K` is not defined in this fragment.
task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]
# Train support model on train
support_model = dc.nn.SequentialSupportGraph(n_feat)
# Add layers: two graph-conv + graph-pool stages (75 -> 64 -> 128 channels).
support_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
support_model.add(dc.nn.GraphPool())
support_model.add(dc.nn.GraphConv(128, 64, activation='relu'))
support_model.add(dc.nn.GraphPool())
# NOTE(review): fragment -- these keyword arguments are the tail of a model
# constructor call whose opening line (and the definitions of `penalty`,
# `penalty_type`, `dropouts`, `seed`) precedes this chunk.
bias_init_consts=[0., 0., 0.],
penalty=penalty,
penalty_type=penalty_type,
dropouts=dropouts,
learning_rate=0.002,
momentum=0.8,
optimizer="adam",
batch_size=64,
conv_layers=1,
boxsize=None,
verbose=True,
seed=seed)
# Train, then score train and test splits on MAE and Pearson R^2; the
# "stratified" tag in the output filenames presumably names the split
# strategy used upstream.
model.fit(train_dataset, nb_epoch=100)
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
transformers)
# compute_model_performance writes per-sample predictions and summary stats
# to the given CSV paths in addition to returning the scores.
train_scores = train_evaluator.compute_model_performance(
metric,
csv_out="train_predict_ac_stratified.csv",
stats_out="train_stats_ac_stratified.csv")
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(
metric,
csv_out="test_predict_ac_stratified.csv",
stats_out="test_stats_ac_stratified.csv")
print("Test scores")
print(test_scores)
# NOTE(review): fragment -- these keyword arguments are the tail of a model
# constructor call whose opening line (and the definitions of `penalty`,
# `penalty_type`, `dropouts`, `seed`) precedes this chunk.
bias_init_consts=[0., 0., 0.],
penalty=penalty,
penalty_type=penalty_type,
dropouts=dropouts,
learning_rate=0.002,
momentum=0.8,
optimizer="adam",
batch_size=64,
conv_layers=1,
boxsize=None,
verbose=True,
seed=seed)
# Train, then score train and test splits on MAE and Pearson R^2; the
# "scaffold" tag in the output filenames presumably names the split
# strategy used upstream.
model.fit(train_dataset, nb_epoch=100)
metric = [
dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
transformers)
# compute_model_performance writes per-sample predictions and summary stats
# to the given CSV paths in addition to returning the scores.
train_scores = train_evaluator.compute_model_performance(
metric,
csv_out="train_predict_ac_scaffold.csv",
stats_out="train_stats_ac_scaffold.csv")
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(
metric,
csv_out="test_predict_ac_scaffold.csv",
stats_out="test_stats_ac_scaffold.csv")
print("Test scores")
print(test_scores)
# --- Script fragment: MultitaskClassifier on MUV ---
import os  # NOTE(review): os and shutil are unused in this fragment
import numpy as np
import shutil
import deepchem as dc
from deepchem.molnet import load_muv
np.random.seed(123)
# Load MUV data
muv_tasks, muv_datasets, transformers = load_muv()
train_dataset, valid_dataset, test_dataset = muv_datasets
# Build model
# Metric: mean ROC-AUC across the MUV classification tasks.
metric = dc.metrics.Metric(
dc.metrics.roc_auc_score, np.mean, mode="classification")
# Learning rate decays exponentially: start 0.001, x0.8 every 1000 steps.
rate = dc.models.optimizers.ExponentialDecay(0.001, 0.8, 1000)
model = dc.models.MultitaskClassifier(
len(muv_tasks),
n_features=1024,
dropouts=[.25],
learning_rate=rate,
weight_init_stddevs=[.1],
batch_size=64,
verbosity="high")
# Fit trained model
model.fit(train_dataset)
# Evaluate train/test scores
train_scores = model.evaluate(train_dataset, [metric], transformers)
# --- Script fragment: repeated-trial multitask regression on KINASE ---
# NOTE(review): `num_trials`, `KINASE_tasks`, the datasets and `transformers`
# are defined earlier in the original script, outside this fragment.
all_results = []
for trial in range(num_trials):
  ### Create model ###
  # The loop body below had lost its indentation in the source; it is
  # restored here so the per-trial work actually runs inside the loop.
  n_layers = 3
  nb_epoch = 50
  model = dc.models.TensorflowMultiTaskRegressor(
      len(KINASE_tasks), train_dataset.get_data_shape()[0],
      layer_sizes=[1000] * n_layers, dropouts=[.25] * n_layers,
      weight_init_stddevs=[.02] * n_layers,
      bias_init_consts=[.5] * n_layers, learning_rate=.0003,
      penalty=.0001, penalty_type="l2", optimizer="adam", batch_size=100,
      verbosity="high")
  # Use task-averaged Pearson R^2 as the score for every split.
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
  print("Training model")
  model.fit(train_dataset, nb_epoch=nb_epoch, max_checkpoints_to_keep=1)
  print("Evaluating models")
  # per_task_metrics=True returns (aggregate score, per-task scores).
  train_score, train_task_scores = model.evaluate(
      train_dataset, [metric], transformers, per_task_metrics=True)
  valid_score, valid_task_scores = model.evaluate(
      valid_dataset, [metric], transformers, per_task_metrics=True)
  test_score, test_task_scores = model.evaluate(
      test_dataset, [metric], transformers, per_task_metrics=True)
  all_results.append((train_score, train_task_scores,
                      valid_score, valid_task_scores,
                      test_score, test_task_scores))