How to use the deepchem.metrics module in deepchem

To help you get started, we’ve selected a few deepchem examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github deepchem / deepchem / examples / muv / muv_tf.py View on Github external
# Train a multitask DNN classifier on the MUV dataset and report ROC-AUC
# on the training split.
import os
import numpy as np
import shutil
import deepchem as dc
from deepchem.molnet import load_muv

# Fix the NumPy RNG so runs are reproducible.
np.random.seed(123)

# Load MUV data
muv_tasks, muv_datasets, transformers = load_muv()
train_dataset, valid_dataset, test_dataset = muv_datasets

# Build model
# Mean ROC-AUC across the MUV tasks.
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

# Learning rate starts at 1e-3 and decays by a factor of 0.8 every 1000 steps.
rate = dc.models.optimizers.ExponentialDecay(0.001, 0.8, 1000)
model = dc.models.MultitaskClassifier(
    len(muv_tasks),
    n_features=1024,  # presumably the fingerprint length from load_muv's default featurizer — TODO confirm
    dropouts=[.25],
    learning_rate=rate,
    weight_init_stddevs=[.1],
    batch_size=64,
    verbosity="high")  # NOTE(review): `verbosity` may be ignored/removed in newer deepchem — verify

# Fit trained model
model.fit(train_dataset)

# Evaluate train/test scores
train_scores = model.evaluate(train_dataset, [metric], transformers)
github deepchem / deepchem / examples / kinase / KINASE_tf_model.py View on Github external
# Run `num_trials` independent training runs of a multitask regression DNN on
# the KINASE dataset, collecting (overall, per-task) scores for each trial.
# NOTE: num_trials, KINASE_tasks, the datasets and transformers are defined
# earlier in the original script (not shown in this excerpt).
all_results = []
for trial in range(num_trials):
  ###Create model###
  n_layers = 3
  nb_epoch = 50
  model = dc.models.TensorflowMultiTaskRegressor(
      len(KINASE_tasks), train_dataset.get_data_shape()[0],
      layer_sizes=[1000]*n_layers, dropouts=[.25]*n_layers,
      weight_init_stddevs=[.02]*n_layers,
      bias_init_consts=[.5]*n_layers, learning_rate=.0003,
      penalty=.0001, penalty_type="l2", optimizer="adam", batch_size=100,
      verbosity="high")

  # Pearson R^2 regression metric, averaged across tasks.  (The original
  # comment called this a "classification" metric, but the model above is a
  # regressor and pearson_r2_score scores regression output.)
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)

  print("Training model")
  model.fit(train_dataset, nb_epoch=nb_epoch, max_checkpoints_to_keep=1)

  print("Evaluating models")
  # per_task_metrics=True makes evaluate() return (aggregate, per-task) scores.
  train_score, train_task_scores = model.evaluate(
      train_dataset, [metric], transformers, per_task_metrics=True)
  valid_score, valid_task_scores = model.evaluate(
      valid_dataset, [metric], transformers, per_task_metrics=True)
  test_score, test_task_scores = model.evaluate(
      test_dataset, [metric], transformers, per_task_metrics=True)

  all_results.append((train_score, train_task_scores,
                      valid_score, valid_task_scores,
                      test_score, test_task_scores))
github deepchem / deepchem / examples / nci / nci_rf.py View on Github external
# Train a per-task random forest on the NCI dataset via the
# singletask-to-multitask wrapper and report ROC-AUC on the training split.
# NOTE: Metric, metrics, load_nci, RandomForestRegressor, SklearnModel,
# SingletaskToMultitask and Evaluator are imported earlier in the original
# script (not shown in this excerpt).
# Set some global variables up top
verbosity = "high"

# Scratch directories for datasets/model checkpoints; wipe any stale state.
base_dir = "/tmp/nci_rf"
model_dir = os.path.join(base_dir, "model")
if os.path.exists(base_dir):
  shutil.rmtree(base_dir)
os.makedirs(base_dir)

nci_tasks, nci_dataset, transformers = load_nci(
    base_dir)

(train_dataset, valid_dataset, test_dataset) = nci_dataset

# Mean ROC-AUC across tasks.
# NOTE(review): a RandomForestRegressor is scored with a classification
# metric here — ROC-AUC would be computed from the regressor's continuous
# outputs. Confirm this is intended rather than RandomForestClassifier.
classification_metric = Metric(metrics.roc_auc_score, np.mean,
                               verbosity=verbosity,
                               mode="classification")
# Factory used by SingletaskToMultitask to build one independent sklearn
# model per NCI task.
def model_builder(model_dir):
  sklearn_model = RandomForestRegressor(n_estimators=500)
  return SklearnModel(sklearn_model, model_dir)
model = SingletaskToMultitask(nci_tasks, model_builder, model_dir)

# Fit trained model
model.fit(train_dataset)
model.save()

# Score the fitted model on the training split.
train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([classification_metric])

print("Train scores")
print(train_scores)
github taneishi / dlvs / graph_conv.py View on Github external
# 5-fold cross-validation of a graph-convolution model on Tox21.
# NOTE: dc, np, timeit and load_tox21 come from earlier in the original
# script (not shown), and the fold loop body continues past this excerpt.
from deepchem.data.datasets import DiskDataset
from sklearn.model_selection import KFold

# Load Tox21 dataset
tox21_tasks, tox21_datasets, transformers = load_tox21(
        featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = tox21_datasets

# Pull raw arrays out of the train split so KFold can reslice them.
X = train_dataset.X
y = train_dataset.y
w = train_dataset.w  # per-sample/task weights — presumably 0 marks missing labels; verify

# Fit models
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

kf = KFold(n_splits=5, shuffle=True, random_state=123)

all_train_scores = []
all_test_scores = []

start = timeit.default_timer()

for train_index, test_index in kf.split(X):

    # Rebuild DiskDatasets for this fold from the sliced arrays.
    train_dataset = DiskDataset.from_numpy(X[train_index], y[train_index, :], w[train_index, :], verbose=False)
    test_dataset = DiskDataset.from_numpy(X[test_index], y[test_index, :], w[test_index, :], verbose=False)

    # Number of features on conv-mols
    n_feat = 75
    # Batch size of models
github deepchem / deepchem / examples / tox21 / tox21_tensorgraph_weave.py View on Github external
# Train a Weave graph model on Tox21 for 50 epochs.
# NOTE: this excerpt ends immediately after fitting; evaluation happens
# later in the original script.
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)  # TF1-style global seeding (removed in TF2)
import deepchem as dc

# Load tox21 dataset
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(
    featurizer='Weave', split='index')
train_dataset, valid_dataset, test_dataset = tox21_datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)

# Weave featurization sizes. NOTE(review): these and n_feat are never passed
# to the model constructor below — presumably WeaveTensorGraph's defaults
# already match them; verify.
n_atom_feat = 75
n_pair_feat = 14
# Batch size of models
batch_size = 64
n_feat = 128

model = dc.models.WeaveTensorGraph(
    len(tox21_tasks),
    batch_size=batch_size,
    learning_rate=1e-3,
    use_queue=False,
    mode='classification')

# Fit trained model
model.fit(train_dataset, nb_epoch=50)
github deepchem / deepchem / examples / pdbbind / pdbbind_rf.py View on Github external
# Train a random-forest regressor on the PDBBind core subset and evaluate
# with Pearson R^2 on the train and validation splits.
__license__ = "MIT"

import os
import deepchem as dc
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from deepchem.molnet import load_pdbbind

# For stable runs
np.random.seed(123)

# Name the dataset configuration once so the model directory below stays in
# sync with what load_pdbbind receives.  (The original code passed string
# literals to load_pdbbind but then referenced undefined `split`/`subset`
# variables at the model_dir line, raising NameError.)
split = "random"
subset = "core"

pdbbind_tasks, pdbbind_datasets, transformers = load_pdbbind(
    featurizer="grid", split=split, subset=subset)
train_dataset, valid_dataset, test_dataset = pdbbind_datasets

# Pearson R^2 between predicted and measured binding affinities.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

# Store the fitted model next to this script, tagged by split/subset.
current_dir = os.path.dirname(os.path.realpath(__file__))
model_dir = os.path.join(current_dir, "%s_%s_RF" % (split, subset))

sklearn_model = RandomForestRegressor(n_estimators=500)
model = dc.models.SklearnModel(sklearn_model, model_dir=model_dir)

# Fit trained model
print("Fitting model on train dataset")
model.fit(train_dataset)
model.save()

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
github taneishi / dlvs / delaney / tf_models.py View on Github external
# Train a multitask DNN regressor on the Delaney aqueous-solubility dataset.
# NOTE: this excerpt ends mid-run — the timer started below is read, and the
# model evaluated, later in the original script.
import numpy as np
import pandas as pd
import deepchem as dc
from datasets import load_delaney
import timeit

# Only for debug!
np.random.seed(123)

# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = load_delaney(
    featurizer='ECFP', split='random')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models
# Pearson R^2 averaged over tasks (Delaney is a regression benchmark).
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

n_layers = 2
nb_epoch = 50
model = dc.models.TensorflowMultiTaskRegressor(
    len(delaney_tasks), train_dataset.get_data_shape()[0],
    layer_sizes=[1000]*n_layers, dropouts=[0.25]*n_layers,
    weight_init_stddevs=[0.02]*n_layers,
    bias_init_consts=[1.]*n_layers, learning_rate=0.0008,
    penalty=0.0005, penalty_type="l2", optimizer="adam", batch_size=128,
    seed=123, verbosity="high")

start = timeit.default_timer()

# Fit trained model
model.fit(train_dataset, nb_epoch=nb_epoch)
github deepchem / deepchem / contrib / one_shot_models / examples / tox21_alternate_weave.py View on Github external
# Build an alternate-Weave graph for Tox21.  The graph pads every molecule to
# a fixed atom count, so the three splits are scanned for the largest
# molecule first.
# NOTE: the graph built at the end is consumed later in the original script
# (not shown in this excerpt).
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)  # TF1-style global seeding (removed in TF2)
import deepchem as dc

# Load Tox21 dataset
tox21_tasks, tox21_datasets, transformers = dc.molnet.load_tox21(
    featurizer='Weave')
train_dataset, valid_dataset, test_dataset = tox21_datasets

# Fit models
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

# max_atoms must cover every molecule in every split, since the graph's
# padding size is fixed at construction time.
max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

# Weave featurization sizes (atom and atom-pair feature vector lengths).
n_atom_feat = 75
n_pair_feat = 14
# Batch size of models
batch_size = 64
n_feat = 128
graph = dc.nn.AlternateSequentialWeaveGraph(
    batch_size,
    max_atoms=max_atoms,
    n_atom_feat=n_atom_feat,
    n_pair_feat=n_pair_feat)
github deepchem / deepchem / examples / factors / FACTORS_tf_robust.py View on Github external
# Repeated-trial training of a robust (bypass-layer) multitask regressor on
# the FACTORS dataset.
# NOTE: shard_size, load_factors, num_trials, dc and np come from earlier in
# the original script, and the model constructor call continues past this
# excerpt.
FACTORS_tasks, datasets, transformers = load_factors(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets

print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))

n_layers = 3
n_bypass_layers = 3
nb_epoch = 125

# Pearson R^2 regression metric, averaged over tasks.  (The original comment
# called this a "classification" metric, but the model below is a regressor.)
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)

all_results = []
for trial in range(num_trials):
  # Robust multitask net: shared trunk layers plus per-task bypass layers.
  model = dc.models.RobustMultitaskRegressor(
      len(FACTORS_tasks),
      train_dataset.get_data_shape()[0],
      layer_sizes=[1000] * n_layers,
      bypass_layer_sizes=[100] * n_bypass_layers,
      dropouts=[.25] * n_layers,
      bypass_dropouts=[.25] * n_bypass_layers,
      weight_init_stddevs=[.02] * n_layers,
      bias_init_consts=[1.] * n_layers,
      bypass_weight_init_stddevs=[.02] * n_bypass_layers,
      bypass_bias_init_consts=[1.] * n_bypass_layers,
      learning_rate=.0003,
      weight_decay_penalty=.0001,
github deepchem / deepchem / examples / low_data / sider_graph_conv_one_fold.py View on Github external
# Low-data (few-shot) evaluation on SIDER: train on all task folds but the
# last, then score support-conditioned predictions on the held-out fold.
# NOTE: the trial loop at the bottom continues past this excerpt.
import deepchem as dc
from deepchem.models.graph_models import GraphConvModel

# 4-fold splits
K = 4
# num positive/negative ligands per support set
n_pos = 10
n_neg = 10
# number of trials on test-set (the original comment said 10, but the value
# is 20)
n_trials = 20

sider_tasks, fold_datasets, transformers = dc.molnet.load_sider(
    featurizer='GraphConv', split="task")

# Define metric
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")

# Train on all folds except the last; hold the last fold out for testing.
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]
# Get supports on test-set
# Each draw yields a (task, support-set) pair with n_pos/n_neg labeled ligands.
support_generator = dc.data.SupportGenerator(test_dataset, n_pos, n_neg,
                                             n_trials)

# Compute accuracies

# Accumulate one score list per task index on the held-out fold.
task_scores = {task: [] for task in range(len(test_dataset.get_task_names()))}

for trial_num, (task, support) in enumerate(support_generator):
  print("Starting trial %d" % trial_num)

  # Number of features on conv-mols