How to use the deepchem.models module in deepchem

To help you get started, we’ve selected a few deepchem examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github deepchem / deepchem / examples / qm7 / qm7_sklearn.py View on Github external
# QM7 with a scikit-learn kernel-ridge regressor wrapped as a deepchem model.
# Split the featurized dataset 80/20 into on-disk train/test shards.
train_dataset, test_dataset = random_splitter.train_test_split(
    dataset, train_dir, test_dir, frac_train=0.8)

# Normalize only the targets (X-normalization was tried and left disabled).
transformers = [
    dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)
]

# Apply every transformer (fitted on the training split) to both splits.
for tform in transformers:
    train_dataset = tform.transform(train_dataset)
    test_dataset = tform.transform(test_dataset)

regression_metric = dc.metrics.Metric(
    dc.metrics.mean_absolute_error, mode="regression")


def model_builder(model_dir):
  """Build a fresh RBF kernel-ridge regressor wrapped for deepchem."""
  sklearn_model = KernelRidge(kernel="rbf", alpha=5e-4, gamma=0.008)
  return dc.models.SklearnModel(sklearn_model, model_dir)


# One single-task model per task, presented through a multitask interface.
model = dc.models.SingletaskToMultitask(tasks, model_builder, model_dir)

# Fit and persist the trained model.
model.fit(train_dataset)
model.save()

train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset, transformers)
train_scores = train_evaluator.compute_model_performance([regression_metric])

print("Train scores [kcal/mol]")
print(train_scores)

test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance([regression_metric])

print("Validation scores [kcal/mol]")
print(test_scores)
github deepchem / deepchem / examples / hiv / hiv_tf_models.py View on Github external
import numpy as np
import deepchem as dc
from deepchem.molnet import load_hiv

# Fixed seed so debug runs are reproducible.
np.random.seed(123)

# Load the HIV dataset (1024-bit fingerprint features) with its transformers.
n_features = 1024
hiv_tasks, hiv_datasets, transformers = load_hiv()
train_dataset, valid_dataset, test_dataset = hiv_datasets

# Score with ROC-AUC averaged across tasks.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)

# Single hidden layer of 1000 units with 25% dropout.
model = dc.models.MultitaskClassifier(
    len(hiv_tasks), n_features,
    layer_sizes=[1000], dropouts=[0.25],
    learning_rate=0.001, batch_size=50)

# Train on the training split.
model.fit(train_dataset)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)
github deepchem / deepchem / examples / low_data / tox_rf_one_fold.py View on Github external
# Low-data Tox21: for each sampled support set, fit a random forest on the
# support and score it on the task's remaining held-out examples.
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]

# Sample (task, support) pairs from the held-out fold.
support_generator = dc.data.SupportGenerator(
    test_dataset, n_pos, n_neg, n_trials)

# One score list per task index.
n_tasks = len(test_dataset.get_task_names())
task_scores = {task_idx: [] for task_idx in range(n_tasks)}

for task, support in support_generator:
  # Fit a class-balanced random forest on just the support examples.
  forest = RandomForestClassifier(class_weight="balanced", n_estimators=100)
  model = dc.models.SklearnModel(forest)
  model.fit(support)

  # Evaluate on this task's examples that were not part of the support.
  eval_dataset = dc.data.get_task_dataset_minus_support(
      test_dataset, support, task)
  probs = model.predict_proba(eval_dataset)
  score = metric.compute_metric(eval_dataset.y, probs, eval_dataset.w)
  print("Score on task %s is %s" % (str(task), str(score)))
  task_scores[task].append(score)

# Aggregate per-task scores across trials.
mean_task_scores = {}
std_task_scores = {}
for task in range(n_tasks):
  mean_task_scores[task] = np.mean(np.array(task_scores[task]))
github deepchem / deepchem / examples / qm7 / qm7_DTNN.py View on Github external
# Report both MAE and Pearson R^2 for this regression task.
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
]

# Model hyperparameters.
batch_size = 50
n_embedding = 20
n_distance = 100  # shared by the distance bins of every DTNN layer below

# DTNN graph: atom embedding, two interaction steps, then a gather readout.
graph_model = dc.nn.SequentialDTNNGraph(n_distance=n_distance)
graph_model.add(dc.nn.DTNNEmbedding(n_embedding=n_embedding))
for _ in range(2):
  graph_model.add(dc.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
graph_model.add(dc.nn.DTNNGather(n_embedding=n_embedding))
n_feat = n_embedding

model = dc.models.MultitaskGraphRegressor(
    graph_model,
    len(tasks),
    n_feat,
    batch_size=batch_size,
    learning_rate=0.001,
    learning_rate_decay_time=1000,
    optimizer_type="adam",
    beta1=.9,
    beta2=.999)

# Train the model.
model.fit(train_dataset, nb_epoch=50)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
github deepchem / deepchem / examples / kinase / KINASE_rf_model.py View on Github external
# Quick sanity check on split sizes.
print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))

num_features = train_dataset.get_data_shape()[0]
print("Num features: %d" % num_features)


def task_model_builder(model_dir):
  """Build a random-forest regressor wrapped as a deepchem model."""
  forest = RandomForestRegressor(
      n_estimators=100,
      max_features=int(num_features / 3),
      min_samples_split=5,
      n_jobs=-1)  # use all available cores
  return dc.models.SklearnModel(forest, model_dir)


# One random forest per task behind a multitask interface.
model = dc.models.SingletaskToMultitask(KINASE_tasks, task_model_builder)

###Evaluate models###
metric = dc.metrics.Metric(
    dc.metrics.pearson_r2_score, task_averager=np.mean, mode="regression")

print("Training model")
model.fit(train_dataset)

train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
# Reserved for the final evaluation only.
test_scores = model.evaluate(test_dataset, [metric], transformers)

print("Train scores")
print(train_scores)
github deepchem / deepchem / examples / qm8 / qm8_ANI.py View on Github external
# Atomic numbers handled by the ANI featurization: H, C, N, O, F.
atom_number_cases = [1, 6, 7, 8, 9]

# Featurize all three splits with the same ANI transformer.
ANItransformer = dc.trans.ANITransformer(
    max_atoms=max_atoms, atom_cases=atom_number_cases)
train_dataset, valid_dataset, test_dataset = (
    ANItransformer.transform(split)
    for split in (train_dataset, valid_dataset, test_dataset))
n_feat = ANItransformer.get_num_feats() - 1

# Report both MAE and Pearson R^2.
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
]

model = dc.models.ANIRegression(
    len(tasks),
    max_atoms,
    n_feat,
    layer_structures=layer_structures,
    atom_number_cases=atom_number_cases,
    batch_size=batch_size,
    learning_rate=0.001,
    use_queue=False,
    mode="regression")

# Train, checkpointing every 100 steps.
model.fit(train_dataset, nb_epoch=300, checkpoint_interval=100)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
github deepchem / deepchem / examples / membrane_permeability / membrane_permeability_graph_conv.py View on Github external
n_atom_feat = 75
batch_size = 64

# The largest molecule across all splits bounds the DAG size.
max_atoms_train = max(mol.get_num_atoms() for mol in train_dataset.X)
max_atoms_valid = max(mol.get_num_atoms() for mol in valid_dataset.X)
max_atoms_test = max(mol.get_num_atoms() for mol in test_dataset.X)
max_atoms = max(max_atoms_train, max_atoms_valid, max_atoms_test)

# Reshard, then convert molecules to DAGs sized for the largest molecule.
reshard_size = 512
transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
train_dataset.reshard(reshard_size)
train_dataset = transformer.transform(train_dataset)
valid_dataset.reshard(reshard_size)
valid_dataset = transformer.transform(valid_dataset)

model = dc.models.DAGModel(
    len(permeability_tasks),
    max_atoms=max_atoms,
    n_atom_feat=n_atom_feat,
    batch_size=batch_size,
    learning_rate=1e-3,
    use_queue=False,
    mode='regression')

# Train the model.
model.fit(train_dataset, nb_epoch=50)
print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)
github deepchem / deepchem / examples / qm8 / qm8_DTNN.py View on Github external
# Load the QM8 dataset (the original comment said Tox21, but load_qm8 is called).
tasks, datasets, transformers = dc.molnet.load_qm8()
train_dataset, valid_dataset, test_dataset = datasets

# Score with Pearson R^2.
metric = [dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")]

# Model hyperparameters.
batch_size = 50
n_embedding = 20
n_distance = 51
distance_min = -1.
distance_max = 9.2
n_hidden = 15

model = dc.models.DTNNModel(
    len(tasks),
    n_embedding=n_embedding,
    n_hidden=n_hidden,
    n_distance=n_distance,
    distance_min=distance_min,
    distance_max=distance_max,
    output_activation=False,
    batch_size=batch_size,
    learning_rate=0.0001,
    use_queue=False,
    mode="regression")

# Train the model.
model.fit(train_dataset, nb_epoch=50)

print("Evaluating model")
github deepchem / deepchem / examples / uv / UV_tf_singletask.py View on Github external
n_features=n_features,
      logdir=m_dir,
      layer_sizes=[1000] * n_layers,
      dropouts=[.25] * n_layers,
      weight_init_stddevs=[.02] * n_layers,
      bias_init_consts=[1.] * n_layers,
      learning_rate=.0003,
      penalty=.0001,
      penalty_type="l2",
      optimizer="adam",
      batch_size=100)


# Repeat the fit/evaluate cycle and collect per-trial scores.
all_results = []
for trial_idx in range(num_trials):
  model = dc.models.SingletaskToMultitask(
      UV_tasks, task_model_builder, model_dir="UV_tf_singletask")

  print("Fitting Model")
  model.fit(train_dataset, nb_epoch=nb_epoch)

  print("Evaluating models")
  # Each evaluate call returns (aggregate score, per-task scores).
  scores = [
      model.evaluate(split, [metric], transformers, per_task_metrics=True)
      for split in (train_dataset, valid_dataset, test_dataset)
  ]
  ((train_score, train_task_scores),
   (valid_score, valid_task_scores),
   (test_score, test_task_scores)) = scores

  all_results.append((train_score, train_task_scores, valid_score,
                      valid_task_scores, test_score, test_task_scores))
github deepchem / deepchem / examples / delaney / delaney_krr.py View on Github external
# Load the Delaney aqueous-solubility dataset.
n_features = 1024
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney()
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Score with Pearson R^2 averaged across tasks.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)


def model_builder(model_dir):
  """Build an RBF kernel-ridge regressor wrapped as a deepchem model."""
  krr = KernelRidge(kernel="rbf", alpha=1e-3, gamma=0.05)
  return dc.models.SklearnModel(krr, model_dir)


# One model per task, trained into a temporary directory.
model_dir = tempfile.mkdtemp()
model = dc.models.SingletaskToMultitask(delaney_tasks, model_builder, model_dir)

model.fit(train_dataset)
model.save()

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)