# Fragment: HIV loader (featurize SMILES, balance class weights, split).
if featurizer == 'ECFP':
  featurizer_func = dc.feat.CircularFingerprint(size=1024)
elif featurizer == 'GraphConv':
  featurizer_func = dc.feat.ConvMolFeaturizer()
loader = dc.data.CSVLoader(
    tasks=hiv_tasks, smiles_field="smiles", featurizer=featurizer_func)
dataset = loader.featurize(dataset_file, shard_size=8192)

# Initialize transformers: reweight examples so each class carries equal total weight.
transformers = [
    dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)
]
print("About to transform data")
for transformer in transformers:
  dataset = transformer.transform(dataset)

splitters = {
    'index': dc.splits.IndexSplitter(),
    'random': dc.splits.RandomSplitter(),
    'scaffold': dc.splits.ScaffoldSplitter(),
    'butina': dc.splits.ButinaSplitter()
}
splitter = splitters[split]
train, valid, test = splitter.train_valid_test_split(dataset)
return hiv_tasks, (train, valid, test), transformers
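
# Usage sketch for a loader with this return signature: a minimal example,
# assuming a DeepChem 2.x environment, where dc.molnet.load_hiv wraps this
# exact pattern and GraphConvModel consumes the ConvMol features.
import deepchem as dc

hiv_tasks, (train, valid, test), transformers = dc.molnet.load_hiv(
    featurizer='GraphConv', split='scaffold')
model = dc.models.GraphConvModel(len(hiv_tasks), mode='classification')
model.fit(train, nb_epoch=10)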
# Fragment: drug-target interaction loader built on dcCustom (a project-local
# DeepChem variant); the transformer choice depends on the task mode.
if mode == 'regression':
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
  ]
elif mode == 'classification':
  transformers = [
      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
print("About to transform data")
for transformer in transformers:
  dataset = transformer.transform(dataset)

# dcCustom's RandomSplitter adds cold-start options (held-out drugs/targets).
splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': dcCustom.splits.RandomSplitter(
        split_cold=predict_cold, cold_drug=cold_drug,
        cold_target=cold_target, prot_seq_dict=prot_seq_dict),
    'scaffold': deepchem.splits.ScaffoldSplitter(),
    'butina': deepchem.splits.ButinaSplitter(),
    'task': deepchem.splits.TaskSplitter()
}
splitter = splitters[split]

if test:
  train, valid, test = splitter.train_valid_test_split(dataset)
  all_dataset = (train, valid, test)
  if reload:
    deepchem.utils.save.save_dataset_to_disk(save_dir, train, valid, test,
                                             transformers)
elif cross_validation:
  fold_datasets = splitter.k_fold_split(dataset, K)
  all_dataset = fold_datasets
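
# What BalancingTransformer(transform_w=True, ...) does, as a runnable sketch
# (assumes DeepChem 2.4+, where the transform_w argument has been dropped from
# the constructor): it rescales per-example weights so every class contributes
# the same total weight.
import numpy as np
import deepchem as dc

X = np.random.rand(10, 4)
y = np.array([[0]] * 8 + [[1]] * 2)  # imbalanced binary labels
ds = dc.data.NumpyDataset(X, y)
balancer = dc.trans.BalancingTransformer(dataset=ds)
print(balancer.transform(ds).w.ravel())  # minority-class rows get larger weights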
# Fragment: SIDER loader.
loader = dc.data.DataLoader(tasks=SIDER_tasks,
                            smiles_field="smiles",
                            featurizer=featurizer_func,
                            verbosity='high')
dataset = loader.featurize(dataset_file)
print("%d datapoints in SIDER dataset" % len(dataset))

# Initialize transformers
transformers = [
    dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)]
print("About to transform data")
for transformer in transformers:
  dataset = transformer.transform(dataset)

splitters = {'index': dc.splits.IndexSplitter(),
             'random': dc.splits.RandomSplitter(),
             'scaffold': dc.splits.ScaffoldSplitter()}
splitter = splitters[split]
train, valid, test = splitter.train_valid_test_split(
    dataset, compute_feature_statistics=False)
return SIDER_tasks, (train, valid, test), transformers
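
# What the 'scaffold' option buys you, as a small runnable sketch (assumes
# DeepChem 2.x with RDKit; the SMILES are illustrative): molecules sharing a
# Bemis-Murcko scaffold land in the same partition, giving a harder and more
# realistic generalization test than a random split.
import numpy as np
import deepchem as dc

smiles = ['CCO', 'CCC', 'c1ccccc1', 'c1ccccc1C', 'CC(=O)O', 'C1CCCCC1']
ds = dc.data.NumpyDataset(X=np.zeros((len(smiles), 1)), ids=smiles)
train_idx, valid_idx, test_idx = dc.splits.ScaffoldSplitter().split(ds)
print(train_idx, valid_idx, test_idx)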
# Fragment: ChEMBL loader with a time-based "year" split option.
# Initialize transformers
print("About to transform data")
if split == "year":
  # The year split arrives pre-partitioned, so each partition is normalized
  # with statistics fit on the training years.
  transformers = [
      dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
  for transformer in transformers:
    train = transformer.transform(train_dataset)
    valid = transformer.transform(valid_dataset)
    test = transformer.transform(test_dataset)
else:
  transformers = [
      dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)]
  for transformer in transformers:
    dataset = transformer.transform(dataset)

splitters = {'index': dc.splits.IndexSplitter(),
             'random': dc.splits.RandomSplitter(),
             'scaffold': dc.splits.ScaffoldSplitter()}
if split in splitters:
  splitter = splitters[split]
  print("Performing new split.")
  train, valid, test = splitter.train_valid_test_split(dataset)
return chembl_tasks, (train, valid, test), transformers
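
# Because transform_y=True rescales labels, predictions come out in the
# transformed space and must be mapped back before reporting. A runnable
# sketch on toy data (assumes DeepChem 2.x, where dc.trans.undo_transforms
# reverses a transformer chain):
import numpy as np
import deepchem as dc

y = np.arange(10, dtype=float).reshape(-1, 1)
ds = dc.data.NumpyDataset(np.random.rand(10, 3), y)
normalizer = dc.trans.NormalizationTransformer(transform_y=True, dataset=ds)
ds_t = normalizer.transform(ds)
assert np.allclose(dc.trans.undo_transforms(ds_t.y, [normalizer]), y)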
# Fragment: regression loader (normalize labels, then split).
if featurizer == 'ECFP':
  featurizer = dc.feat.CircularFingerprint(size=1024)
elif featurizer == 'GraphConv':
  featurizer = dc.feat.ConvMolFeaturizer()
loader = dc.data.CSVLoader(
    tasks=tasks, smiles_field="smiles", featurizer=featurizer)
dataset = loader.featurize(dataset_file, shard_size=8192)

# Normalize labels to zero mean and unit standard deviation.
transformers = [
    dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
]
for transformer in transformers:
  dataset = transformer.transform(dataset)

splitters = {
    'index': dc.splits.IndexSplitter(),
    'random': dc.splits.RandomSplitter(),
    'scaffold': dc.splits.ScaffoldSplitter()
}
splitter = splitters[split]
train, valid, test = splitter.train_valid_test_split(dataset)
return tasks, (train, valid, test), transformers
# Fragment: SWEET loader with overridable split fractions.
loader = dc.data.CSVLoader(
    tasks=SWEET_tasks, smiles_field="smiles", featurizer=featurizer)
dataset = loader.featurize(dataset_file)

# Initialize transformers
transformers = [
    dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)
]
logger.info("About to transform data")
for transformer in transformers:
  dataset = transformer.transform(dataset)

if split is None:
  return SWEET_tasks, (dataset, None, None), transformers

splitters = {
    'index': dc.splits.IndexSplitter(),
    'random': dc.splits.RandomSplitter(),
    'scaffold': dc.splits.ScaffoldSplitter(),
    'task': dc.splits.TaskSplitter(),
    'stratified': dc.splits.RandomStratifiedSplitter()
}
splitter = splitters[split]
frac_train = kwargs.get('frac_train', 0.8)
frac_valid = kwargs.get('frac_valid', 0.1)
frac_test = kwargs.get('frac_test', 0.1)
train, valid, test = splitter.train_valid_test_split(
    dataset,
    frac_train=frac_train,
    frac_valid=frac_valid,
    frac_test=frac_test)
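
# The frac_* kwargs override the default 80/10/10 partition; the fractions must
# sum to 1. A runnable sketch with a stock splitter (assumes DeepChem 2.x):
import numpy as np
import deepchem as dc

ds = dc.data.NumpyDataset(np.random.rand(20, 4), np.random.rand(20, 1))
train, valid, test = dc.splits.RandomSplitter().train_valid_test_split(
    ds, frac_train=0.7, frac_valid=0.15, frac_test=0.15)
print(len(train), len(valid), len(test))  # 14 3 3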
# Fragment: dcCustom drug-target loader with extended cold-split options.
if mode == 'regression':
  transformers = [
      dcCustom.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
  ]
elif mode == 'classification':
  transformers = [
      dcCustom.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
print("About to transform data")
for transformer in transformers:
  dataset = transformer.transform(dataset)

splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': dcCustom.splits.RandomSplitter(
        split_cold=predict_cold, cold_drug=cold_drug,
        cold_target=cold_target, cold_drug_cluster=cold_drug_cluster,
        split_warm=split_warm, prot_seq_dict=prot_seq_dict,
        threshold=filter_threshold, input_protein=input_protein,
        remove_val_set_entries=remove_val_set_entries,
        save_dir_val_set=save_dir_val_set),
    'scaffold': deepchem.splits.ScaffoldSplitter(),
    'butina': deepchem.splits.ButinaSplitter(),
    'task': deepchem.splits.TaskSplitter()
}
splitter = splitters[split]

if test:
  train, valid, test = splitter.train_valid_test_split(dataset)
  all_dataset = (train, valid, test)
  if reload:
    dcCustom.utils.save.save_dataset_to_disk(save_dir, train, valid, test,
                                             transformers)
elif cross_validation:
  fold_datasets = splitter.k_fold_split(dataset, K)
  all_dataset = fold_datasets
# Fragment: ChEMBL loader (logging variant).
if split is None:
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
  ]
  logger.info("Split is None, about to transform data.")
  for transformer in transformers:
    dataset = transformer.transform(dataset)
  return chembl_tasks, (dataset, None, None), transformers

if split != "year":
  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
      'scaffold': deepchem.splits.ScaffoldSplitter()
  }
  splitter = splitters[split]
  logger.info("Performing new split.")
  train, valid, test = splitter.train_valid_test_split(dataset)

# Normalization statistics are fit on the training split only.
transformers = [
    deepchem.trans.NormalizationTransformer(transform_y=True, dataset=train)
]
logger.info("About to transform data.")
for transformer in transformers:
  train = transformer.transform(train)
  valid = transformer.transform(valid)
  test = transformer.transform(test)
# Fragment: BBBP loader.
dataset = loader.featurize(dataset_file, shard_size=8192)

if split is None:
  # Initialize transformers
  transformers = [
      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
  logger.info("Split is None, about to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)
  return bbbp_tasks, (dataset, None, None), transformers

splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': deepchem.splits.RandomSplitter(),
    'scaffold': deepchem.splits.ScaffoldSplitter()
}
splitter = splitters[split]
logger.info("About to split data with {} splitter.".format(split))
train, valid, test = splitter.train_valid_test_split(dataset)

# Initialize transformers on the training split only, then apply to every split.
transformers = [
    deepchem.trans.BalancingTransformer(transform_w=True, dataset=train)
]
for transformer in transformers:
  train = transformer.transform(train)
  valid = transformer.transform(valid)
  test = transformer.transform(test)
# Fragment: dcCustom drug-target loader with warm-split and threshold filtering.
if mode == 'regression':
  transformers = [
      dcCustom.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
  ]
elif mode == 'classification':
  transformers = [
      dcCustom.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
print("About to transform data")
for transformer in transformers:
  dataset = transformer.transform(dataset)

splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': dcCustom.splits.RandomSplitter(
        split_cold=predict_cold, cold_drug=cold_drug,
        cold_target=cold_target, split_warm=split_warm,
        prot_seq_dict=prot_seq_dict, threshold=filter_threshold),
    'scaffold': deepchem.splits.ScaffoldSplitter(),
    'butina': deepchem.splits.ButinaSplitter(),
    'task': deepchem.splits.TaskSplitter()
}
splitter = splitters[split]

if test:
  train, valid, test = splitter.train_valid_test_split(dataset)
  all_dataset = (train, valid, test)
  if reload:
    deepchem.utils.save.save_dataset_to_disk(save_dir, train, valid, test,
                                             transformers)
elif cross_validation:
  fold_datasets = splitter.k_fold_split(dataset, K)
  all_dataset = fold_datasets
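
# Consuming the folds, as a runnable sketch (assumes DeepChem 2.x, where
# Splitter.k_fold_split returns a list of K (train, valid) Dataset pairs):
import numpy as np
import deepchem as dc

ds = dc.data.NumpyDataset(np.random.rand(12, 4), np.random.rand(12, 1))
for fold_train, fold_valid in dc.splits.RandomSplitter().k_fold_split(ds, 3):
  print(len(fold_train), len(fold_valid))  # 8 4 on each of the 3 folds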