if split == "year":
  # Fit normalization statistics on the train split, then apply the same
  # transformation to all three splits; chaining train/valid/test through the
  # loop lets multiple transformers compose correctly.
  transformers = [
      dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)
  ]
  train, valid, test = train_dataset, valid_dataset, test_dataset
  for transformer in transformers:
    train = transformer.transform(train)
    valid = transformer.transform(valid)
    test = transformer.transform(test)
else:
  transformers = [
      dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
  ]
  for transformer in transformers:
    dataset = transformer.transform(dataset)
  splitters = {
      'index': dc.splits.IndexSplitter(),
      'random': dc.splits.RandomSplitter(),
      'scaffold': dc.splits.ScaffoldSplitter()
  }
  if split in splitters:
    splitter = splitters[split]
    print("Performing new split.")
    train, valid, test = splitter.train_valid_test_split(dataset)
return chembl_tasks, (train, valid, test), transformers
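
# A minimal sketch of the transformer protocol the snippet above relies on:
# NormalizationTransformer fixes its y mean/std on the dataset passed to the
# constructor, and transform() applies those frozen statistics to whatever
# dataset it is given. The toy arrays below are illustrative, not ChEMBL data.
import numpy as np
import deepchem as dc

X = np.random.rand(100, 16)
y = 10.0 + 50.0 * np.random.rand(100, 1)  # unnormalized regression targets
dataset = dc.data.NumpyDataset(X, y)

transformer = dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
normalized = transformer.transform(dataset)
print(normalized.y.mean(), normalized.y.std())  # ~0.0 and ~1.0
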
logger.info("Removing missing entries...")
remove_missing_entries(dataset)
if split == None:
logger.info("About to transform the data...")
transformers = []
for transformer in transformers:
logger.info("Transforming the dataset with transformer ",
transformer.__class__.__name__)
dataset = transformer.transform(dataset)
return hppb_tasks, (dataset, None, None), transformers
splitters = {
'index': deepchem.splits.IndexSplitter(),
'random': deepchem.splits.RandomSplitter(),
'scaffold': deepchem.splits.ScaffoldSplitter(),
'butina': deepchem.splits.ButinaSplitter(),
}
splitter = splitters[split]
train, valid, test = splitter.train_valid_test_split(dataset, seed=split_seed)
transformers = []
logger.info("About to transform the data...")
for transformer in transformers:
logger.info("Transforming the data with transformer ",
transformer.__class__.__name__)
train = transformer.transform(train)
valid = transformer.transform(valid)
test = transformer.transform(test)
if reload:
logger.info("Saving file to {}.".format(save_dir))
if split is None:
  transformers = [
      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
  logger.info("Split is None, about to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)
  return PCBA_tasks, (dataset, None, None), transformers

splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': deepchem.splits.RandomSplitter(),
    'scaffold': deepchem.splits.ScaffoldSplitter()
}
splitter = splitters[split]
logger.info("About to split dataset using {} splitter.".format(split))
train, valid, test = splitter.train_valid_test_split(dataset)

# Balancing weights are computed on the train split only, then applied to all
# three splits, so evaluation weights reflect train-time class ratios.
transformers = [
    deepchem.trans.BalancingTransformer(transform_w=True, dataset=train)
]
logger.info("About to transform dataset.")
for transformer in transformers:
  train = transformer.transform(train)
  valid = transformer.transform(valid)
  test = transformer.transform(test)

if reload:
  logger.info("Saving file to {}.".format(save_dir))
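
# A hedged sketch of what BalancingTransformer does on an imbalanced binary
# task: it rescales the per-example weights w so each class carries equal
# total weight. The transform_w=True argument matches the older DeepChem API
# used in these snippets; recent releases infer it automatically.
import numpy as np
import deepchem as dc

y = np.array([0] * 90 + [1] * 10).reshape(-1, 1)  # 9:1 class imbalance
dataset = dc.data.NumpyDataset(np.random.rand(100, 4), y)

transformer = dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)
balanced = transformer.transform(dataset)
# After balancing, the summed weights of the two classes are roughly equal:
print(balanced.w[y == 0].sum(), balanced.w[y == 1].sum())
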
if featurizer == 'ECFP':
  featurizer_func = dc.feat.CircularFingerprint(size=1024)
elif featurizer == 'GraphConv':
  featurizer_func = dc.feat.ConvMolFeaturizer()
else:
  raise ValueError("Unsupported featurizer: {}".format(featurizer))

permeability_tasks = sorted(['LogP(RRCK)'])
loader = dc.data.SDFLoader(
    tasks=permeability_tasks, clean_mols=True, featurizer=featurizer_func)
dataset = loader.featurize(dataset_file)

splitters = {
    'index': dc.splits.IndexSplitter(),
    'random': dc.splits.RandomSplitter(),
    'scaffold': dc.splits.ScaffoldSplitter()
}
splitter = splitters[split]
train, valid, test = splitter.train_valid_test_split(dataset)
return permeability_tasks, (train, valid, test), []
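
# A small sketch contrasting the two featurizer choices above. It assumes
# RDKit is installed; the SMILES strings are illustrative.
import deepchem as dc
from rdkit import Chem

mols = [Chem.MolFromSmiles(s) for s in ["CCO", "c1ccccc1"]]

# ECFP: fixed-length circular-fingerprint bit vectors for dense models.
ecfp = dc.feat.CircularFingerprint(size=1024).featurize(mols)
print(ecfp.shape)  # (2, 1024)

# GraphConv: one ConvMol graph object per molecule, for graph-conv models.
graphs = dc.feat.ConvMolFeaturizer().featurize(mols)
print(type(graphs[0]).__name__)  # ConvMol
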
if split is None:
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset, move_mean=move_mean)
  ]
  logger.info("Split is None, about to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)
  return delaney_tasks, (dataset, None, None), transformers

splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': deepchem.splits.RandomSplitter(),
    'scaffold': deepchem.splits.ScaffoldSplitter()
}
splitter = splitters[split]
logger.info("About to split dataset with {} splitter.".format(split))
train, valid, test = splitter.train_valid_test_split(dataset)

# Statistics are fit on the train split; with move_mean=False the transformer
# only rescales the variance and leaves the mean in place.
transformers = [
    deepchem.trans.NormalizationTransformer(
        transform_y=True, dataset=train, move_mean=move_mean)
]
logger.info("About to transform data.")
for transformer in transformers:
  train = transformer.transform(train)
  valid = transformer.transform(valid)
  test = transformer.transform(test)
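
# A sketch of the companion step these loaders leave to callers: predictions
# made against normalized targets go back to original units via untransform().
# The "prediction" below is a stand-in, not a real model output.
import numpy as np
import deepchem as dc

y = 5.0 * np.random.rand(30, 1)
train = dc.data.NumpyDataset(np.random.rand(30, 8), y)
transformer = dc.trans.NormalizationTransformer(transform_y=True, dataset=train)
train = transformer.transform(train)

y_pred = transformer.untransform(train.y)  # map normalized values back
print(np.allclose(y_pred, y))  # True: the round trip recovers original units
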
# Transform the ClinTox dataset
if split is None:
  transformers = [
      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
  logger.info("Split is None, about to transform data.")
  for transformer in transformers:
    dataset = transformer.transform(dataset)
  return clintox_tasks, (dataset, None, None), transformers

splitters = {
    'index': deepchem.splits.IndexSplitter(),
    'random': deepchem.splits.RandomSplitter(),
    'scaffold': deepchem.splits.ScaffoldSplitter()
}
splitter = splitters[split]
logger.info("About to split data with {} splitter.".format(split))
train, valid, test = splitter.train_valid_test_split(dataset)

transformers = [
    deepchem.trans.BalancingTransformer(transform_w=True, dataset=train)
]
logger.info("About to transform data.")
for transformer in transformers:
  train = transformer.transform(train)
  valid = transformer.transform(valid)
  test = transformer.transform(test)

if reload:
  logger.info("Saving file to {}.".format(save_dir))
loader = dc.data.CSVLoader(
    tasks=all_nci_tasks, smiles_field="smiles", featurizer=featurizer)
dataset = loader.featurize(dataset_path, shard_size=shard_size)

# Initialize transformers
print("About to transform data")
transformers = [
    dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
]
for transformer in transformers:
  dataset = transformer.transform(dataset)

splitters = {
    'index': dc.splits.IndexSplitter(),
    'random': dc.splits.RandomSplitter(),
    'scaffold': dc.splits.ScaffoldSplitter()
}
splitter = splitters[split]
print("Performing new split.")
train, valid, test = splitter.train_valid_test_split(dataset)
return all_nci_tasks, (train, valid, test), transformers
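
# A sketch of the 'scaffold' split option used throughout these loaders.
# ScaffoldSplitter groups molecules by Bemis-Murcko scaffold and assigns whole
# groups to splits, so dataset.ids must hold SMILES strings and the realized
# split fractions are only approximate. All values below are illustrative.
import numpy as np
import deepchem as dc

smiles = ["c1ccccc1", "c1ccccc1O", "c1ccccc1N",
          "C1CCCCC1", "C1CCCCC1O", "c1ccncc1"]
dataset = dc.data.NumpyDataset(
    np.random.rand(len(smiles), 4), np.random.rand(len(smiles), 1), ids=smiles)

splitter = dc.splits.ScaffoldSplitter()
train, valid, test = splitter.train_valid_test_split(dataset)
# Split sizes follow scaffold group boundaries, not the exact 80/10/10 ratio.
print(len(train), len(valid), len(test))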