Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:return: A string representing the featurization of the substructure.
"""
# Body fragment: the enclosing def line is outside this excerpt.
# Builds one string describing the atoms and bonds of a single substructure,
# independent of the order in which its atom indices are supplied.
# When no functional-group features are given, use None for every atom in mol.
if fg_features is None:
fg_features = [None] * mol.GetNumAtoms()
# Materialize the atom indices as a list so they can be addressed by position.
substructure = list(substructure)
atoms = [Chem.Mol.GetAtomWithIdx(mol, idx) for idx in substructure]
# Collect the bonds between atoms of the substructure: each unordered pair
# (i < j) is queried once, and pairs for which no bond is returned are skipped.
bonds = []
for i in range(len(substructure)):
for j in range(i + 1, len(substructure)):
a1, a2 = substructure[i], substructure[j]
bond = mol.GetBondBetweenAtoms(a1, a2)
if bond is not None:
bonds.append(bond)
# One stringified feature per atom (paired with that atom's entry in
# fg_features, looked up by atom index), followed by one per collected bond.
features = [str(atom_features(atom, fg_features[atom.GetIdx()])) for atom in atoms] + \
[str(bond_features(bond)) for bond in bonds]
features.sort() # ensure identical feature string for different atom/bond ordering
# The result is the string form of the sorted list of feature strings.
features = str(features)
return features
# Body fragment: the enclosing def line is outside this excerpt.
# Computes per-atom (or per-substructure) vocabulary features for a SMILES string.
# Reject unsupported vocab_func values before doing any work.
if vocab_func not in ['atom', 'atom_features', 'feature_vector', 'substructure']:
raise ValueError(f'vocab_func "{vocab_func}" not supported.')
# NOTE(review): mol is used below without a None check; presumably
# Chem.MolFromSmiles can return None for an unparsable SMILES -- confirm.
mol = Chem.MolFromSmiles(smiles)
atoms = mol.GetAtoms()
# Functional-group features are computed only when args is provided and
# 'functional_group' appears in either of the two feature lists on args;
# otherwise every atom gets None.
if args is not None and \
('functional_group' in args.additional_atom_features or
'functional_group' in args.additional_output_features):
fg_featurizer = FunctionalGroupFeaturizer(args)
fg_features = fg_featurizer.featurize(mol)
else:
fg_features = [None] * len(atoms)
# 'feature_vector' keeps the raw atom_features result for each atom.
if vocab_func == 'feature_vector':
features = [atom_features(atom, fg) for atom, fg in zip(atoms, fg_features)]
# 'atom_features' uses the string form of the same per-atom result.
elif vocab_func == 'atom_features':
features = [str(atom_features(atom, fg)) for atom, fg in zip(atoms, fg_features)]
# 'atom' uses only the atomic number, as a string.
elif vocab_func == 'atom':
features = [str(atom.GetAtomicNum()) for atom in atoms]
# 'substructure' produces one feature string per substructure returned by
# get_substructures for the requested substructure_sizes.
elif vocab_func == 'substructure':
substructures = get_substructures(list(atoms), substructure_sizes)
features = [substructure_to_feature(mol, substructure, fg_features) for substructure in substructures]
# Not reached once the membership check at the top of this fragment has
# passed, since all four accepted values are handled above.
else:
raise ValueError(f'vocab_func "{vocab_func}" not supported.')
# When nb_info is truthy, also return the neighbor atom indices of each atom.
# The return path for a falsy nb_info is not visible in this fragment.
if nb_info:
nb_indices = []
for atom in atoms:
nb_indices.append([nb.GetIdx() for nb in atom.GetNeighbors()]) # atoms are sorted by idx
return features, nb_indices
# Body fragment: the enclosing def line is outside this excerpt.
# Computes per-atom (or per-substructure) vocabulary features for a SMILES string.
# NOTE(review): mol is used below without a None check; presumably
# Chem.MolFromSmiles can return None for an unparsable SMILES -- confirm.
mol = Chem.MolFromSmiles(smiles)
atoms = mol.GetAtoms()
# Functional-group features are computed only when args is provided and
# 'functional_group' appears in either of the two feature lists on args;
# otherwise every atom gets None.
if args is not None and \
('functional_group' in args.additional_atom_features or
'functional_group' in args.additional_output_features):
fg_featurizer = FunctionalGroupFeaturizer(args)
fg_features = fg_featurizer.featurize(mol)
else:
fg_features = [None] * len(atoms)
# 'feature_vector' keeps the raw atom_features result for each atom.
if vocab_func == 'feature_vector':
features = [atom_features(atom, fg) for atom, fg in zip(atoms, fg_features)]
# 'atom_features' uses the string form of the same per-atom result.
elif vocab_func == 'atom_features':
features = [str(atom_features(atom, fg)) for atom, fg in zip(atoms, fg_features)]
# 'atom' uses only the atomic number, as a string.
elif vocab_func == 'atom':
features = [str(atom.GetAtomicNum()) for atom in atoms]
# 'substructure' produces one feature string per substructure returned by
# get_substructures for the requested substructure_sizes.
elif vocab_func == 'substructure':
substructures = get_substructures(list(atoms), substructure_sizes)
features = [substructure_to_feature(mol, substructure, fg_features) for substructure in substructures]
# Any other vocab_func value is rejected.
else:
raise ValueError(f'vocab_func "{vocab_func}" not supported.')
# When nb_info is truthy, return a (features, nb_indices) pair, where
# nb_indices holds the neighbor atom indices of each atom.
if nb_info:
nb_indices = []
for atom in atoms:
nb_indices.append([nb.GetIdx() for nb in atom.GetNeighbors()]) # atoms are sorted by idx
return features, nb_indices
# Otherwise return the features alone.
return features