# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_dataset(qm9_dbpath, qm9_avlailable_properties):
    """AtomsData accepts a matching property list and rejects mismatches.

    Opens the QM9 test database with the correct property list and checks
    its length, then verifies that property lists with extra, missing, or
    wrong entries raise ``AtomsDataError``.
    """
    # Path exists and the declared properties match the database exactly.
    dataset = spk.data.AtomsData(
        qm9_dbpath, available_properties=qm9_avlailable_properties
    )
    assert dataset.available_properties == qm9_avlailable_properties
    # Use len() instead of calling __len__ directly (idiomatic Python).
    assert len(dataset) == 19

    # Valid path, but property lists that disagree with the stored data.
    too_many = qm9_avlailable_properties + ["invalid"]
    not_all = qm9_avlailable_properties[:-1]
    wrong_prop = qm9_avlailable_properties[:-1] + ["invalid"]
    for bad_properties in (too_many, not_all, wrong_prop):
        with pytest.raises(spk.data.AtomsDataError):
            # No need to bind the result: construction itself must raise.
            spk.data.AtomsData(qm9_dbpath, available_properties=bad_properties)
def empty_asedata(tmpdir, max_atoms, property_spec):
    """Create a fresh, empty AtomsData database inside *tmpdir*.

    ``property_spec`` maps property names to their specification; only the
    names are passed on as the database's available properties.
    """
    db_path = os.path.join(str(tmpdir), "test.db")
    property_names = list(property_spec.keys())
    return schnetpack.data.AtomsData(db_path, available_properties=property_names)
def dataset(build_db, tmp_db_path):
    """Return an AtomsData view of the database created by *build_db*."""
    data = spk.data.AtomsData(dbpath=tmp_db_path)
    return data
def test_orca_parser(tmpdir, main_path, target_orca_db):
    """Parsing an Orca output must reproduce the reference database entry."""
    db_path = os.path.join(tmpdir, "test_orca_parser.db")

    # Parse the main (and hessian) output files into a fresh database.
    parsed_properties = (
        OrcaMainFileParser.properties + OrcaHessianFileParser.properties
    )
    orca_parser = OrcaParser(db_path, properties=parsed_properties)
    orca_parser.file_extensions[Properties.hessian] = ".hess"
    orca_parser.parse_data([main_path])

    # First entry of the freshly parsed db vs. the stored reference db.
    target_atoms, target_properties = AtomsData(target_orca_db).get_properties(0)
    test_atoms, test_properties = AtomsData(db_path).get_properties(0)

    assert np.allclose(
        target_atoms.get_atomic_numbers(), test_atoms.get_atomic_numbers()
    )
    assert np.allclose(target_atoms.positions, test_atoms.positions)

    # Every reference property must be present and numerically identical.
    for name in target_properties:
        assert name in test_properties
        assert np.allclose(test_properties[name], target_properties[name])
def example_asedata(tmpdir, max_atoms, property_spec, example_data):
    """Build an AtomsData database in *tmpdir* populated with *example_data*.

    ``example_data`` yields ``(atoms, properties)`` pairs; each pair is
    written into the database before it is returned.
    """
    db_path = os.path.join(str(tmpdir), "test.db")
    data = schnetpack.data.AtomsData(
        db_path, available_properties=list(property_spec.keys())
    )
    # Populate the database one system at a time.
    for atoms, properties in example_data:
        data.add_system(atoms, **properties)
    return data
def test_dataset(qm9_dbpath, qm9_avlailable_properties):
    """AtomsData validates its property list against an existing database.

    Checks three cases: a matching property list succeeds, mismatched lists
    (extra, missing, or wrong entries) raise ``AtomsDataError``, and opening
    without a property list recovers the stored properties.
    """
    # Path exists and the declared properties match the database exactly.
    dataset = spk.data.AtomsData(
        qm9_dbpath, available_properties=qm9_avlailable_properties
    )
    assert dataset.available_properties == qm9_avlailable_properties
    # Use len() instead of calling __len__ directly (idiomatic Python).
    assert len(dataset) == 19

    # Valid path, but property lists that disagree with the stored data.
    too_many = qm9_avlailable_properties + ["invalid"]
    not_all = qm9_avlailable_properties[:-1]
    wrong_prop = qm9_avlailable_properties[:-1] + ["invalid"]
    for bad_properties in (too_many, not_all, wrong_prop):
        with pytest.raises(spk.data.AtomsDataError):
            # No need to bind the result: construction itself must raise.
            spk.data.AtomsData(qm9_dbpath, available_properties=bad_properties)

    # Valid path and no property list: properties come from the database
    # itself; order is not guaranteed, so compare as sets.
    dataset = spk.data.AtomsData(qm9_dbpath)
    assert set(dataset.available_properties) == set(qm9_avlailable_properties)
# NOTE(review): fragment of what appears to be OrcaParser.__init__ — the
# enclosing `def` header (with the `properties`, `dbpath`, `filter` and
# `mask_charges` parameters this code reads) lies outside this view, and
# the original indentation has been flattened.
# Split the requested properties by which file parser provides them.
main_properties = []
hessian_properties = []
dummy_properties = []
for p in properties:
if p in self.main_properties:
main_properties.append(p)
elif p in self.hessian_properties:
hessian_properties.append(p)
else:
# Unknown property names are reported but otherwise ignored.
print("Unrecognized property {:s}".format(p))
all_properties = main_properties + hessian_properties + dummy_properties
self.all_properties = all_properties
# The target database declares exactly the recognized properties.
self.atomsdata = spk.data.AtomsData(dbpath, available_properties=all_properties)
# The main file parser is always needed
self.main_parser = OrcaMainFileParser(properties=main_properties + ["atoms"])
# The hessian parser is only instantiated when hessian-derived
# properties were actually requested.
if len(hessian_properties) > 0:
self.hessian_parser = OrcaHessianFileParser(properties=hessian_properties)
else:
self.hessian_parser = None
# Set up filter dictionary to e.g. remove numerically unstable solvent computations
self.filter = filter
# If requested, mask Q charges introduced by Orca
self.mask_charges = mask_charges
# NOTE(review): fragment of a dataset-dispatch function — the enclosing
# `def` header (with `dataset`, `dbpath`, `dataset_properties`) is outside
# this view, and indentation has been flattened.
# Dispatch on the (case-insensitive) dataset name to the matching loader.
dataset = dataset.upper()
_log.info("Load {} dataset".format(dataset))
if dataset == "QM9":
return QM9(dbpath, properties=dataset_properties)
elif dataset == "ISO17":
return get_iso17(dataset_properties=dataset_properties)
elif dataset == "ANI1":
return get_ani1(dataset_properties=dataset_properties)
elif dataset == "MD17":
return get_md17(dataset_properties=dataset_properties)
elif dataset == "MATPROJ":
return get_matproj(dataset_properties=dataset_properties)
elif dataset == "CUSTOM":
# Custom data: use an existing .db directly, otherwise convert the
# input file into a .db next to it first.
file, extension = os.path.splitext(dbpath)
if extension == ".db":
return AtomsData(dbpath, required_properties=dataset_properties)
else:
generate_db(db_path=file + ".db", file_path=dbpath)
return AtomsData(file + ".db", required_properties=dataset_properties)
# NOTE(review): everything below duplicates the branches above and is
# syntactically orphaned (an `elif` chain with no leading `if`) — this
# looks like an accidental copy-paste of the dispatch block; confirm
# against the original file and remove.
return QM9(dbpath, properties=dataset_properties)
elif dataset == "ISO17":
return get_iso17(dataset_properties=dataset_properties)
elif dataset == "ANI1":
return get_ani1(dataset_properties=dataset_properties)
elif dataset == "MD17":
return get_md17(dataset_properties=dataset_properties)
elif dataset == "MATPROJ":
return get_matproj(dataset_properties=dataset_properties)
elif dataset == "CUSTOM":
file, extension = os.path.splitext(dbpath)
if extension == ".db":
return AtomsData(dbpath, required_properties=dataset_properties)
else:
generate_db(db_path=file + ".db", file_path=dbpath)
return AtomsData(file + ".db", required_properties=dataset_properties)