Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
mol_dict["fix_orientation"] = True
# Build fresh indices
mol_dict["molecule_hash"] = dmol.get_hash()
mol_dict["molecular_formula"] = dmol.get_molecular_formula()
mol_dict["identifiers"] = {}
mol_dict["identifiers"]["molecule_hash"] = mol_dict["molecule_hash"]
mol_dict["identifiers"]["molecular_formula"] = mol_dict["molecular_formula"]
# search by index keywords not by all keys, much faster
orm_molecules.append(MoleculeORM(**mol_dict))
# Check if we have duplicates
hash_list = [x.molecule_hash for x in orm_molecules]
query = format_query(MoleculeORM, molecule_hash=hash_list)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
previous_id_map = {k: v for k, v in indices}
# For a bulk add there must be no pre-existing and there must be no duplicates in the add list
bulk_ok = len(hash_list) == len(set(hash_list))
bulk_ok &= len(previous_id_map) == 0
# bulk_ok = False
if bulk_ok:
# Bulk save, doesn't update fields for speed
session.bulk_save_objects(orm_molecules)
session.commit()
# Query IDs and reorder based on the ordered orm_molecules list
query = format_query(MoleculeORM, molecule_hash=hash_list)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
previous_id_map = {k: v for k, v in indices}
# For a bulk add there must be no pre-existing and there must be no duplicates in the add list
bulk_ok = len(hash_list) == len(set(hash_list))
bulk_ok &= len(previous_id_map) == 0
# bulk_ok = False
if bulk_ok:
# Bulk save, doesn't update fields for speed
session.bulk_save_objects(orm_molecules)
session.commit()
# Query IDs and reorder based on the ordered orm_molecules list
query = format_query(MoleculeORM, molecule_hash=hash_list)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
id_map = {k: v for k, v in indices}
n_inserted = len(orm_molecules)
else:
# Start from old ID map
id_map = previous_id_map
new_molecules = []
n_inserted = 0
for orm_mol in orm_molecules:
duplicate_id = id_map.get(orm_mol.molecule_hash, False)
if duplicate_id is not False:
meta["duplicates"].append(str(duplicate_id))
else:
mol_dict = dmol.dict(exclude={"id", "validated"})
# TODO: can set them as defaults in the sql_models, not here
mol_dict["fix_com"] = True
mol_dict["fix_orientation"] = True
# Build fresh indices
mol_dict["molecule_hash"] = dmol.get_hash()
mol_dict["molecular_formula"] = dmol.get_molecular_formula()
mol_dict["identifiers"] = {}
mol_dict["identifiers"]["molecule_hash"] = mol_dict["molecule_hash"]
mol_dict["identifiers"]["molecular_formula"] = mol_dict["molecular_formula"]
# search by index keywords not by all keys, much faster
orm_molecules.append(MoleculeORM(**mol_dict))
# Check if we have duplicates
hash_list = [x.molecule_hash for x in orm_molecules]
query = format_query(MoleculeORM, molecule_hash=hash_list)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
previous_id_map = {k: v for k, v in indices}
# For a bulk add there must be no pre-existing and there must be no duplicates in the add list
bulk_ok = len(hash_list) == len(set(hash_list))
bulk_ok &= len(previous_id_map) == 0
# bulk_ok = False
if bulk_ok:
# Bulk save, doesn't update fields for speed
session.bulk_save_objects(orm_molecules)
session.commit()
# Build fresh indices
mol_dict["molecule_hash"] = dmol.get_hash()
mol_dict["molecular_formula"] = dmol.get_molecular_formula()
mol_dict["identifiers"] = {}
mol_dict["identifiers"]["molecule_hash"] = mol_dict["molecule_hash"]
mol_dict["identifiers"]["molecular_formula"] = mol_dict["molecular_formula"]
# search by index keywords not by all keys, much faster
orm_molecules.append(MoleculeORM(**mol_dict))
# Check if we have duplicates
hash_list = [x.molecule_hash for x in orm_molecules]
query = format_query(MoleculeORM, molecule_hash=hash_list)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
previous_id_map = {k: v for k, v in indices}
# For a bulk add there must be no pre-existing and there must be no duplicates in the add list
bulk_ok = len(hash_list) == len(set(hash_list))
bulk_ok &= len(previous_id_map) == 0
# bulk_ok = False
if bulk_ok:
# Bulk save, doesn't update fields for speed
session.bulk_save_objects(orm_molecules)
session.commit()
# Query IDs and reorder based on the ordered orm_molecules list
query = format_query(MoleculeORM, molecule_hash=hash_list)
indices = session.query(MoleculeORM.molecule_hash, MoleculeORM.id).filter(*query)
self._raise_missing_attribute("initial_molecule", "List of optimizations ids")
sql_statement = text(
"""
select opt.id as opt_id, molecule.* from molecule
join optimization_procedure as opt
on molecule.id = opt.initial_molecule
where opt.id in :optimization_ids
"""
)
# bind and expand ids list
sql_statement = sql_statement.bindparams(bindparam("optimization_ids", expanding=True))
# column types:
columns = inspect(MoleculeORM).columns
sql_statement = sql_statement.columns(opt_id=Integer, *columns)
query_result = self.execute_query(sql_statement, optimization_ids=list(optimization_ids))
ret = {}
for rec in query_result:
self._remove_excluded_keys(rec)
key = rec.pop("opt_id")
ret[key] = Molecule(**rec)
return ret
def get_molecules(self, id=None, molecule_hash=None, molecular_formula=None, limit: int = None, skip: int = 0):
    """Query stored molecules and return them together with query metadata.

    Parameters
    ----------
    id : optional
        Forwarded to ``format_query`` as the ``id`` filter.
    molecule_hash : optional
        Forwarded to ``format_query`` as the ``molecule_hash`` filter.
    molecular_formula : optional
        Forwarded to ``format_query`` as the ``molecular_formula`` filter.
    limit : int, optional
        Passed through unchanged to ``get_query_projection``.
    skip : int, optional
        Passed through unchanged to ``get_query_projection``; defaults to 0.

    Returns
    -------
    dict
        ``{"meta": ..., "data": ...}`` where ``meta`` is the metadata
        template with ``n_found`` and ``success`` filled in, and ``data``
        is a list of ``Molecule`` objects built from the returned records.
    """
    response_meta = get_metadata_template()
    filters = format_query(MoleculeORM, id=id, molecule_hash=molecule_hash, molecular_formula=molecular_formula)

    # The hash and the molecular formula are left out of the returned records.
    hidden_columns = ["molecule_hash", "molecular_formula"]
    records, n_found = self.get_query_projection(
        MoleculeORM, filters, limit=limit, skip=skip, exclude=hidden_columns
    )
    response_meta["n_found"] = n_found
    response_meta["success"] = True

    # Build each Molecule with validation switched off and flagged as
    # already validated.
    molecules = []
    for record in records:
        molecules.append(Molecule(**record, validate=False, validated=True))

    return {"meta": response_meta, "data": molecules}
Parameters
----------
values : str or list of strs
The hash of a molecule.
Returns
-------
int
Number of deleted molecules.
"""
query = format_query(MoleculeORM, id=id, molecule_hash=molecule_hash)
with self.session_scope() as session:
ret = session.query(MoleculeORM).filter(*query).delete(synchronize_session=False)
return ret
def get_molecules(self, id=None, molecule_hash=None, molecular_formula=None, limit: int = None, skip: int = 0):
    """Query stored molecules and return them together with query metadata.

    Parameters
    ----------
    id : optional
        Forwarded to ``format_query`` as the ``id`` filter.
    molecule_hash : optional
        Forwarded to ``format_query`` as the ``molecule_hash`` filter.
    molecular_formula : optional
        Forwarded to ``format_query`` as the ``molecular_formula`` filter.
    limit : int, optional
        Passed through unchanged to ``get_query_projection``.
    skip : int, optional
        Passed through unchanged to ``get_query_projection``; defaults to 0.

    Returns
    -------
    dict
        ``{"meta": ..., "data": ...}`` where ``meta`` is the metadata
        template with ``n_found`` and ``success`` filled in, and ``data``
        is a list of ``Molecule`` objects built from the returned records.
    """
    response_meta = get_metadata_template()
    filters = format_query(MoleculeORM, id=id, molecule_hash=molecule_hash, molecular_formula=molecular_formula)

    # The hash and the molecular formula are left out of the returned records.
    hidden_columns = ["molecule_hash", "molecular_formula"]
    records, n_found = self.get_query_projection(
        MoleculeORM, filters, limit=limit, skip=skip, exclude=hidden_columns
    )
    response_meta["n_found"] = n_found
    response_meta["success"] = True

    # Build each Molecule with validation switched off and flagged as
    # already validated.
    molecules = []
    for record in records:
        molecules.append(Molecule(**record, validate=False, validated=True))

    return {"meta": response_meta, "data": molecules}