Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Read every molecule from an SDF file with pybel and build one record per
# molecule holding its PubChem compound id, an XYZ rendering, and a set of
# renamed metadata fields.
#
# NOTE(review): this block has lost its indentation and appears to be spliced
# from two fragments. The lines from `except KeyError:` onward use `mols`,
# which is never defined in the visible code, while `pubchem_molecules`,
# `skipped` and `coll` are initialised but never used and `data` is never
# stored anywhere. The code is left untouched here; recover the missing lines
# from the original file before relying on it.
def process_sdf_file(filename):
# Open the "mp_pubchem" store (keyed on pubchem_id) and take its collection.
mp_pubchem = MongograntStore("rw:knowhere.lbl.gov/mp_pubchem", "mp_pubchem",
key="pubchem_id")
mp_pubchem.connect()
coll = mp_pubchem.collection
# Accumulators; neither is updated in the visible lines.
skipped = 0
pubchem_molecules = []
# `i` is not read in the visible lines.
for i, mol in enumerate(pybel.readfile('sdf', filename)):
try:
# A molecule without a compound id raises KeyError here.
pubchem_id = int(mol.data['PUBCHEM_COMPOUND_CID'])
xyz = mol.write(format="xyz")
data = {'pubchem_id': pubchem_id,
'xyz': xyz}
# `keys` and `key_map` are defined outside this block; each SDF field that
# is present is copied into the record under its mapped name.
for key in keys:
if key in mol.data:
data[key_map[key]] = mol.data[key]
except KeyError:
# NOTE(review): `mols` is undefined in the visible code - presumably these
# last lines belong to a different function; confirm against the original.
mols.append(False)
return mols
def task_done(future):
    """Record a finished task's result and advance the progress bar.

    Meant to be registered with ``add_done_callback``. The result is appended
    to the module-level ``results`` list and the module-level ``pbar`` is
    ticked once.

    Args:
        future: A completed future; only its ``result()`` method is called.

    Raises:
        Whatever ``future.result()`` raises. The progress bar is still
        advanced in that case and nothing is appended to ``results``.
    """
    try:
        results.append(future.result())
    finally:
        # Tick even when the task failed, otherwise the bar can never reach
        # its total after a single bad batch.
        pbar.update()
# Driver: stream the mp_pubchem collection in raw batches, hand each batch to
# a worker process, and collect the per-batch results through task_done.

# `max_atoms` and `name_keys` are not read in the visible lines; presumably
# process_batch uses them - verify before removing.
max_atoms = 12
batch_size = 5000
name_keys = ['name_iupac', 'name_traditional']

mp_pubchem = MongograntStore("rw:knowhere.lbl.gov/mp_pubchem", "mp_pubchem",
                             key="pubchem_id")
mp_pubchem.connect()
coll = mp_pubchem.collection

# NOTE(review): Collection.count() is deprecated in PyMongo 3.7 and removed in
# 4.0; switch to estimated_document_count() once the minimum PyMongo version
# is confirmed.
total_mols = coll.count()
batches = coll.find_raw_batches(batch_size=batch_size)

# One tick per batch. Ceiling division keeps the total an integer and counts
# a final partial batch (assumes each raw batch carries up to `batch_size`
# documents - TODO confirm).
pbar = tqdm(total=(total_mols + batch_size - 1) // batch_size, desc="process")
results = []

# Presumably leaving the `with` block waits for the scheduled tasks, so the
# bar is only closed after the callbacks have run - confirm against pebble.
with pebble.ProcessPool() as pool:
    for batch in batches:
        f = pool.schedule(process_batch, args=(batch, ))
        f.add_done_callback(task_done)

pbar.close()