# except (SystemExit, KeyboardInterrupt):
# raise
# except:
# try:
# observed_interactome = filter_hdf_chunks("IBIS_observed.h5", "table", "obs_int_id", float(mol_sfam_id))
# except (SystemExit, KeyboardInterrupt):
# raise
# except:
# job.log("Failed reading IBIS_observed.h5")
# return
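# Live path: fetch this superfamily's observed-interactome HDF5 from the Toil
# file store and load its "table" dataset (replacing the direct read in the
# commented-out block above).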
sfamFileStoreID = sfamFileStoreIDs[mol_sfam_id]
obsFilePath = get_file(job, "{}_obs.h5".format(int(mol_sfam_id)),
sfamFileStoreID, work_dir=work_dir)
observed_interactome = pd.read_hdf(obsFilePath, "table")
RealtimeLogger.info("Obs has {} rows".format(observed_interactome.shape))
# obsFilePath = os.path.join(work_dir, "{0}.observed_interactome".format(int(mol_sfam_id)))
# out_store.read_input_file("{0}/{0}.observed_interactome".format(int(mol_sfam_id)), obsPath)
tableInfPath = get_file(job, "IBIS_inferred_{}.h5".format(table), tableInfStoreID)
# skip_int = set([tuple(map(int, os.path.basename(f)[:-3].split("_"))) for f in out_store.list_input_directory(
# "{}/_infrows/Intrac{}".format(int(mol_sfam_id), table)) if f.endswith(".h5")])
try:
inf_int_ids = filter_hdf_chunks(tableInfPath, "Intrac{}".format(table), chunksize=100,
nbr_superfam_id=mol_sfam_id)
except (RuntimeError, TypeError):
job.log("Unable to find sfam {} in table {}, Skipping".format(mol_sfam_id, table))
return
#inf_int_ids = set([tuple(row) for row in inf_int_ids.itertuples()])
#inf_int_ids -= skip_int
else:
# pdb_or_key is a key
assert pdb_or_key.count("_") == 3
key = os.path.splitext(pdb_or_key)[0]
pdb, chain, sdi, domNo = os.path.basename(key).split("_")
sdi, domNo = sdi[3:], domNo[1:]
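# The basename packs four underscore-separated fields; the slices presumably
# strip literal "sdi" and "d" prefixes, e.g. "1abc_A_sdi12345_d0" ->
# sdi="12345", domNo="0".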
try:
pdb_path = os.path.join(work_dir, os.path.basename(key)+".pdb")
in_store.read_input_file(key+".pdb", pdb_path)
s = ProteinFeaturizer(pdb_path, pdb, chain, sdi=sdi, domNo=domNo,
work_dir=work_dir, job=job)
_, atom_features = s.calculate_flat_features()
RealtimeLogger.info("Finished atom features")
_, residue_features = s.calculate_flat_features(course_grained=True)
RealtimeLogger.info("Finished residue features")
graph_features = s.calculate_graph()
RealtimeLogger.info("Finished edge features")
out_store.write_output_file(atom_features, key+"_atom.npy")
out_store.write_output_file(residue_features, key+"_residue.npy")
out_store.write_output_file(graph_features, key+"_edges.gz")
for f in (pdb_path, atom_features, residue_features, graph_features):
try:
os.remove(f)
except OSError:
pass
except (SystemExit, KeyboardInterrupt):
raise
except Exception:
raise
rerun = True
try:
os.remove(fname)
except (OSError, FileNotFoundError):
pass
RealtimeLogger.info("Donwlaod step 6 {}".format(rerun))
if rerun and attempts > 0:
return self.get(key, attempts=attempts-1, last_source=source)
else:
RealtimeLogger.info("Not restarting")
raise KeyError("Key '{}' is an invalid file".format(key))
except Exception as e:
RealtimeLogger.info("API Failed parsing json ({}): {}".format(type(e), e))
raise KeyError("Key '{}' Not found; {} is an invalid file".format(key, fname))
if key not in self.files:
self.files[key] = (fname, should_remove)
if should_remove and self._clean:
self.clean()
return result
print(mol_sfam, int_sfam)
#if i<30: continue
# and (max_sfams is None or num_ddi
# Save all domains to a FASTA file
domain_ids = {}
with open(domain_fasta, "w") as fasta:
if jobStoreIDs is not None:
for pdb_fname, jobStoreID in jobStoreIDs:
pdb_file = os.path.join(work_dir, pdb_fname)
job.fileStore.readGlobalFile(jobStoreID, userPath=pdb_file+".tmp")
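# remove_ter_lines (inferred from its name) rewrites the PDB without TER
# records before the sequence is extracted from it.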
remove_ter_lines(pdb_file+".tmp", pdb_file)
with open(pdb_file) as f:
pdb2seq(pdb_fname, sfam_id, f, fasta)
domain_ids[pdb_fname] = pdb_file
else:
for i, key in enumerate(in_store.list_input_directory(str(int(sfam_id)))):
if not key.endswith(".pdb"): continue
if i % 10 == 0:
RealtimeLogger.info("{} {}".format(i, key))
fname = os.path.basename(key)
try:
in_store.read_input_file(key, fname)
except (KeyboardInterrupt, SystemExit):
raise
except Exception as e:
continue
with open(fname) as f:
pdb2seq(fname, sfam_id, f, fasta)
domain_ids[fname] = fname
try:
os.remove(fname)
except OSError:
pass
def cactus_realtime_log_info(msg, max_len=1000):
if len(msg) > max_len:
msg = msg[:max_len] + "..."
RealtimeLogger.info("{}: {}".format(datetime.now(), msg))
with open(fname) as f:
pass
except IOError as e:
# The file might be empty
try:
os.remove(fname)
except OSError:
pass
RealtimeLogger.info("Failed reading, {} bc {}".format(fname, e))
if attempts > 0:
return self.get(key, attempts=attempts-1, last_source=source)
else:
raise KeyError("Key '{}' is an invalid file".format(key))
RealtimeLogger.info("Donwlaod step 5")
try:
result = self.parse(fname, key)
except (SystemExit, KeyboardInterrupt):
raise
except ValueError as e:
rerun = False
try:
with open(fname) as f:
for line in f:
rerun = self.check_line(key, line, attempts)
except Exception:
rerun = True
try:
os.remove(fname)
except (OSError, FileNotFoundError):
    pass
import itertools as it
import glob
import re
from collections import defaultdict
from toil.realtimeLogger import RealtimeLogger
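# Guarded imports: a Toil worker that lacks molmimic's optional dependencies
# surfaces the ImportError in the worker log via RealtimeLogger instead of
# crashing at import time.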
try:
import pandas as pd
from molmimic.generate_data.iostore import IOStore
from molmimic.generate_data.job_utils import map_job
from molmimic.generate_data.util import get_file, PDB_TOOLS, izip_missing, get_pdb_residues, natural_keys, remove_ter_lines
from molmimic.parsers.USEARCH import run_usearch
from molmimic.parsers import superpose
except ImportError as e:
RealtimeLogger.info(e)
def merge_all_sfam_clusters(job, sfam_id, interaction_type, id):
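    """Merge per-cluster interface files for one superfamily.

    A sketch of the intent from the body below: stream each cluster file under
    {sfam_id}/interface_clusters_{id}_{interaction_type} out of the clustered-
    structures store and accumulate it into cluster_interfaces.
    """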
assert interaction_type in ("observed", "inferred")
work_dir = job.fileStore.getLocalTempDir()
cluster_store = IOStore.get("aws:us-east-1:molmimic-clustered-structures")
cluster_interfaces = None
to_delete = []
for cluster_key in cluster_store.list_input_directory("{}/interface_clusters_{}_{}".format(sfam_id, id, interaction_type)):
cluster_file = os.path.join(work_dir, os.path.basename(cluster_key))
try:
cluster_store.read_input_file(cluster_key, cluster_file)
except (KeyboardInterrupt, SystemExit):
raise
def check_line(self, key, line, attempts):
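    """Decide from one line of a failed response whether to retry.

    Marks the download for rerun on an HTML/404 error page (while attempts
    remain) or when the line parses to a lone {"uid": ...} placeholder
    record; a "Too many submissions" notice triggers an IP reset instead.
    """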
RealtimeLogger.info("EPPIC Failed {}".format(line))
rerun = False
if "" in line or "" in line or "HTTP 404 Not Found" in line:
RealtimeLogger.info("Is restarting")
self.store.remove_file(key)
rerun = attempts > 0
if "Too many submissions" in line:
RealtimeLogger.info("Is changing IP")
try:
reset_ip()
except (SystemExit, KeyboardInterrupt):
raise
except Exception:
pass
try:
RealtimeLogger.info("Start line {}".format(line))
result = json.loads(line)
RealtimeLogger.info("Result {}".format(result))
if len(result) == 1 and isinstance(result[0], dict) and "uid" in result[0]:
rerun = True
except (SystemExit, KeyboardInterrupt):
raise
except Exception:
    pass  # assumed handling: lines that fail to parse as JSON are ignored