if not force:
keys = list(data_stores.eppic_interfaces.list_input_directory())
all_files = list(data_stores.eppic_interfaces.list_input_directory("pdb"))
done_pdbs = [f.split("/")[1] for f in all_files if "status.json" in f]
# done_pdbs = []
# for pdbId, files in groupby(data_stores.eppic_interfaces.list_input_directory(), lambda k: k.split("/")[1]):
# files = [os.path.splitext("".join(k.split("/")[2:]))[0] for k in files]
# if len(files) > 1 and "status.json" in files:
# done_pdbs.append(pdbId)
total_size = len(pdb)
pdb = pdb[~pdb["pdb"].isin(done_pdbs)]
RealtimeLogger.info("Filtered CATH ({}/{} domains)".format(len(pdb), total_size))
else:
RealtimeLogger.info("Running CATH ({} domains)".format(len(pdb)))
if split_groups:
pdb = pdb.assign(group=pdb["pdb"].str[:3])
pdb_groups = pdb.groupby("group")["pdb"].apply(list)
map_job(job, process_pdb_group, pdb_groups, cathFileStoreID, further_parallelize)
else:
map_job(job, process_pdb, pdb["pdb"], cathFileStoreID)
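
# Minimal illustration of the "done" filter above, assuming a hypothetical key
# layout of "pdb/<pdbId>/<file>": a status.json key marks that PDB as finished.
example_files = ["pdb/1abc/status.json", "pdb/2xyz/2xyz.pdb"]
example_done = [f.split("/")[1] for f in example_files if "status.json" in f]
assert example_done == ["1abc"]
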
for line in it:
if line.startswith('BIOMOLECULE:'):
if seenbiomolecule:
break
seenbiomolecule = True
elif line.startswith('APPLY THE FOLLOWING TO CHAINS:'):
chains = [chain.strip() for chain in line[30:].split(',')]
elif line.startswith(' AND CHAINS:'):
chains += [chain.strip() for chain in line[30:].split(',')]
elif line.startswith(' BIOMT'):
current_M = np.eye(3)
current_t = np.zeros(3)
for i in range(3):
l = next(it) if i > 0 else line
RealtimeLogger.info("LINE IS {}".format(l))
row = int(l[7])  # row of the BIOMTn record (parsed but unused below)
num = int(l[8:12])  # transformation serial number (parsed but unused below)
vec = l[12:].split()
vec = list(map(float, vec))  # map() is a lazy iterator in Python 3; make it sliceable
current_M[i, :] = vec[:-1]
current_t[i] = vec[-1]
biomt.append((current_M.T, current_t))
return biomt
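
# A minimal sketch of applying the parsed transforms: each entry stores the
# rotation transposed (see current_M.T above), so an (N, 3) coordinate array
# can be transformed with a right-side matrix multiply. coords and biomt are
# assumed inputs here.
def apply_biomt(coords, biomt):
    return [coords @ M_T + t for M_T, t in biomt]
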
try:
idx = self.params_to_update[k]
param_func = self.param_funcs[idx]
except KeyError as e:
try:
idx = self.optional_params_to_update[k]
param_func = self.optional_param_funcs[idx]
except KeyError as e:
#Invalid parameter, skip
RealtimeLogger.info("ignoring parameter {}".format(k))
continue
val = param_func(k, v)
if val is None:
RealtimeLogger.info("ignoring parameter {} since val is None".format(k))
continue
try:
formatter = self.parameter_formatters[k]
if formatter is not None:
parameters[idx] = formatter.format(val)
else:
parameters[idx] = None
except KeyError:
try:
formatter = self.optional_parameter_formatters[k]
if formatter is not None:
parameters[idx] = formatter.format(val)
else:
parameters[idx] = None
except KeyError:
"table",
columns=cath_names,
drop_duplicates=True,
**cathcode)[cath_names]
safe_remove(cath_file, warn=True)
else:
cathcodes = pd.DataFrame([cathcode], columns=cath_names)
if cathcodes.shape[1] < level:
map_job(job, run_cath_hierarchy, cathcodes.values.tolist(), func,
cathFileStoreID, **kwds)
else:
sfams = (cathcodes.astype(int).astype(str)+"/").sum(axis=1).str[:-1].tolist()
RealtimeLogger.info("Running sfam {}".format(cathcode))
kwds.pop("further_parallelize", None)
kwds.pop("level", None)
kwds.pop("cathCodeStoreID", None)
map_job(job, func, sfams, cathFileStoreID, **kwds)
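
# Worked example of the superfamily string built above, with hypothetical
# column names standing in for cath_names (CATH code 2.60.40.10 as one row).
import pandas as pd
example = pd.DataFrame([[2, 60, 40, 10]],
                       columns=["class", "architecture", "topology", "homology"])
example_sfams = (example.astype(int).astype(str) + "/").sum(axis=1).str[:-1].tolist()
assert example_sfams == ["2/60/40/10"]
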
def __connect(self):
"""
Make sure we have an S3 Bucket connection, and set one up if we don't.
Creates the S3 bucket if it doesn't exist.
"""
if self.s3 is None:
RealtimeLogger.debug("Connecting to bucket {} in region {}".format(
self.bucket_name, self.region))
print("Connecting to bucket {} in region {}".format(
self.bucket_name, self.region))
# Connect to the s3 bucket service where we keep everything
self.s3 = boto3.client('s3', self.region,
config=botocore.client.Config(signature_version='s3v4', retries={"max_attempts": 20}))
self.s3r = boto3.resource('s3', self.region,
config=botocore.client.Config(signature_version='s3v4', retries={"max_attempts": 20}))
try:
self.s3.head_bucket(Bucket=self.bucket_name)
except botocore.exceptions.ClientError:
# head_bucket failed, so the bucket is missing or inaccessible; try to create it
if self.region == 'us-east-1':
self.s3.create_bucket(
Bucket=self.bucket_name,
)
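
# The branch above only covers the default region; for any other region boto3
# requires an explicit LocationConstraint. A minimal sketch of that case,
# reusing the attribute names from the snippet:
self.s3.create_bucket(
    Bucket=self.bucket_name,
    CreateBucketConfiguration={"LocationConstraint": self.region},
)
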
:param function func: Function to spawn dynamically, passes one sample as first argument
:param list inputs: Array of samples to be batched
:param list args: any arguments to be passed to the function
"""
# num_partitions isn't exposed as an argument in order to be transparent to the user.
# The value for num_partitions is a tested value
num_partitions = 100
partition_size = int(ceil(len(inputs)/num_partitions))
if partition_size > 1:
RealtimeLogger.info("MAP_JOB: total: {}; partition_size: {}".format(
len(inputs), partition_size
))
for partition in partitions(inputs, partition_size):
job.addChildJobFn(map_job, func, partition, *args, **kwds)
else:
RealtimeLogger.info("MAP_JOB: Running: {}".format(len(inputs)))
for sample in inputs:
job.addChildJobFn(func, sample, *args, **kwds)
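
# Typical call pattern for map_job from inside another Toil job function,
# mirroring the calls shown above (process_pdb, pdb_ids, and cathFileStoreID
# are placeholders for the caller's own values):
def start_processing(job, pdb_ids, cathFileStoreID):
    map_job(job, process_pdb, pdb_ids, cathFileStoreID)
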
params.update(kwds)  # dict.update() returns None, so keep the merged dict itself
kwds = params
else:
raise RuntimeError
parameters = self.parameters[:]
for k, v in kwds.items():
try:
idx = self.params_to_update[k]
param_func = self.param_funcs[idx]
except KeyError as e:
try:
idx = self.optional_params_to_update[k]
param_func = self.optional_param_funcs[idx]
except KeyError as e:
#Invalid parameter, skip
RealtimeLogger.info("ignoring parameter {}".format(k))
continue
val = param_func(k, v)
if val is None:
RealtimeLogger.info("ignoring parameter {} since val is None".format(k))
continue
try:
formatter = self.parameter_formatters[k]
if formatter is not None:
parameters[idx] = formatter.format(val)
else:
parameters[idx] = None
except KeyError:
try:
def _runMainLoop(self, rootJob):
"""
Runs the main loop with the given job.
:param toil.job.Job rootJob: The root job for the workflow.
:rtype: Any
"""
logProcessContext(self.config)
with RealtimeLogger(self._batchSystem,
level=self.options.logLevel if self.options.realTimeLogging else None):
# FIXME: common should not import from leader
from toil.leader import Leader
return Leader(config=self.config,
batchSystem=self._batchSystem,
provisioner=self._provisioner,
jobStore=self._jobStore,
rootJob=rootJob,
jobCache=self._jobCache).run()
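
# With the leader-side context manager above active, worker code can emit
# real-time log messages through the RealtimeLogger class methods (forwarded
# only when --realTimeLogging is enabled); a minimal example:
from toil.realtimeLogger import RealtimeLogger

def example_job(job):
    RealtimeLogger.info("running example_job on a worker")
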
raise RuntimeError("Input must be Atom or Residue: {}".format(type(atom_or_residue)))
if not hasattr(self, "_pqr") or (not only_charge and len(list(self._pqr.values())[0])==1):
try:
if only_charge:
pdb2pqr = Pdb2Pqr(work_dir=self.work_dir, job=self.job)
self._pqr = pdb2pqr.get_charge_from_pdb_file(self.path, with_charge=False)
else:
apbs = APBS(work_dir=self.work_dir, job=self.job)
self._pqr = apbs.atom_potentials_from_pdb(self.path)
except (SystemExit, KeyboardInterrupt):
raise
except Exception as e:
# Fall back to an empty result (the lookups below handle missing atoms);
# re-raising here would make the fallback unreachable
self._pqr = {}
RealtimeLogger.info("ELECTROSTATICS failed ({}): {}".format(type(e), e))
atom_id = atom.get_full_id()[3:5]
if atom_id[1][1] != " ":
#pdb2pqr removes alternate conformations and only uses the first
atom_id = (atom_id[0], (atom_id[1][0], " "))
if only_charge:
charge_value = self._pqr.get(atom_id, np.nan)
electrostatic_pot_value = np.nan
else:
try:
charge_value, electrostatic_pot_value = self._pqr[atom_id]
except KeyError:
charge_value, electrostatic_pot_value = np.nan, np.nan
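
# For reference, Bio.PDB full ids have the form (structure id, model id,
# chain id, (hetfield, resseq, icode), (atom name, altloc)), so the [3:5]
# slice above keeps only the residue- and atom-level keys. Minimal example
# (the file name is hypothetical):
from Bio.PDB import PDBParser

example_structure = PDBParser(QUIET=True).get_structure("example", "example.pdb")
example_atom = next(example_structure.get_atoms())
example_atom_id = example_atom.get_full_id()[3:5]  # e.g. ((' ', 1, ' '), ('N', ' '))
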
def __connect(self):
"""
Make sure we have an Azure connection, and set one up if we don't.
"""
if self.connection is None:
RealtimeLogger.debug("Connecting to account {}, using "
"container {} and prefix {}".format(self.account_name,
self.container_name, self.name_prefix))
# Connect to the blob service where we keep everything
self.connection = BlobService(
account_name=self.account_name, account_key=self.account_key)
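
# A minimal follow-up sketch, assuming the legacy azure-storage SDK that
# provides BlobService: make sure the container exists before use (with the
# default fail_on_exist=False this does not raise if it already exists).
self.connection.create_container(self.container_name)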