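# The excerpts below reference helpers that are never imported within the
# fragments themselves. A minimal import sketch, assuming the ff_utils helpers
# come from the dcicutils package (the `check` and `connection` objects appear
# to be supplied by the surrounding check framework and are not defined here):
from datetime import datetime

from dcicutils import ff_utils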
def check_repli(res, my_auth, tag, check, start, lambda_limit, winsize=None):
    """Check run status for each set in res, and report missing runs and completed processes"""
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata([a_set['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        print(a_set['accession'], (now-start).seconds)
        if (now-start).seconds > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases


def check_margi(res, my_auth, tag, check, start, lambda_limit, nore=False, nonorm=False):
    """Check run status for each set in res, and report missing runs and completed processes"""
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata([a_set['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        print(a_set['accession'], (now-start).seconds, len(all_uuids))
        if (now-start).seconds > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases

all_samples = ff_utils.search_metadata(q, my_auth)
print(len(all_samples))
step1_name = 'workflow_bwa-mem_no_unzip-check'
step2_name = 'workflow_add-readgroups-check'
step3_name = 'workflow_merge-bam-check'
step4_name = 'workflow_picard-MarkDuplicates-check'
step5_name = 'workflow_sort-bam-check'
step6_name = 'workflow_gatk-BaseRecalibrator'
step7_name = 'workflow_gatk-ApplyBQSR-check'
step8_name = 'workflow_gatk-HaplotypeCaller'
# collect all wf for wf version check
all_system_wfs = ff_utils.search_metadata('/search/?type=Workflow&status=released', my_auth)
for a_sample in all_samples:
    all_items, all_uuids = ff_utils.expand_es_metadata([a_sample['uuid']], my_auth,
                                                       store_frame='embedded',
                                                       add_pc_wfr=True,
                                                       ignore_field=['previous_version',
                                                                     'experiment_relation',
                                                                     'biosample_relation',
                                                                     'references',
                                                                     'reference_pubs'])
    now = datetime.utcnow()
    print(a_sample['accession'], (now-start).seconds, len(all_uuids))
    if (now-start).seconds > lambda_limit:
        break
    # collect similar types of items under library
    all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
    file_items = [typ for typ in all_items if typ.startswith('file_') and typ != 'file_format']
    all_files = [i for typ in all_items for i in all_items[typ] if typ in file_items]
    all_qcs = [i for typ in all_items for i in all_items[typ] if typ.startswith('quality_metric')]


def check_rna(res, my_auth, tag, check, start, lambda_limit):
    """Check run status for each set in res, and report missing runs and completed processes"""
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata([a_set['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        # print(a_set['accession'], (now-start).seconds)
        if (now-start).seconds > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases

# check indexing queue
env = connection.ff_env
indexing_queue = ff_utils.stuff_in_queues(env, check_secondary=True)
if indexing_queue:
    check.status = 'PASS'  # maybe use warn?
    check.brief_output = ['Waiting for indexing queue to clear']
    check.summary = 'Waiting for indexing queue to clear'
    check.full_output = {}
    return check
q = '/search/?type=Sample&files.display_title=No+value&cram_files.display_title%21=No+value'
all_samples = ff_utils.search_metadata(q, my_auth)
print(len(all_samples))
for a_sample in all_samples:
    all_items, all_uuids = ff_utils.expand_es_metadata([a_sample['uuid']], my_auth,
                                                       store_frame='embedded',
                                                       add_pc_wfr=True,
                                                       ignore_field=['experiment_relation',
                                                                     'biosample_relation',
                                                                     'references',
                                                                     'reference_pubs'])
    now = datetime.utcnow()
    print(a_sample['accession'], (now-start).seconds, len(all_uuids))
    if (now-start).seconds > lambda_limit:
        break
    # are all files uploaded?
    all_uploaded = True
    cram_files = [i for i in all_items['file_processed'] if i['file_format']['file_format'] == 'CRAM']
    print(len(cram_files))
    for a_file in cram_files:
        if a_file['status'] in ['uploading', 'upload failed']:
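            # excerpt truncated here; presumably the flag set above is cleared
            # when an unfinished upload is found, e.g.:
            all_uploaded = False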


def check_hic(res, my_auth, tag, check, start, lambda_limit, nore=False, nonorm=False):
    """Check run status for each set in res, and report missing runs and completed processes"""
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata([a_set['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        print(a_set['accession'], (now-start).seconds)
        if (now-start).seconds > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases

res = ff_utils.search_metadata(q, my_auth)
# check if anything in scope
if not res:
    return check
# list step names
step1_name = 'workflow_gatk-CombineGVCFs'
step2_name = 'workflow_gatk-GenotypeGVCFs-check'
# step3_name = 'workflow_gatk-VQSR-check'
# collect all wf for wf version check
all_system_wfs = ff_utils.search_metadata('/search/?type=Workflow&status=released', my_auth)
# iterate over msa
print(len(res))
for an_msa in res:
    all_items, all_uuids = ff_utils.expand_es_metadata([an_msa['uuid']], my_auth,
                                                       store_frame='embedded',
                                                       add_pc_wfr=True,
                                                       ignore_field=['previous_version',
                                                                     'experiment_relation',
                                                                     'biosample_relation',
                                                                     'references',
                                                                     'reference_pubs'])
    now = datetime.utcnow()
    print(an_msa['@id'], (now-start).seconds, len(all_uuids))
    if (now-start).seconds > lambda_limit:
        break
    all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
    file_items = [typ for typ in all_items if typ.startswith('file_') and typ != 'file_format']
    all_files = [i for typ in all_items for i in all_items[typ] if typ in file_items]
    all_qcs = [i for typ in all_items for i in all_items[typ] if typ.startswith('quality_metric')]