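# The fragments below are excerpts from Foursight portal checks built on
# dcicutils. A hedged sketch of names the fragments assume but do not define
# (values are illustrative, not from the source):
#
#   from dcicutils import ff_utils, es_utils
#   MIN_CHUNK_SIZE = 200          # ids to accumulate before an ES query
#   STATUS_LEVEL = {'deleted': 0, 'in review': 4, 'released': 9}  # rank per status
#
# `connection` (with .ff_keys / .ff_es) and `get_stage_info` are provided by
# the Foursight framework.
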
# --- Check fragment: flag 'Case' items whose linked items have a lower status level.
id2links = {}   # item uuid -> uuids of its linked embedded items
id2status = {}  # item uuid -> numeric status level
id2item = {}    # item uuid -> details of the items we care about
stati2search = [s for s, lvl in STATUS_LEVEL.items() if lvl >= 4]
items2search = ['Case']
item_search = 'search/?frame=object'
for item in items2search:
    item_search += '&type={}'.format(item)
for status in stati2search:
    item_search += '&status={}'.format(status)

if id_list:
    itemids = re.split(r',|\s+', id_list)
    itemids = [itemid for itemid in itemids if itemid]
else:
    itemres = ff_utils.search_metadata(item_search, key=connection.ff_keys, page_limit=500)
    itemids = [item.get('uuid') for item in itemres]
es_items = ff_utils.get_es_metadata(itemids, key=connection.ff_keys, chunk_size=200,
                                    is_generator=True)
for es_item in es_items:
    label = es_item.get('object').get('display_title')
    desc = es_item.get('object').get('description')
    inst = es_item.get('embedded').get('institution').get('display_title')
    status = es_item.get('properties').get('status', 'in review')
    id2links[es_item.get('uuid')] = [li.get('uuid') for li in es_item.get('linked_uuids_embedded')]
    id2status[es_item.get('uuid')] = STATUS_LEVEL.get(status)
    id2item[es_item.get('uuid')] = {'label': label, 'status': status, 'institution': inst,
                                    'description': desc}

# --- A different check (the fragment starts mid-function): link each processed
# file to its single ExperimentSet and compare labs. An earlier branch handling
# res.get('experiments') is not part of this fragment.
for res in result:
    if res.get('experiment_sets'):
        if len(res['experiment_sets']) != 1:  # this should not happen
            opf['problematic'].append({
                '@id': res['@id'],
                'experiment_sets': [es['uuid'] for es in res['experiment_sets']]})
            continue
        exp_or_set = res['experiment_sets'][0]
    else:  # this should not happen
        opf['problematic'].append({'@id': res['@id']})
        continue
    res['exp_set_uuid'] = exp_or_set['uuid']
    if res['exp_set_uuid'] not in exp_set_uuids:
        exp_set_uuids.append(res['exp_set_uuid'])
# get the lab of each Experiment/ExperimentSet
result_exp_set = ff_utils.get_es_metadata(exp_set_uuids, sources=['uuid', 'properties.lab'],
                                          key=connection.ff_keys)
uuid_2_lab = {}  # map Exp/ExpSet uuid to its lab
for item in result_exp_set:
    uuid_2_lab[item['uuid']] = item['properties']['lab']
# evaluate contributing labs
for res in result:
    if res['@id'] not in [pr['@id'] for pr in opf['problematic']]:
        contr_lab = []
        exp_set_lab = uuid_2_lab[res['exp_set_uuid']]
        if exp_set_lab == res['lab']['uuid']:
            continue
        elif res.get('contributing_labs'):
            contr_lab = [lab['uuid'] for lab in res['contributing_labs']]
            if exp_set_lab in contr_lab:
                continue
        contr_lab.append(exp_set_lab)

# --- Resume the status-mismatch fragment: compare each item's status level
# against the status levels of its linked items.
mismatches = {}
linked2get = {}
for i, iid in enumerate(itemids):
    linkedids = id2links.get(iid)
    if not linkedids:  # item with no links
        continue
    istatus = id2status.get(iid)
    for lid in linkedids:
        lstatus = id2status.get(lid)
        if not lstatus:  # status unknown yet - queue the id for a lookup
            linked2get.setdefault(lid, []).append(iid)
        elif lstatus < istatus:  # status mismatch for an item we've seen before
            ignore = id2item.get(iid).get('to_ignore')
            if ignore is not None and lid in ignore:
                continue
            else:
                mismatches.setdefault(iid, []).append(lid)
    # only query ES once enough ids have accumulated (or on the last item)
    if len(linked2get) > MIN_CHUNK_SIZE or i + 1 == len(itemids):
        linked2chk = ff_utils.get_es_metadata(list(linked2get.keys()), key=connection.ff_keys,
                                              chunk_size=200, is_generator=True)
        for litem in linked2chk:
            luuid = litem.get('uuid')
            listatus = litem.get('properties').get('status', 'in review')
            llabel = litem.get('item_type')
            lstatus = STATUS_LEVEL.get(listatus)
            # add info to the tracking dicts
            id2status[luuid] = lstatus
            id2item[luuid] = {'label': llabel, 'status': listatus}
            for lfid in set(linked2get[luuid]):
                # skip linked items explicitly ignored for that item
                ignore = id2item[lfid].get('to_ignore')
                if ignore is not None and luuid in ignore:
                    continue
                elif lstatus < id2status[lfid]:  # status mismatch, add to the report
                    mismatches.setdefault(lfid, []).append(luuid)
        linked2get = {}  # reset the queue of ids to look up
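
# Note: nothing in this fragment sets 'to_ignore' on id2item entries; the full
# check would populate it earlier (hypothetically from per-item exception tags),
# e.g. id2item[uuid]['to_ignore'] = ['<linked-uuid-to-skip>', ...], so that
# accepted mismatches are silenced.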

# --- Another check (fragment): infer the organism of a BioFeature from its
# genes, then its genomic regions, and finally its description.
borgns = [gene2org.get(g.get('@id')) for g in biogenes if '@id' in g]
linked_orgn_name = _get_orgname_from_atid_list(borgns, orgn2name)
if not linked_orgn_name:  # didn't get it from genes - try genomic regions
    gen_regions = biofeat.get('genome_location')
    if gen_regions is not None:
        grorgns = []
        for genreg in gen_regions:
            # first try to spot a genome assembly in the display title
            assembly_in_dt = False
            gr_dt = genreg.get('display_title')
            for ga, orgn in genome2orgn.items():
                if ga in gr_dt:
                    grorgns.append(orgn)
                    assembly_in_dt = True
                    break
            if not assembly_in_dt:  # fall back to the item's genome_assembly field
                gr_res = ff_utils.get_es_metadata([genreg.get('uuid')], key=connection.ff_keys,
                                                  sources=['properties.genome_assembly'])
                try:
                    gr_ass = gr_res[0].get('properties').get('genome_assembly')
                except (AttributeError, IndexError):
                    gr_ass = None
                if gr_ass is not None:
                    for ga, orgn in genome2orgn.items():
                        if ga == gr_ass:
                            grorgns.append(orgn)
        linked_orgn_name = _get_orgname_from_atid_list(grorgns, orgn2name)
if not linked_orgn_name:  # and finally try the description
    desc = biofeat.get('description')
    if desc is not None:
        for o in orgn2name.values():
            if o in desc.lower():
                linked_orgn_name = o
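
# The private helper used above is not included in the fragment; a minimal
# hypothetical sketch of its behavior:
def _get_orgname_from_atid_list(atids, orgn2name):
    """Resolve organism @ids to a single organism name, if unambiguous."""
    names = {orgn2name.get(atid) for atid in atids if atid}
    names.discard(None)
    return names.pop() if len(names) == 1 else ''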

# --- Another check: purge soft-deleted download-tracking items, within a time
# budget (runs only on the production stage).
if get_stage_info()['stage'] != 'prod':
    check.summary = check.description = 'This check only runs on Foursight prod'
    return check
time_limit = 270  # 4.5 minutes
t0 = time.time()
check.full_output = {}  # purged items by item type
search = '/search/?type=TrackingItem&tracking_type=download_tracking&status=deleted&field=uuid&limit=300'
search_res = ff_utils.search_metadata(search, key=connection.ff_keys)
search_uuids = [res['uuid'] for res in search_res]
client = es_utils.create_es_client(connection.ff_es, True)
# a bit convoluted, but we want frame=raw, which does not include the uuid;
# use get_es_metadata as a generator to handle this
for to_purge in ff_utils.get_es_metadata(search_uuids, es_client=client, is_generator=True,
                                         key=connection.ff_keys):
    if round(time.time() - t0, 2) > time_limit:
        break
    purge_properties = to_purge['properties']
    purge_properties['uuid'] = to_purge['uuid']  # add uuid to the frame=raw properties
    try:
        purge_res = ff_utils.purge_metadata(to_purge['uuid'], key=connection.ff_keys)
    except Exception as exc:
        purge_status = 'error'
        purge_detail = str(exc)
    else:
        purge_status = purge_res['status']
        purge_detail = purge_properties if purge_status == 'success' else purge_res
    purge_record = {'uuid': to_purge['uuid'], 'result': purge_detail}
    if to_purge['item_type'] not in check.full_output:
        check.full_output[to_purge['item_type']] = {}
    # the fragment is cut here; grouping the record by purge status is an
    # assumption about how it continues
    check.full_output[to_purge['item_type']].setdefault(purge_status, []).append(purge_record)

# --- Another check (fragment): gather other_processed_files and their quality
# metrics so file statuses can be compared.
opf_exp_results = ff_utils.search_metadata(opf_exp, key=connection.ff_keys)
results = opf_set_results + opf_exp_results
# extract file uuids
files = []
for result in results:
    if result.get('other_processed_files'):
        for case in result['other_processed_files']:
            files.extend([i['uuid'] for i in case['files']])
            if case.get('higlass_view_config'):
                files.append(case['higlass_view_config'].get('uuid'))
    if result.get('experiments_in_set'):
        for exp in result['experiments_in_set']:
            for case in exp['other_processed_files']:
                files.extend([i['uuid'] for i in case['files']])
# get metadata for the files, to collect statuses
resp = ff_utils.get_es_metadata(list(set(files)),
                                sources=['links.quality_metric', 'object.status', 'uuid'],
                                key=connection.ff_keys)
opf_status_dict = {item['uuid']: item['object']['status'] for item in resp if item['uuid'] in files}
opf_linked_dict = {
    item['uuid']: item.get('links', {}).get('quality_metric', []) for item in resp if item['uuid'] in files
}
quality_metrics = [uuid for item in resp for uuid in item.get('links', {}).get('quality_metric', [])]
qm_resp = ff_utils.get_es_metadata(list(set(quality_metrics)),
                                   sources=['uuid', 'object.status'],
                                   key=connection.ff_keys)
opf_other_dict = {item['uuid']: item['object']['status'] for item in qm_resp if item['uuid'] not in files}
check.full_output = {}
for result in results:
    hg_dict = {item['title']: item.get('higlass_view_config', {}).get('uuid')
               for item in result.get('other_processed_files', [])}
    titles = [item['title'] for item in result.get('other_processed_files', [])]
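
# The fragment ends before the comparison step; presumably each group title's
# files would then be checked against opf_status_dict / opf_linked_dict /
# opf_other_dict to find status mismatches.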

# --- Another variant of the status-mismatch loop (no per-item ignore lists;
# note the different default status).
mismatches = {}
linked2get = {}
for i, iid in enumerate(itemids):
    linkedids = id2links.get(iid)
    if not linkedids:  # item with no links
        continue
    istatus = id2status.get(iid)
    for lid in linkedids:
        lstatus = id2status.get(lid)
        if not lstatus:  # status unknown yet - queue the id for a lookup
            linked2get.setdefault(lid, []).append(iid)
        elif lstatus < istatus:  # status mismatch for an item we've seen before
            mismatches.setdefault(iid, []).append(lid)
    # only query ES once enough ids have accumulated (or on the last item)
    if len(linked2get) > MIN_CHUNK_SIZE or i + 1 == len(itemids):
        linked2chk = ff_utils.get_es_metadata(list(linked2get.keys()), key=connection.ff_keys,
                                              chunk_size=200, is_generator=True)
        for litem in linked2chk:
            luuid = litem.get('uuid')
            listatus = litem.get('properties').get('status', 'in review by lab')
            llabel = litem.get('item_type')
            lstatus = STATUS_LEVEL.get(listatus)
            # add info to the tracking dicts
            id2status[luuid] = lstatus
            id2item[luuid] = {'label': llabel, 'status': listatus}
            for lfid in set(linked2get[luuid]):
                if lstatus < id2status[lfid]:  # status mismatch so add to report
                    mismatches.setdefault(lfid, []).append(luuid)
        linked2get = {}  # reset the linked id dict
if mismatches:
    brief_output = {}
    full_output = {}
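    # Hedged sketch of how the report might be assembled from here (not part of
    # the original fragment): count mismatches per item and keep the details.
    for iid, lids in mismatches.items():
        item_info = id2item.get(iid, {})
        brief_output[iid] = len(lids)
        full_output[iid] = {'item': item_info,
                            'linked_items_with_lower_status': [id2item.get(lid, {}) for lid in lids]}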