Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def load_data(input_file):
    """Yield one Pharos document per gene id found in *input_file*.

    Every non-blank line is expected to hold exactly two comma-separated
    fields: a Pharos target id followed by a gene id. Rows whose second
    field is the literal ``entrez_gene_id`` (the header row) or ``0`` are
    ignored. Blank lines are skipped rather than raising ``ValueError``
    when the line is unpacked.

    The input is read completely and closed before the first document is
    yielded.

    Yields:
        The result of ``unlist`` applied to
        ``{'_id': <gene id>, 'pharos': {'target_id': [<int>, ...]}}``,
        one per distinct gene id.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            fields, or the Pharos id of a kept row is not an integer.
    """
    targets_by_gene = defaultdict(list)
    with open_anyfile(input_file) as in_f:
        for line in in_f:
            line = line.strip()
            if not line:
                # e.g. a trailing empty line at the end of the dump
                continue
            pharos_id, gene_id = line.split(',')
            if gene_id not in ('entrez_gene_id', '0'):
                targets_by_gene[gene_id].append(int(pharos_id))
    for gene_id, target_ids in targets_by_gene.items():
        yield unlist({'_id': gene_id, 'pharos': {'target_id': target_ids}})
else:
new_doc['_id'] = 'chr{0}:g.{1}_{2}del'.format(chrom, start, end)
# handle cases of insertions where only alt info is provided
elif chrom and alt and not ref:
no_case3 += 1
new_doc['_id'] = 'chr{0}:g.{1}_{2}ins{3}'.format(chrom, start, end, alt)
# handle cases where no ref or alt info provided,
# in this case, use CIVIC internal ID as the primary id for MyVariant.info, e.g. CIVIC_VARIANT:1
else:
no_case4 += 1
new_doc['_id'] = 'CIVIC_VARIANT:' + str(variant_id)
for _evidence in doc['evidence_items']:
if 'disease' in _evidence and 'doid' in _evidence['disease'] and _evidence['disease']['doid']:
_evidence['disease']['doid'] = 'DOID:' + _evidence['disease']['doid']
new_doc['civic'] = doc
yield dict_sweep(unlist(new_doc),['','null', 'N/A', None, [], {}])
# change doid into its formal representation, which should be something like DOID:1
else:
continue
logging.info("number of ids with ref, alt, chrom: {}".format(no_case1))
logging.info("number of ids with chrom, ref but no alt: {}".format(no_case2))
logging.info("number of ids with chrom, alt but no ref: {}".format(no_case3))
logging.info("number of ids with no ref and alt: {}".format(no_case4))
for _d in value['atc-code']:
restr_atccode_dict(_d)
elif isinstance(value['atc-code'], dict) or isinstance(value['atc-code'], OrderedDict):
restr_atccode_dict(value['atc-code'])
d1['atc_codes'] = atccode_list
d1['targets'] = targets_list
d1['carriers'] = carriers_list
d1['enzymes'] = enzymes_list
d1['transporters'] = transporters_list
d1['predicted_properties'] = pred_properties_dict
d1['products'] = products_list
restr_dict['drugbank'] = d1
restr_dict = unlist(restr_dict)
restr_dict = dict_sweep(restr_dict,vals=[None,".", "-", "", "NA", "none", " ", "Not Available", "unknown","null","None"])
restr_dict = boolean_convert(restr_dict,added_keys=["mddr_like_rule","bioavailability","ghose_filter","rule_of_five"])
restr_dict = value_convert(restr_dict,skipped_keys=["dpd","chemspider","chebi","pubchem_compound","pubchem_substance","bindingdb"])
return restr_dict
def restructure_dict(dictionary):
    """Build a document for one ChEBI record.

    The record's ``'ChEBI ID'`` value becomes ``_id`` and the record itself,
    after being passed through ``clean_up``, is stored under ``'chebi'``.
    The document is then run through ``dict_sweep`` (with a list of
    placeholder values), ``unlist`` and ``value_convert_to_number`` (with a
    list of keys to skip), in that order.

    Raises:
        KeyError: if *dictionary* has no ``'ChEBI ID'`` entry.
    """
    placeholder_values = [
        None, ".", "-", "", "NA", "none", " ", "Not Available",
        "unknown", "null", "None", "NaN",
    ]
    keys_left_as_is = [
        "cid", "sid", "beilstein", "pubmed", "sabio_rk", "gmelin",
        "molbase", "synonyms", "wikipedia", "url_stub",
    ]

    # Read the id before clean_up gets to see the record.
    chebi_id = dictionary['ChEBI ID']
    doc = {'_id': chebi_id, 'chebi': clean_up(dictionary)}

    swept = dict_sweep(doc, vals=placeholder_values)
    flattened = unlist(swept)
    return value_convert_to_number(flattened, skipped_keys=keys_left_as_is)
def load_data(_file):
    """Yield one document per row of the tab-delimited file *_file*.

    The file is opened as latin1 text and parsed with ``csv.DictReader``
    using the ``excel-tab`` dialect. Each row is passed through
    ``restr_dict``, then ``dict_sweep``, then ``unlist`` before being
    yielded.

    The file is opened in a ``with`` block so the handle is released once
    the generator is exhausted or closed, instead of being left open.
    """
    with open(_file, 'r', encoding='latin1') as f:
        reader = csv.DictReader(f, dialect='excel-tab')
        for row in reader:
            yield unlist(dict_sweep(restr_dict(row)))
obj_list = []
id_list = []
for _set in cp.ReferenceClinVarAssertion.GenotypeSet.MeasureSet:
variant_id = _set.ID
for _measure in _set.Measure:
json_obj = parse_measure(_measure, hg19=hg19)
if json_obj:
json_obj['clinvar']['rcv'].update({'accession': rcv_accession,
'clinical_significance': clinical_significance,
'number_submitters': number_submitters,
'review_status': review_status,
'last_evaluated': str(last_evaluated),
'origin': origin,
'conditions': conditions})
json_obj['clinvar'].update({'variant_id': variant_id})
json_obj = (dict_sweep(unlist(value_convert_to_number(json_obj,
['chrom', 'omim', 'id', 'orphanet', 'gene',
'rettbase_(cdkl5)', 'cosmic', 'dbrbc'])), [None, '', 'None']))
obj_list.append(json_obj)
id_list.append(json_obj['_id'])
for _obj in obj_list:
_obj['clinvar'].update({'genotypeset': {
'type': 'CompoundHeterozygote',
'genotype': id_list
}})
yield _obj
else:
variant_id = cp.ReferenceClinVarAssertion.MeasureSet.ID
for _measure in cp.ReferenceClinVarAssertion.MeasureSet.Measure:
json_obj = parse_measure(_measure, hg19=hg19)
if json_obj:
json_obj['clinvar']['rcv'].update({'accession': rcv_accession,