How to use the biothings.utils.dataload.unlist function in biothings

To help you get started, we’ve selected a few biothings examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biothings / mygene.info / src / hub / dataload / sources / pharos / parser.py View on Github external
def load_data(input_file):
    """Yield one document per gene id, carrying its collected Pharos target ids.

    Each line of ``input_file`` is expected to hold exactly two
    comma-separated fields: a Pharos id followed by a gene id. Lines whose
    second field is ``'entrez_gene_id'`` (presumably the header row) or
    ``'0'`` are ignored. All Pharos ids seen for a gene id are gathered
    into a list before any document is produced.

    Every document has the form
    ``{'_id': <gene id>, 'pharos': {'target_id': [<int>, ...]}}`` and is
    passed through ``unlist`` before being yielded.
    """
    with open_anyfile(input_file) as handle:
        targets_by_gene = defaultdict(list)
        for raw_line in handle:
            pharos_id, _id = raw_line.strip().split(',')
            # Skip rows that do not map to a usable gene id.
            if _id in ('entrez_gene_id', '0'):
                continue
            targets_by_gene[str(_id)].append(int(pharos_id))
        for gene_id, target_ids in targets_by_gene.items():
            yield unlist({
                '_id': str(gene_id),
                'pharos': {"target_id": target_ids},
            })
github biothings / myvariant.info / src / hub / dataload / sources / civic / civic_parser.py View on Github external
else:
                    new_doc['_id'] = 'chr{0}:g.{1}_{2}del'.format(chrom, start, end)
            # handle cases of insertions where only alt info is provided
            elif chrom and alt and not ref:
                no_case3 += 1
                new_doc['_id'] = 'chr{0}:g.{1}_{2}ins{3}'.format(chrom, start, end, alt)
            # handle cases where no ref or alt info provided,
            # in this case, use CIVIC internal ID as the primary id for MyVariant.info, e.g. CIVIC_VARIANT:1
            else:
                no_case4 += 1
                new_doc['_id'] = 'CIVIC_VARIANT:' + str(variant_id)
            for _evidence in doc['evidence_items']:
                if 'disease' in _evidence and 'doid' in _evidence['disease'] and _evidence['disease']['doid']:
                    _evidence['disease']['doid'] = 'DOID:' + _evidence['disease']['doid']
            new_doc['civic'] = doc
            yield dict_sweep(unlist(new_doc),['','null', 'N/A', None, [], {}])
            # change doid into its formal representation, which should be sth like DOID:1
        else:
            continue
    logging.info("number of ids with ref, alt, chrom: {}".format(no_case1))
    logging.info("number of ids with chrom, ref but no alt: {}".format(no_case2))
    logging.info("number of ids with chrom, alt but no ref: {}".format(no_case3))
    logging.info("number of ids with no ref and alt: {}".format(no_case4))
github biothings / mychem.info / src / dataload / contrib / drugbank / drugbank_parser.py View on Github external
for _d in value['atc-code']:                    
                    restr_atccode_dict(_d)  
                    
            elif isinstance(value['atc-code'], dict) or isinstance(value['atc-code'], OrderedDict):                
                restr_atccode_dict(value['atc-code'])
                
       
    d1['atc_codes'] = atccode_list
    d1['targets'] = targets_list
    d1['carriers'] = carriers_list
    d1['enzymes'] = enzymes_list
    d1['transporters'] = transporters_list    
    d1['predicted_properties'] = pred_properties_dict  
    d1['products'] = products_list            
    restr_dict['drugbank'] = d1     
    restr_dict = unlist(restr_dict) 
    restr_dict = dict_sweep(restr_dict,vals=[None,".", "-", "", "NA", "none", " ", "Not Available", "unknown","null","None"])      
    restr_dict = boolean_convert(restr_dict,added_keys=["mddr_like_rule","bioavailability","ghose_filter","rule_of_five"])
    restr_dict = value_convert(restr_dict,skipped_keys=["dpd","chemspider","chebi","pubchem_compound","pubchem_substance","bindingdb"])    
    return restr_dict
github biothings / mychem.info / src / hub / dataload / sources / chebi / chebi_parser.py View on Github external
def restructure_dict(dictionary):
    """Wrap a raw ChEBI record into a document keyed by its ChEBI ID.

    The record's ``'ChEBI ID'`` value becomes ``_id`` and the record itself
    (after ``clean_up``) is stored under ``'chebi'``. Placeholder values are
    then removed with ``dict_sweep``, the result is passed through
    ``unlist``, and finally ``value_convert_to_number`` is applied with a
    set of keys that are skipped from conversion.

    Raises ``KeyError`` if ``dictionary`` has no ``'ChEBI ID'`` entry.
    """
    # Values treated as "empty" and swept out of the document.
    placeholder_values = [None, ".", "-", "", "NA", "none", " ", "Not Available",
                          "unknown", "null", "None", "NaN"]
    # Keys whose values are left untouched by the numeric conversion.
    non_numeric_keys = ["cid", "sid", "beilstein", "pubmed", "sabio_rk", "gmelin",
                        "molbase", "synonyms", "wikipedia", "url_stub"]

    document = {
        '_id': dictionary['ChEBI ID'],
        'chebi': clean_up(dictionary),
    }
    swept = dict_sweep(document, vals=placeholder_values)
    return value_convert_to_number(unlist(swept), skipped_keys=non_numeric_keys)
github biothings / mychem.info / src / dataload / sources / ndc / ndc_packages_parser.py View on Github external
def load_data(_file):
    """Yield one cleaned document per row of a tab-delimited file.

    The file is read as latin1 text with ``csv.DictReader`` using the
    ``excel-tab`` dialect. Each row is restructured by ``restr_dict``, then
    passed through ``dict_sweep`` and ``unlist`` before being yielded.

    Args:
        _file: Path of the tab-delimited input file.

    Yields:
        The processed document for each row, in file order.
    """
    # Open the file in a context manager so the handle is released when the
    # generator is exhausted, closed, or garbage-collected; previously the
    # file object was never closed.
    with open(_file, 'r', encoding='latin1') as f:
        reader = csv.DictReader(f, dialect='excel-tab')
        for row in reader:
            _dict = restr_dict(row)
            _dict = unlist(dict_sweep(_dict))
            #_dict["_id"] = _dict["ndc"]["productndc"]
            yield _dict
github biothings / myvariant.info / src / hub / dataload / sources / clinvar / clinvar_xml_parser.py View on Github external
obj_list = []
        id_list = []
        for _set in cp.ReferenceClinVarAssertion.GenotypeSet.MeasureSet:
            variant_id = _set.ID
            for _measure in _set.Measure:
                json_obj = parse_measure(_measure, hg19=hg19)
                if json_obj:
                    json_obj['clinvar']['rcv'].update({'accession': rcv_accession,
                        'clinical_significance': clinical_significance,
                        'number_submitters': number_submitters,
                        'review_status': review_status,
                        'last_evaluated': str(last_evaluated),
                        'origin': origin,
                        'conditions': conditions})
                    json_obj['clinvar'].update({'variant_id': variant_id})
                    json_obj = (dict_sweep(unlist(value_convert_to_number(json_obj,
                                               ['chrom', 'omim', 'id', 'orphanet', 'gene',
                                                'rettbase_(cdkl5)', 'cosmic', 'dbrbc'])), [None, '', 'None']))
                    obj_list.append(json_obj)
                    id_list.append(json_obj['_id'])
        for _obj in obj_list:
            _obj['clinvar'].update({'genotypeset': {
                    'type': 'CompoundHeterozygote',
                    'genotype': id_list
                    }})
            yield _obj
    else:
        variant_id = cp.ReferenceClinVarAssertion.MeasureSet.ID
        for _measure in cp.ReferenceClinVarAssertion.MeasureSet.Measure:
            json_obj = parse_measure(_measure, hg19=hg19)
            if json_obj:
                json_obj['clinvar']['rcv'].update({'accession': rcv_accession,