How to use the pysam.TabixFile function in pysam

To help you get started, we’ve selected a few pysam examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github henryjuho / parus_indel / summary_analyses / wga2phy.py View on Github external
action='store_true')
    parser.add_argument('-contig_key',
                        help=argparse.SUPPRESS,
                        default='{}/parus_indel/annotation/contigs_key.txt'.format(os.getenv('HOME')))
    args = parser.parse_args()

    # submission loop
    if args.sub is True:
        command_line = [' '.join([x for x in sys.argv if x != '-sub' and x != '-evolgen'])]
        out_stem = args.cds_fa.split('/')[-1].replace('.fna.gz', '')
        q_sub(command_line, out=args.out_dir + out_stem, evolgen=args.evolgen, t=96)
        sys.exit()

    # variables
    fa = args.cds_fa
    wga = pysam.TabixFile(args.wga_bed)
    out_dir = args.out_dir
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    contig_data = contig_dict(args.contig_key)

    # loop through fasta
    sequence, chromo, coords, skip, trans_name, method = '', '', [], False, '', ''
    for line in gzip.open(fa):

        # skip sequence lines following skipped header
        if skip is True:
            if not line.startswith('>'):
                continue
            else:
                skip = False
github hall-lab / svtools / scripts / cnvnator_merge_lowmem_cov.py View on Github external
def run_from_args(args):
  #cp = cProfile.Profile()
  info, carriers=get_info(args.lmerged_vcf, args.chrom, args.sample_list)
  if args.dry_run_info_file!='':
    info.to_csv(args.dry_run_info_file)
    exit(1)
  info=prune_info(info)
  component_pos=info.groupby(['comp']).agg({'chr': 'first', 'varstart': np.min, 'varstop': np.max}).reset_index()

  cntab=pysam.TabixFile(args.cnfile)
  cn=get_cndata(0, component_pos, info, cntab, args.verbose)
  if args.verbose:
    sys.stderr.write("Memory usage info: %s\n" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
  nind=np.unique(cn.loc[cn.comp==0, 'id']).shape[0]
  print(str(nind))
  outf1=open(args.outfile, "w")
  outf2=open(args.diag_outfile, "w")
  header='\t'.join(['#comp', 'cluster', 'dist_cluster', 'start', 'stop', 'nocl', 'bic', 'mean_sep', 'mean_offset', 'cov', 'wts', 'cn_med', 'cn_mad', 'info_ncarriers', 'is_rare', 'mm_corr', 'dist','nvar', 'score', 'ptspos', 'ptsend', 'prpos', 'prend', 'is_winner'])
  outf1.write(header+"\n")
  outf2.write(header+"\n")

  for comp in component_pos.comp.unique():
    if(comp==args.test_comp) or (args.test_comp==-1):  
      cn=get_cndata(comp, component_pos, info, cntab, args.verbose)                  
      if args.verbose:
        sys.stderr.write("Memory usage info: %s\n" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
github PubuduSaneth / cnvScan / src / cnvScan_VarFilt.py View on Github external
def process(self):
        if self.genelist:
            gene_list = pysam.TabixFile(self.genelist)

        with open(self.input, 'r') as fin:
            with open(self.output, 'w') as fout:
                for line in fin:

                    line = line.strip()
                    if line.startswith('#chr'):
                        header = line.split('\t')
                        if self.genelist:
                            header += ['PIDD_GENE', 'Inheritance', 'Phenotype']
                        fout.write('\t'.join(header) + '\n')
                        continue
                    elif line.startswith('##'):
                        continue

                    try:
github phenopolis / phenopolis / pre_analysis / VEP / postprocess_VEP_json.py View on Github external
def vcf_query(chrom=None, pos=None, ref=None, alt=None, variant_str=None, individual=None, verbose=False, limit=100, release='mainset_July2016'):
    if variant_str:
        variant_str=str(variant_str).strip().replace('_','-')
        chrom, pos, ref, alt = variant_str.split('-')
    #tb=pysam.TabixFile('/slms/UGI/vm_exports/vyp/phenotips/uclex_files/current/chr%s.vcf.gz' % chrom,)
    tb=pysam.TabixFile('/SAN/vyplab/UCLex/current/%s_chr%s.vcf.gz' % (release, chrom,))
    #mainset_February2016_chrX_filtered.vcf.gz
    region=str('%s:%s-%s'%(chrom, pos, int(pos),))
    headers=[h for h in tb.header]
    headers=(headers[len(headers)-1]).strip().split('\t')
    records=tb.fetch(region=region)
    records=[r.split('\t') for r in records]
    def response(POS, REF, ALT, index, geno, chrom, pos):
        alleles=[geno['REF']]+geno['ALT'].split(',')
        homozygous_genotype='/'.join([str(index),str(index)])
        heterozygous_genotype='/'.join(['0',str(index)])
        variant=dict()
        variant['POS']=POS
        variant['REF']=REF
        variant['ALT']=ALT
        variant['index']=index
        variant['variant_id']='-'.join([str(chrom),str(POS),variant['REF'],variant['ALT']])
github hall-lab / svtools / scripts / cnvnator_merge_memory.py View on Github external
def run_from_args(args):
  #cp = cProfile.Profile()
  sys.stderr.write("Memory usage start: %s\n" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
  info, carriers=get_info(args.lmerged_vcf, args.chrom, args.sample_list)
  component_pos=info.groupby(['comp']).agg({'chr': 'first', 'varstart': np.min, 'varstop': np.max}).reset_index()
  sys.stderr.write("Memory usage info: %s\n" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
  sys.stderr.write("cntab\n")
  cntab=pysam.TabixFile(args.cnfile)
  cn=get_cndata(0, component_pos, info, cntab)
  sys.stderr.write("Memory usage cntab: %s\n" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
  nind=np.unique(cn.loc[cn.comp==0, 'id']).shape[0]
  print(str(nind))
  outf1=open(args.outfile, "w")
  outf2=open(args.diag_outfile, "w")
  header='\t'.join(['#comp', 'cluster', 'dist_cluster', 'start', 'stop', 'nocl', 'bic', 'mm', 'kk', 'cn_med', 'cn_mad', 'info_ncarriers', 'is_rare', 'mm_corr', 'dist','nvar', 'score', 'ptspos', 'ptsend', 'prpos', 'prend', 'is_winner'])
  outf1.write(header+"\n")
  outf2.write(header+"\n")

  for comp in component_pos.comp.unique():
    if (comp>0) and (comp%100==0):
      cn=get_cndata(comp, component_pos, info, cntab)
                      
    sys.stderr.write("comp="+str(comp)+"\n")
    sys.stderr.write("Memory usage: %s\n" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
github henryjuho / parus_indel / summary_analyses / callable_sites_summary_nogff.py View on Github external
def bed_regions(bed_file, chromo):
    """Yield (start, end) coordinate pairs for BED records on one chromosome.

    :param bed_file: path to a bgzipped, tabix-indexed BED file
    :param chromo: chromosome/contig name to fetch records for
    :yields: (start, end) tuples of ints, one per BED line
        (BED convention: 0-based start, half-open end)
    """
    open_bed = pysam.TabixFile(bed_file)
    try:
        # asTuple() exposes each record's tab-separated columns by index
        for line in open_bed.fetch(chromo, parser=pysam.asTuple()):
            start = int(line[1])
            end = int(line[2])
            yield start, end
    finally:
        # fix: the original leaked the tabix handle; close it even if the
        # generator is abandoned before exhaustion
        open_bed.close()
github deeptools / pyGenomeTracks / pygenometracks / tracks / BedGraphTrack.py View on Github external
def load_file(self):
        """Open this track's data file, preferring a tabix index when possible.

        A ``.bgz`` file is opened as a :class:`pysam.TabixFile` (stored on
        ``self.tbx``); note a tabix index cannot report the global min/max
        values. If the tabix open fails, or the file is not ``.bgz``, the
        whole file is loaded into ``self.interval_tree`` via
        ``file_to_intervaltree`` instead.
        """
        self.tbx = None
        file_path = self.properties['file']
        region = self.properties['region']

        if not file_path.endswith(".bgz"):
            # plain (non-bgz) file: load it fully as an interval tree
            self.interval_tree, __, __ = file_to_intervaltree(file_path,
                                                              region)
        else:
            try:
                # from the tabix file it is not possible to know the
                # global min and max
                self.tbx = pysam.TabixFile(file_path)
            except IOError:
                # no usable index: fall back to the interval tree
                self.interval_tree, __, __ = file_to_intervaltree(file_path,
                                                                  region)

        self.num_fields = None
github igvteam / igv-reports / igv_reports / tabix.py View on Github external
def get_data(filename, region):
    """Return tabix records from *filename*, concatenated into one string.

    :param filename: path to a bgzipped, tabix-indexed file
    :param region: dict with 'chr', 'start' and 'end' keys restricting the
        fetch, or a falsy value to return every record in the file
    :return: the matching rows concatenated with no separator (matches the
        original behavior — rows from ``fetch`` carry no trailing newline)
    """
    tb = pysam.TabixFile(filename)
    try:
        if region:
            range_string = region['chr'] + ":" + str(region['start']) + "-" + str(region['end'])
            it = tb.fetch(range_string)
        else:
            it = tb.fetch()

        # fix: str.join is O(total length); the original per-row `data += row`
        # loop is quadratic on large regions
        return "".join(it)
    finally:
        # fix: the original never closed the tabix handle
        tb.close()
github jrderuiter / geneviz / src / geneviz / util / tabix.py View on Github external
def _open_file(self) -> pysam.TabixFile:
        """Yield an open :class:`pysam.TabixFile` for ``self._file_path``.

        The file is opened with ``self._parser`` as its record parser and
        is guaranteed to be closed once the caller is finished with it,
        even if an error occurs while the handle is in use.
        """
        handle = pysam.TabixFile(self._file_path, parser=self._parser)
        try:
            # hand the open file to the caller
            yield handle
        finally:
            # release the handle no matter how the caller exits
            handle.close()
github churchill-lab / g2gtools / g2gtools / vci.py View on Github external
_chrom = 0
        _pos = 1
        _shared = 2
        _deleted = 3 if not reverse else 4
        _inserted = 4 if not reverse else 3
        _fragment = 5

        num_lines_chrom = 0
        num_lines_processed = 0

        pos_from = 0
        pos_to = 0

        tree = IntervalTree()

        tabix_file = pysam.TabixFile(filename)
        iterator = tabix_file.fetch(contig, parser=pysam.asTuple())

        LOG.info("Parsing VCI, contig: {}".format(contig))

        for rec in iterator:
            num_lines_chrom += 1

            if len(rec) != 6:
                raise exceptions.G2GError("Unexpected line in VCI file: {0}".format(rec))

            if rec[2] == '.':
                continue

            """
            1 3000019 G . A 3000019
            1 3003170 A . T 3151