How to use the pysam.FastaFile class in pysam

To help you get started, we’ve selected a few pysam examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ga4gh / ga4gh-server / tests / datadriven / test_references.py View on Github external
def __init__(self, referenceSetId, fastaFile):
        """Set up the reference-set test and open the backing FASTA via pysam."""
        super(ReferenceSetTest, self).__init__(referenceSetId, fastaFile)
        # Keep a dedicated handle so tests can fetch sequences directly.
        handle = pysam.FastaFile(fastaFile)
        self._fastaFile = handle
github nanoporetech / medaka / medaka / variant.py View on Github external
'{} does not support decoding of variants'.format(label_scheme))

    if not hasattr(label_scheme, 'decode_consensus'):
        raise AttributeError(
            '{} does not support consensus decoding required '
            'for variant calling.'.format(label_scheme))

    meta_info = label_scheme.variant_metainfo

    ref_names = [r.ref_name for r in regions]
    with medaka.vcf.VCFWriter(
            args.output, 'w', version='4.1',
            contigs=ref_names, meta_info=meta_info) as vcf_writer:
        for reg in regions:
            logger.info("Processing {}.".format(reg))
            ref_seq = pysam.FastaFile(args.ref_fasta).fetch(
                reference=reg.ref_name).upper()

            samples = index.yield_from_feature_files([reg])
            trimmed_samples = trim_samples(samples)
            joined_samples = join_samples(
                trimmed_samples, ref_seq, label_scheme)

            for sample in joined_samples:
                variants = label_scheme.decode_variants(sample, ref_seq)
                vcf_writer.write_variants(variants, sort=True)
github iprada / Circle-Map / alignment.py View on Github external
#!/data/xsh723/anaconda/bin/python3.6
import os
import pysam as ps
import pybedtools as bt
from Bio import pairwise2
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
import time


# Script preamble: time the run, open the reference genome, and pre-compute
# BAM index / coverage files with external samtools/bedtools calls.
# NOTE(review): paths are hard-coded to one user's cluster layout.
begin = time.time()  # wall-clock start; presumably reported at the end of the script
fastafile = ps.FastaFile("/data/xsh723/scratch/hg38/canonical_hg38/hg38.fa")

os.chdir("/isdata/kroghgrp/xsh723/circle_map/test_data/aligner")
read_length = 100  # assumed simulated read length — TODO confirm against the simulator settings


# Index the BAM so pysam random access works, then build a merged
# coverage BED via bedtools genomecov | mergeBed.
os.system("samtools index coordinate_circle_qname_sorted_paired_end_sim_aln.bam")
os.system("bedtools genomecov -bg -ibam coordinate_circle_qname_sorted_paired_end_sim_aln.bam | mergeBed > circ_read_coverage.bed")


def is_soft_clipped(read):
    """Return True if any CIGAR operation of *read* is a soft clip.

    pysam cigar tuples are (operation, length); operation code 4 is
    soft clip ('S').

    Bug fix: the original returned inside the loop on the FIRST cigar
    tuple (``else: return False``), so a read whose first operation was
    a match (e.g. 100M20S) was wrongly reported as not soft-clipped.
    It also fell through and returned None for an empty CIGAR; an empty
    CIGAR now explicitly yields False.
    """
    for operation, _length in read.cigar:
        if operation == 4:
            return True
    return False
github childsish / sofia / templates / genomics / steps / sequence_set.py View on Github external
def get_fasta_set(filename):
        """Open *filename* as a random-access pysam.FastaFile and return it."""
        fasta = pysam.FastaFile(filename)
        return fasta
except ImportError:
github cdarby / samovar / pipeline / 4.filter / linkageFilter.py View on Github external
# Nucleotide alphabet used for pileup counting (N = ambiguous base).
NT = "ACGTN"

# Command-line interface: a phased BAM, a candidate-site BED, the reference
# FASTA, and an optional VCF of variants to exclude from linkage evidence.
parser = argparse.ArgumentParser(description='python siteFeatures.py --bam diploid_output/phased_possorted_bam.bam --bed haploidPos.bed --out diploidBam_haploidPos.pileup')
parser.add_argument('--bam', help='Input bam file name',required=True)
parser.add_argument('--bed', help='Input bed file name (output from classifySite)',required=True)
parser.add_argument('--ref',help="reference genome",required=True)
parser.add_argument('--vcfavoid',help="VCF of variants to NOT use as linkage",required=False,default=None)
args = parser.parse_args()

# Candidate intervals to examine, parsed by a helper defined elsewhere.
R = readIntervalFile(args.bed)
# Positions to skip as linkage evidence; empty set when no VCF is given.
if args.vcfavoid is not None:
	V = readVCF(args.vcfavoid)
else:
	V = set()

# Random-access handles: reference FASTA and coordinate-sorted BAM ("rb" = binary BAM).
reference = pysam.FastaFile(args.ref)
samfile = pysam.AlignmentFile(args.bam, "rb")
#for region_index in pymp_context.xrange(len(R)): #pymp.xrange returns an iterator and corresponds to dynamic scheduling.
for region in R:
	#region = R[region_index] 
	chrom = region[0]
	start = int(region[1])
	#end = int(region[2])
	alignments = []
	alignmentStarts = []
	alignmentEnds = []
	readNames = []
	basesAtSite = []
	pileupStart = None
	pileupEnd = None
	XH = []
github igvteam / igv-reports / igv_reports / fasta.py View on Github external
if None == region:

        with open(fasta_file,"r") as f:

            return(f.read())

    else :

        if isinstance(region,str):
            region = regions.parse_region(region)

        chr = region["chr"]
        start = region["start"] - 1
        end = region["end"]

        fasta = pysam.FastaFile(fasta_file)

        slice_seq = fasta.fetch(chr, start, end)

        fasta.close()

        return slice_seq
github YangLab / CIRCexplorer2 / circ2 / parser.py View on Github external
def check_fasta(fa_f, pysam_flag=True):
    """Ensure *fa_f* has a faidx index; return it opened or as a path.

    Builds the ``.fai`` index with ``pysam.faidx`` if it is missing.
    Returns a ``pysam.FastaFile`` when *pysam_flag* is true (the default),
    otherwise just the original file path.
    """
    fai_path = fa_f + '.fai'
    if not os.path.isfile(fai_path):
        # Create the index so random access on the FASTA works.
        pysam.faidx(fa_f)
    if not pysam_flag:
        return fa_f
    return pysam.FastaFile(fa_f)
github BuysDB / SingleCellMultiOmics / singlecellmultiomics / fastaProcessing / fastaHandle.py View on Github external
from pysamiterators import CachedFasta
from pysam import FastaFile


class CachedFastaNoHandle(CachedFasta):
    """CachedFasta variant that owns its pysam handle, created from a file path."""

    def __init__(self, path: str):
        # Open the FASTA ourselves and hand the fresh handle to CachedFasta.
        CachedFasta.__init__(self, FastaFile(path))


class FastaNoHandle(FastaFile):
    """A pysam FastaFile subclass constructed directly from a file path.

    Fix: the original ``__init__`` opened a *second* ``FastaFile(path)``
    and passed that handle object to ``FastaFile.__init__`` in place of a
    filename — the redundant handle kept the FASTA open a second time
    until garbage collection and served no purpose. Forward the path
    instead.
    """

    def __init__(self, path: str):
        # NOTE(review): pysam's FastaFile is a Cython extension type and
        # opens the file from its constructor arguments; no extra handle
        # must be created here.
        FastaFile.__init__(self, path)
github SUwonglab / arcsv / arcsv / sv_inference.py View on Github external
print('[inference] adjacency graph:')
        g.print_summary()
        print('')

    altered_reference_file = open(os.path.join(outdir, 'altered.fasta'), 'w')
    altered_reference_data = open(os.path.join(outdir, 'altered.pkl'), 'wb')
    qnames, block_positions, insertion_sizes, del_sizes = [], [], [], []
    simplified_blocks, simplified_paths = [], []
    has_left_flank, has_right_flank = [], []
    sv_logfile = open(os.path.join(outdir, 'logging', 'graph_log.txt'), 'w')
    sv_outfile = open(os.path.join(outdir, 'arcsv_out.tab'), 'w')
    sv_outfile.write(svout_header_line())
    split_outfile = open(os.path.join(outdir, 'split_support.txt'), 'w')
    split_outfile.write(splitout_header_line())

    ref = pysam.FastaFile(reference_files['reference'])

    supp_edges = len([e for e in g.graph.es if e['support'] >= opts['min_edge_support']])
    unsupp_edges = len([e for e in g.graph.es if e['support'] < opts['min_edge_support']
                        and e['support'] > 0])
    sv_logfile.write('graph(nodes/supp edges/unsupp edges)\t{0}\t{1}\t{2}\n'
                     .format(g.size, supp_edges, unsupp_edges))

    subgraphs = []
    for i in range(len(gap_indices) - 1):
        start_block = gap_indices[i]
        end_block = gap_indices[i+1]
        if opts['verbosity'] > 1:
            print('[inference] calling decompose {0} {1}'.format(start_block, end_block))
        s = decompose_graph(opts, g, start_block, end_block)
        subgraphs.extend(s)