def __init__(self, referenceSetId, fastaFile):
    super(ReferenceSetTest, self).__init__(referenceSetId, fastaFile)
    # Keep an open pysam handle to the FASTA for sequence lookups.
    self._fastaFile = pysam.FastaFile(fastaFile)
if not hasattr(label_scheme, 'decode_variants'):
    raise AttributeError(
        '{} does not support decoding of variants'.format(label_scheme))
if not hasattr(label_scheme, 'decode_consensus'):
    raise AttributeError(
        '{} does not support consensus decoding required '
        'for variant calling.'.format(label_scheme))
meta_info = label_scheme.variant_metainfo
ref_names = [r.ref_name for r in regions]
with medaka.vcf.VCFWriter(
        args.output, 'w', version='4.1',
        contigs=ref_names, meta_info=meta_info) as vcf_writer:
    for reg in regions:
        logger.info("Processing {}.".format(reg))
        ref_seq = pysam.FastaFile(args.ref_fasta).fetch(
            reference=reg.ref_name).upper()
        samples = index.yield_from_feature_files([reg])
        trimmed_samples = trim_samples(samples)
        joined_samples = join_samples(
            trimmed_samples, ref_seq, label_scheme)
        for sample in joined_samples:
            variants = label_scheme.decode_variants(sample, ref_seq)
            vcf_writer.write_variants(variants, sort=True)
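# A minimal, self-contained sketch of the reference fetch used above
# ('ref.fa' and 'chr1' are placeholder assumptions, not from the snippet):
import pysam

with pysam.FastaFile('ref.fa') as fa:
    ref_seq = fa.fetch(reference='chr1').upper()
    print(len(ref_seq))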
#!/data/xsh723/anaconda/bin/python3.6
import os
import time

import pysam as ps
import pybedtools as bt
from Bio import pairwise2
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

begin = time.time()

fastafile = ps.FastaFile("/data/xsh723/scratch/hg38/canonical_hg38/hg38.fa")
os.chdir("/isdata/kroghgrp/xsh723/circle_map/test_data/aligner")
read_length = 100

# Index the simulated BAM, then build merged read-coverage intervals.
os.system("samtools index coordinate_circle_qname_sorted_paired_end_sim_aln.bam")
os.system("bedtools genomecov -bg -ibam coordinate_circle_qname_sorted_paired_end_sim_aln.bam | mergeBed > circ_read_coverage.bed")
def is_soft_clipped(read):
    # CIGAR operation code 4 == soft clip; scan all operations rather
    # than deciding from the first one alone.
    for cigar in read.cigar:
        if cigar[0] == 4:
            return True
    return False
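# Hedged usage sketch for is_soft_clipped ('example.bam' is a placeholder;
# assumes an indexed BAM):
bam = ps.AlignmentFile("example.bam", "rb")
n_clipped = sum(1 for read in bam.fetch()
                if read.cigartuples and is_soft_clipped(read))
print("soft-clipped reads:", n_clipped)
bam.close()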
def get_fasta_set(filename):
    return pysam.FastaFile(filename)

try:
    import pysam  # reconstructed guard; the original 'try:' line was lost
except ImportError:
    pysam = None
NT = "ACGTN"
import argparse
import pysam

parser = argparse.ArgumentParser(description='python siteFeatures.py --bam diploid_output/phased_possorted_bam.bam --bed haploidPos.bed --out diploidBam_haploidPos.pileup')
parser.add_argument('--bam', help='Input bam file name', required=True)
parser.add_argument('--bed', help='Input bed file name (output from classifySite)', required=True)
parser.add_argument('--ref', help='Reference genome', required=True)
parser.add_argument('--vcfavoid', help='VCF of variants to NOT use as linkage', required=False, default=None)
args = parser.parse_args()

# readIntervalFile and readVCF are helpers defined elsewhere in the script.
R = readIntervalFile(args.bed)
if args.vcfavoid is not None:
    V = readVCF(args.vcfavoid)
else:
    V = set()

reference = pysam.FastaFile(args.ref)
samfile = pysam.AlignmentFile(args.bam, "rb")
# for region_index in pymp_context.xrange(len(R)):  # pymp.xrange returns an iterator and corresponds to dynamic scheduling.
for region in R:
    # region = R[region_index]
    chrom = region[0]
    start = int(region[1])
    # end = int(region[2])
    alignments = []
    alignmentStarts = []
    alignmentEnds = []
    readNames = []
    basesAtSite = []
    pileupStart = None
    pileupEnd = None
    XH = []
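    # The loop is truncated here; a hedged sketch of collecting the per-site
    # pileup with pysam (base/read handling is an assumption, not the
    # original author's code):
    for col in samfile.pileup(chrom, start, start + 1, truncate=True):
        for pread in col.pileups:
            if pread.query_position is not None:
                basesAtSite.append(pread.alignment.query_sequence[pread.query_position])
                readNames.append(pread.alignment.query_name)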
def fetch_fasta_slice(fasta_file, region=None):
    # Reconstructed signature: the original 'def' line was lost, and the
    # name fetch_fasta_slice is hypothetical.
    if region is None:
        # No region given: return the raw file contents.
        with open(fasta_file, "r") as f:
            return f.read()
    else:
        if isinstance(region, str):
            region = regions.parse_region(region)
        chr = region["chr"]
        start = region["start"] - 1  # convert 1-based to 0-based
        end = region["end"]
        fasta = pysam.FastaFile(fasta_file)
        slice_seq = fasta.fetch(chr, start, end)
        fasta.close()
        return slice_seq
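# Hedged usage of the reconstructed function ('ref.fa' and the region dict
# are placeholders):
seq = fetch_fasta_slice("ref.fa", {"chr": "chr1", "start": 1, "end": 100})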
import os
import pysam

def check_fasta(fa_f, pysam_flag=True):
    # Build the .fai index on demand before opening.
    if not os.path.isfile(fa_f + '.fai'):
        pysam.faidx(fa_f)
    if pysam_flag:  # return pysam FastaFile object
        fa = pysam.FastaFile(fa_f)
        return fa
    else:  # return fasta file path
        return fa_f
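# Hedged usage of check_fasta ('ref.fa' is a placeholder); the returned
# handle exposes pysam.FastaFile metadata:
fa = check_fasta("ref.fa")
for name, length in zip(fa.references, fa.lengths):
    print(name, length)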
from pysam import FastaFile
from pysamiterators import CachedFasta

class CachedFastaNoHandle(CachedFasta):
    # Own the pysam handle: open it from a path rather than receiving it.
    def __init__(self, path: str):
        handle = FastaFile(path)
        CachedFasta.__init__(self, handle)

class FastaNoHandle(FastaFile):
    # pysam's FastaFile already opens `path` in its C-level constructor
    # (__cinit__), so no further initialisation is required here.
    def __init__(self, path: str):
        pass
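# Hedged usage sketch ('ref.fa' is a placeholder; CachedFasta is assumed
# to mirror the pysam fetch interface of the handle it wraps):
cached = CachedFastaNoHandle("ref.fa")
print(cached.fetch("chr1", 0, 100))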
print('[inference] adjacency graph:')
g.print_summary()
print('')

altered_reference_file = open(os.path.join(outdir, 'altered.fasta'), 'w')
altered_reference_data = open(os.path.join(outdir, 'altered.pkl'), 'wb')
qnames, block_positions, insertion_sizes, del_sizes = [], [], [], []
simplified_blocks, simplified_paths = [], []
has_left_flank, has_right_flank = [], []

sv_logfile = open(os.path.join(outdir, 'logging', 'graph_log.txt'), 'w')
sv_outfile = open(os.path.join(outdir, 'arcsv_out.tab'), 'w')
sv_outfile.write(svout_header_line())
split_outfile = open(os.path.join(outdir, 'split_support.txt'), 'w')
split_outfile.write(splitout_header_line())

ref = pysam.FastaFile(reference_files['reference'])

# Tally edges at or above the support threshold versus weakly supported ones.
supp_edges = len([e for e in g.graph.es if e['support'] >= opts['min_edge_support']])
unsupp_edges = len([e for e in g.graph.es
                    if 0 < e['support'] < opts['min_edge_support']])
sv_logfile.write('graph(nodes/supp edges/unsupp edges)\t{0}\t{1}\t{2}\n'
                 .format(g.size, supp_edges, unsupp_edges))
subgraphs = []
for i in range(len(gap_indices) - 1):
    start_block = gap_indices[i]
    end_block = gap_indices[i + 1]
    if opts['verbosity'] > 1:
        print('[inference] calling decompose {0} {1}'.format(start_block, end_block))
    s = decompose_graph(opts, g, start_block, end_block)
    subgraphs.extend(s)
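# The support filtering above uses igraph edge attributes; a minimal,
# self-contained sketch of that pattern on a toy graph (not arcsv's data):
import igraph as ig

toy = ig.Graph(n=3, edges=[(0, 1), (1, 2), (2, 0)])
toy.es['support'] = [3, 1, 0]
min_edge_support = 2
print(len([e for e in toy.es if e['support'] >= min_edge_support]))  # -> 1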
class FastaReader:
    # Hypothetical enclosing class name; the snippet showed only __init__.
    def __init__(self, filename):
        self._fh = FastaFile(filename)
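    # Hedged completion (these method names are assumptions, not from the
    # original snippet): delegate lookup and cleanup to the pysam handle.
    def get(self, contig, start=None, end=None):
        return self._fh.fetch(contig, start, end)

    def close(self):
        self._fh.close()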