Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
l = line.split("\t")
read_id = l.pop(0)
if read_id in region_read_ids:
fast5_file = l.pop(0)
region_fast5_files[str(read_id)] = fast5_file.rstrip()
# --------------------------------------------------------
# PART 5: Make a region BAM and BAI file
# --------------------------------------------------------
new_bam = "reads.bam"
custom_print( "[ Writing to a new BAM file ] \t" + new_bam )
region_reads = pysam.view("-b", o_bam, draft_ga_coords, "-o", new_bam, catch_stdout=False)
new_bam_index = new_bam + ".bai"
custom_print( "[ Writing to a new BAI file ] \t" + new_bam_index )
pysam.index(new_bam, new_bam_index)
# --------------------------------------------------------
# PART 6: With user input ref coords, extract all
# aligned reads within these coordinates
# and make new FASTA file
# --------------------------------------------------------
# detect type of sequences file then handle accordingly
file_type = detect_fa_filetype(o_fa)
new_fa = "reads.fasta"
custom_print( "[ Writing to a new fasta file ]\t" + new_fa )
with open (new_fa, "w") as fout:
if ".gz" in file_type:
with gzip.open(o_fa, "rt") as fin:
if "fasta.gz" in file_type:
for title, seq in SimpleFastaParser(handle):
name = title.split(None, 1)[0]
stdout=subprocess.PIPE)
PairDict = sam_parser(ngmlrline, ngmlr_ext_dir)
else:
sam_nglmr_file = os.path.join(ngmlr_ext_dir, (prefix + '.sam'))
log.debug(
'[Alignment][ngmlral] - ngmlr -t %s -r %s -q %s -o %s -x ont' % (th, ref, fast_Q_file, sam_ngmlr_file))
ngmlrline = subprocess.Popen(
['ngmlr', '-t', str(th), '-r', str(ref), '-q', str(fast_Q_file), '-o', str(sam_ngmlr_file), '-x ont'],
stdout=subprocess.PIPE).wait()
outputfilebam = os.path.join(ngmlr_ext_dir, (prefix + '.tmp.bam'))
log.debug('[Alignment][ngmlral] - samtools view -Sb -@ %s %s -o %s' % (th, sam_nglmr_file, outputfilebam))
pysam.view("-Sb", "-@%s" % str(th), sam_nglmr_file, "-o%s" % outputfilebam, catch_stdout=False)
os.remove(sam_nglmr_file)
pysam.sort(outputfilebam, "-o%s" % bam_ngmlr_file, catch_stdout=False)
log.debug('[Alignment][ngmlral] - samtools index %s -@%s' % (bam_ngmlr_file, str(th)))
pysam.index(bam_ngmlr_file, "-@%s" % str(th), catch_stdout=False)
os.remove(outputfilebam)
else:
log.warning('[Alignment][ngmlral] - file %s already exists!' % bam_ngmlr_file)
try:
shutil.move(ngmlr_ext_dir, os.path.join(work_dir, out_dir))
except shutil.Error:
log.error("Unable to move %s" % ngmlr_ext_dir)
self._assertPathEmpty(destPath, inRepo=True)
self._moveFile(filePath, destPath, moveMode)
# move the index file if it exists, otherwise do indexing
indexPath = os.path.join(
os.path.split(filePath)[0],
readGroupSetName + self.bamIndexExtension)
indexMessage = ""
if os.path.exists(indexPath):
dstDir = os.path.split(destPath)[0]
self._moveFile(
indexPath,
os.path.join(dstDir, os.path.basename(indexPath)),
moveMode)
else:
pysam.index(destPath.encode('utf-8'))
indexMessage = " (and indexed)"
# finish
self._repoEmit("ReadGroupSet '{}' added to dataset '{}'{}".format(
fileName, datasetName, indexMessage))
@transform(reduceBamToChr19, suffix(".bam"), ".bam.bai")
def indexBam(infile, outfile):
'''
index the reduced bam file
'''
pysam.index(infile)
"""
mode = 'wb' if bam else 'w'
with pysam.AlignmentFile(fname, mode, header=header) as fh:
for ref_id, subreads in enumerate(alignments):
for aln in sorted(subreads, key=lambda x: x.rstart):
a = pysam.AlignedSegment()
a.reference_id = ref_id
a.query_name = aln.qname
a.query_sequence = aln.seq
a.reference_start = aln.rstart
a.cigarstring = aln.cigar
a.flag = aln.flag
a.mapping_quality = 60
fh.write(a)
if mode == 'wb':
pysam.index(fname)
sampName = sampName.replace(')','') # for R
if d.get(sampName,None):
print >> sys.stdout, '##ERROR: Duplicate input %s - ignored!!' % sampName
continue
d[sampName] = {} # new data
outreads = 0 # count of reads outside each bed region for library size normalization
ignoredoutreads = 0
outchrom = None
outstart = None
outend = None
notmapped = 0
# pysam expects to find infile.bai
bam_index = "%s.bai" % f
if (not os.path.isfile(bam_index)):
print >> sys.stdout,'indexing ',f
pysam.index(f)
samfile = pysam.Samfile( f, "rb" )
nbam = 0
for c in chroms:
try:
nbam += samfile.count(c,1,999999999) # for non bed region count
except:
pass
print 'bam %s has %d total reads' % (f,nbam)
ignored = 0 # this is per sample over all regions
ignoredoutreads = 0 # ditto but for total reads outside regions fudgtimate
for bedrec in dat:
hits = 0 # count by contig
chrom = bedrec[0].strip()
start = int(bedrec[1]) # new contig start becomes old inter-region end
end = int(bedrec[2])
regionID = bedrec[3]
def sort_and_index(file_name, sorted_prefix=None):
""" Sorts and indexes the bam file given by file_name.
"""
if sorted_prefix is None:
sorted_prefix = file_name.replace('.bam', '') + '_sorted'
sorted_name = sorted_prefix + '.bam'
log_subprocess.check_call(['samtools','sort', '-o', sorted_name, file_name])
pysam.index(sorted_name)
def indexed_bam(bam_file):
if not os.path.exists(bam_file + '.bai'):
pysam.index(bam_file)
bam = pysam.Samfile(bam_file, 'rb')
yield bam
bam.close()
input_sam = pysam.Samfile(input_file, "rb")
input_nreads = 0
for read in input_sam:
input_nreads += 1
if input_nreads > sample_nreads:
E.info("INPUT bam has %s reads, SAMPLE bam has %s reads"
% (input_nreads, sample_nreads))
E.info("INPUT being downsampled to match SAMPLE")
input_outfile = normalize(input_file,
input_nreads,
input_outfile,
sample_nreads)
shutil.copyfile(sample_file, sample_outfile)
pysam.index(sample_outfile)
return sample_outfile, input_outfile
elif sample_nreads > input_nreads:
E.info("SAMPLE bam has %s reads, INPUT bam has %s reads"
% (sample_nreads, input_nreads))
E.info("SAMPLE being downsampled to match INPUT")
sample_outfile = normalize(sample_file,
sample_nreads,
sample_outfile,
input_nreads)
shutil.copyfile(input_file, input_outfile)
pysam.index(input_outfile)
return sample_outfile, input_outfile