Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
assert alns[0].cigarstring == "21M"
assert alns[0].reference_start == 30
assert alns[0].reference_end == 51
assert alns[0].is_reverse
qs = "<<<<<<<:<9/,&,22;;<<<"
read.query_qualities = pysam.qualitystring_to_array(qs)
alns = genome_source.align(Alignment(read))
import warnings
with warnings.catch_warnings():
# this is a python 2/3 incompatibility I think, where the warning
# indicates array.tostring() is deprecated but array.tobytes()
# only exists in py3
warnings.simplefilter("ignore")
assert pysam.qualities_to_qualitystring(alns[0].query_qualities) == qs[::-1]
mdtag=None,
name="dummy",
mapq=10,
baseq=30,
reference_start=0,
reference_id=0):
read = pysam.AlignedSegment()
read.seq = seq
read.cigarstring = cigar
if mdtag:
read.set_tag("MD", mdtag)
read.qname = name
read.mapq = mapq
read.reference_start = reference_start
read.reference_id = reference_id
qualities_string = pysam.qualities_to_qualitystring([baseq] * len(seq))
read.qual = qualities_string.encode("ascii")
return read
mdtag=None,
name="dummy",
mapq=10,
baseq=30,
reference_start=0,
reference_id=0):
read = pysam.AlignedSegment()
read.seq = seq
read.cigarstring = cigar
if mdtag:
read.set_tag("MD", mdtag)
read.qname = name
read.mapq = mapq
read.reference_start = reference_start
read.reference_id = reference_id
qualities_string = pysam.qualities_to_qualitystring([baseq] * len(seq))
read.qual = qualities_string.encode("ascii")
return read
firstseq = r1.query_sequence
secondseq = r2.query_sequence
# Check if sequences match : # they won't match if there are soft clips
if firstseq == secondseq :
if ForwardRead and PairedRead : del forwardcache[d] # assume that if sequences are same, reads are same too (mapper should treat them in a similar way)
elif ForwardRead : del forwardsingle[d]
elif not ForwardRead and PairedRead : del reversecache[d], positioncache[d]
else : del reversesingle[d], positionsingle[d]
return
newqual = pysam.qualities_to_qualitystring(r1.query_qualities)
secondqual = pysam.qualities_to_qualitystring(r2.query_qualities)
if not ForwardRead : # reverse seq and qual
firstseq = firstseq[::-1]
secondseq = secondseq[::-1]
newqual = newqual[::-1]
secondqual = secondqual[::-1]
# Check that the MD tag actually exists
try:
r1_MD = r1.get_tag('MD')
r2_MD = r2.get_tag('MD')
except KeyError:
sys.stderr.write("Error: MD tag doesn't exist in input file\n")
sys.exit(1)
# Turn seq and qual into a list
r2 = reversesingle[d]
firstseq = r1.query_sequence
secondseq = r2.query_sequence
# Check if sequences match : # they won't match if there are soft clips
if firstseq == secondseq :
if ForwardRead and PairedRead : del forwardcache[d] # assume that if sequences are same, reads are same too (mapper should treat them in a similar way)
elif ForwardRead : del forwardsingle[d]
elif not ForwardRead and PairedRead : del reversecache[d], positioncache[d]
else : del reversesingle[d], positionsingle[d]
return
newqual = pysam.qualities_to_qualitystring(r1.query_qualities)
secondqual = pysam.qualities_to_qualitystring(r2.query_qualities)
if not ForwardRead : # reverse seq and qual
firstseq = firstseq[::-1]
secondseq = secondseq[::-1]
newqual = newqual[::-1]
secondqual = secondqual[::-1]
# Check that the MD tag actually exists
try:
r1_MD = r1.get_tag('MD')
r2_MD = r2.get_tag('MD')
except KeyError:
sys.stderr.write("Error: MD tag doesn't exist in input file\n")
sys.exit(1)
if cigarlist[-1] != 4 :
MinFlankStart = 0
# Check if the MD tag actually exists
try:
row_MD = row.get_tag('MD')
except KeyError:
sys.stderr.write("Error: MD tag doesn't exist in input file\n")
sys.exit(1)
mincut, maxcut = FindCutoffIndices(newcigar, MinFlankEnd, MinFlankStart, ForwardRead)
# If read does not need to be split, just copy previous read object to new list
if splits == 0 and newcigar == row.cigar :
newqual = DiscardVariants(pysam.qualities_to_qualitystring(row.query_qualities), row_MD, newcigar, mincut, maxcut)
newrow = CreateReadObject(row, row.query_sequence, newqual, newcigar, row.reference_start)
yield newrow
elif splits == 0 : # cigar, seq, qual have been trimmed
newseq = row.query_sequence[start:end]
newqual = pysam.qualities_to_qualitystring(row.query_qualities)[start:end]
newqual = DiscardVariants(newqual, row_MD, newcigar, mincut, maxcut)
newrow = CreateReadObject(row, newseq, newqual, newcigar, row.reference_start)
yield newrow
else :
if newcigar != row.cigar :
row.cigar = newcigar
q = pysam.qualities_to_qualitystring(row.query_qualities)[start:end]
# ===== Write consensus bam =====
if tag_dict[tag] == 2:
doubletons += 1
doubleton_bam.write(SSCS_read)
else:
SSCS_bam.write(SSCS_read)
SSCS_reads += 1
# ===== write as fastq file =====
#if SSCS_read.is_reverse and SSCS_read.is_read1:
#fastq_seq = SSCS_read.query_sequence.decode("utf-8")
fastq_seq = SSCS[0]
if 'rev' in tag:
fastq_seq = reverse_seq(fastq_seq)
fastq_qual = pysam.qualities_to_qualitystring(reversed(SSCS[1]))
else:
fastq_qual = pysam.qualities_to_qualitystring(SSCS[1])
if tag_dict[tag] == 2:
if 'R1' in tag:
doubleton_fastqFile1.write('@{}\n{}\n+\n{}\n'.format(SSCS_read.query_name, fastq_seq, fastq_qual))
else:
doubleton_fastqFile2.write('@{}\n{}\n+\n{}\n'.format(SSCS_read.query_name, fastq_seq, fastq_qual))
else:
if 'R1' in tag:
fastqFile1.write('@{}\n{}\n+\n{}\n'.format(SSCS_read.query_name, fastq_seq, fastq_qual))
else:
fastqFile2.write('@{}\n{}\n+\n{}\n'.format(SSCS_read.query_name, fastq_seq, fastq_qual))
# Remove read from dictionary after writing
del bam_dict[tag]
def quality_array_to_string(quality_list):
"""Convert list of phred quality values to string.
:param quality_list: List of phred quality scores.
:returns: Quality string.
:rtype: str
"""
return pysam.qualities_to_qualitystring(quality_list)
def CreateSplitRead(row, position, k, i) :
startread = position
newcigar = []
newstring = ''
oldseq = row.query_sequence
oldqual = pysam.qualities_to_qualitystring(row.query_qualities)
newqual = ''
while k < len(row.cigar) :
cigartype, cigarlength = row.cigar[k]
# cigartype 1 stands for insertion
# in this case, position should be kept as it is
if cigartype == 1 :
newstring += oldseq[i:(cigarlength+i)]
newqual += oldqual[i:(cigarlength+i)]
i += cigarlength
newcigar.append((cigartype, cigarlength))
k += 1
# cigartype 2 stands for deletion