Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# filter orientation
if (r.is_reverse and args.orientation == 'fwd') or \
(not r.is_reverse and args.orientation == 'rev'):
return True
# filter quality
if args.quality is not None:
mean_q = np.mean(r.query_qualities)
if mean_q < args.quality:
logger.debug("Filtering {} by quality ({:.2f}).".format(r.query_name, mean_q))
return True
# filter accuracy or alignment coverage
if args.accuracy is not None or args.coverage is not None or args.length is not None:
stats = stats_from_aligned_read(r, bam.references, bam.lengths)
if args.accuracy is not None and stats['acc'] < args.accuracy:
logger.info("Filtering {} by accuracy ({:.2f}).".format(r.query_name, stats['acc']))
return True
if args.coverage is not None and stats['coverage'] < args.coverage:
logger.info("Filtering {} by coverage ({:.2f}).".format(r.query_name, stats['coverage']))
return True
if args.length is not None and stats['read_length'] < args.length:
logger.info("Filtering {} by length ({:.2f}).".format(r.query_name, stats['length']))
return True
# don't filter
return False
# reads should already be trimmed to a common aligment start and end point
reads = [r for r in bam]
ref_end, ref_start = reads[0].reference_end, reads[0].reference_start
ref_len = ref_end - ref_start
if not (all([r.reference_end == ref_end for r in reads]) and
all([r.reference_start == ref_start for r in reads])):
raise ValueError('Alignments have not been trimmed to a common overlap window, try trim_alignments')
# get errors in each read
data = {}
qscores = []
for aln in reads:
errors = get_errors(aln, ref_seq)
counts = count_errors(errors)
# check we got the same error counts as stats_from_aligned_read
stats = stats_from_aligned_read(aln, list(ref_lengths.keys()), list(ref_lengths.values()))
for k in counts.keys():
if stats[k] != counts[k]:
msg = "Error counts {} don't match those from the CIGAR str {}."
raise ValueError(msg.format(counts, {k: stats[k] for k in counts.keys()}))
qscores.append((aln.query_name, get_qscores(counts, ref_len)))
data[aln.query_name] = errors
# get intersection of errors
names = list(data.keys())
common_errors = set(data[names[0]].keys()) # set of reference positions
for name in names[1:]:
common_errors = common_errors.intersection(set(data[name].keys()))
remaining_errors = {}
# loop through common errors, checking ref is the same and retaining the
# error with the shortest edit distance
for rp in common_errors: