How to use the pomoxis.stats_from_bam.stats_from_aligned_read function in pomoxis

To help you get started, we’ve selected a few pomoxis examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nanoporetech / pomoxis / pomoxis / subsample_bam.py View on Github external
# filter orientation
    if (r.is_reverse and args.orientation == 'fwd') or \
        (not r.is_reverse and args.orientation == 'rev'):
        return True

    # filter quality
    if args.quality is not None:
        mean_q = np.mean(r.query_qualities)
        if mean_q < args.quality:
            logger.debug("Filtering {} by quality ({:.2f}).".format(r.query_name, mean_q))
            return True

    # filter accuracy or alignment coverage
    if args.accuracy is not None or args.coverage is not None or args.length is not None:
        stats = stats_from_aligned_read(r, bam.references, bam.lengths)
        if args.accuracy is not None and stats['acc'] < args.accuracy:
            logger.info("Filtering {} by accuracy ({:.2f}).".format(r.query_name, stats['acc']))
            return True
        if args.coverage is not None and stats['coverage'] < args.coverage:
            logger.info("Filtering {} by coverage ({:.2f}).".format(r.query_name, stats['coverage']))
            return True
        if args.length is not None and stats['read_length'] < args.length:
            logger.info("Filtering {} by length ({:.2f}).".format(r.query_name, stats['length']))
            return True
    # don't filter
    return False
github nanoporetech / pomoxis / pomoxis / common_errors_from_bam.py View on Github external
# reads should already be trimmed to a common aligment start and end point
    reads = [r for r in bam]
    ref_end, ref_start = reads[0].reference_end, reads[0].reference_start
    ref_len = ref_end - ref_start
    if not (all([r.reference_end == ref_end for r in reads]) and
            all([r.reference_start == ref_start for r in reads])):
        raise ValueError('Alignments have not been trimmed to a common overlap window, try trim_alignments')

    # get errors in each read
    data = {}
    qscores = []
    for aln in reads:
        errors = get_errors(aln, ref_seq)
        counts = count_errors(errors)
        # check we got the same error counts as stats_from_aligned_read
        stats = stats_from_aligned_read(aln, list(ref_lengths.keys()), list(ref_lengths.values()))
        for k in counts.keys():
            if stats[k] != counts[k]:
                msg = "Error counts {} don't match those from the CIGAR str {}."
                raise ValueError(msg.format(counts, {k: stats[k] for k in counts.keys()}))
        qscores.append((aln.query_name, get_qscores(counts, ref_len)))
        data[aln.query_name] = errors

    # get intersection of errors
    names = list(data.keys())
    common_errors = set(data[names[0]].keys())  # set of reference positions
    for name in names[1:]:
        common_errors = common_errors.intersection(set(data[name].keys()))
    remaining_errors = {}
    # loop through common errors, checking ref is the same and retaining the
    # error with the shortest edit distance
    for rp in common_errors: