How to use edlib - 10 common examples

To help you get started, we’ve selected a few edlib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github rrwick / Bacsort / rmlst_distance_matrix.py View on Github external
if assembly_1 == assembly_2:
        distances[(assembly_1, assembly_2)] = 0.0
        return

    pattern = re.compile(r'\d+[\w=]')
    common_genes = set(gene_seqs[assembly_1]) & set(gene_seqs[assembly_2])
    if not common_genes:
        distances[(assembly_1, assembly_2)] = 1.0
        distances[(assembly_2, assembly_1)] = 1.0
        return

    alignments = []
    for gene in common_genes:
        gene_1 = gene_seqs[assembly_1][gene]
        gene_2 = gene_seqs[assembly_2][gene]
        result = edlib.align(gene_1, gene_2, 'NW', 'path')
        cigar = [(int(x[:-1]), x[-1]) for x in pattern.findall(result['cigar'])]
        alignment_length = sum(x[0] for x in cigar)
        match_count = sum(x[0] for x in cigar if x[1] == '=')
        identity = match_count / alignment_length
        alignments.append((identity, match_count, alignment_length))

    # alignments = sorted(alignments, key=lambda x: x[0])
    # discard_count = 0
    # while True:
    #     if (discard_count + 2) / len(alignments) < discard_best_worst_alignments:
    #         discard_count += 2
    #     else:
    #         break
    # discard_each_end = discard_count // 2
    # alignments = alignments[discard_count:-discard_count]
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
def parseMappingFileIllumina(input):
    fwdprimer = ''
    revprimer = ''
    samples = []
    with open(input, 'r') as inputfile:
        for line in inputfile:
            line = line.replace('\n', '')
            if line.startswith('#'):
                continue
            cols = line.split('\t')
            if not cols[0] in samples:
                samples.append(cols[0])
            match = edlib.align(cols[1], cols[2], mode="HW", k=0)
            if match["editDistance"] == 0:
                Trim = match["locations"][0][1]+1
                if fwdprimer == '':
                    fwdprimer = cols[2][Trim:]
                    revprimer = cols[3]
            else:
                fwdprimer = cols[2]
                revprimer = cols[3]
        return (samples, fwdprimer, revprimer)
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
def trimForPrimer(primer, seq, primer_mismatch):
    foralign = edlib.align(primer, seq, mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
    if foralign['editDistance'] < 0:
        return 0
    else:
        CutPos = foralign["locations"][0][1]+1
        return CutPos
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
NoPrimer += 1
                        continue
                    stringent = {}
                    stringent[BCLabel] = samples[BCLabel]['RevBarcode']
                    revBC, revBCLabel = AlignBarcode(read2[1], stringent, barcode_mismatch)
                    if revBC == '':
                        NoRevBarcode += 1
                        continue
                    #look for reverse primer in reverse read
                    revalign = edlib.align(RevPrimer, read2[1], mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                    if revalign['editDistance'] < 0: #not found
                        NoRevPrimer += 1
                        continue
                else:
                    #look for forward primer
                    foralign = edlib.align(fwdprimer, read1[1], mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                    if foralign['editDistance'] < 0: #not found
                        NoPrimer += 1
                        continue
                    if len(revbarcodes) > 0:
                        #look for valid revbarcodes
                        if len(revbarcodes) > 0:
                            revBC, revBCLabel = AlignBarcode(read2[1], revbarcodes, barcode_mismatch)
                            if revBC == '':
                                NoRevBarcode += 1
                                continue
                    #look for reverse primer in reverse read
                    revalign = edlib.align(revprimer, read2[1], mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                    if revalign['editDistance'] < 0: #not found
                        NoRevPrimer += 1
                        continue
                #if get here, then all is well, construct new header and trim reads
github iprada / Circle-Map / circlemap / utils.py View on Github external
nuc_and_ops =  np.array([1,2,3,4,5,6,7])
    encoded_nucs = number_encoding(soft_clipped_read['seq'])

    hits = 0

    min_score = len(soft_clipped_read['seq'])


    top_hits = {}


    if read.is_reverse:

        while hits < n_hits and min_score >= -10:

            alignment = edlib.align(soft_clipped_read['seq'], minus_strand, mode='HW', task='path')
            if hits ==0:
                if alignment['editDistance'] > max_edit:
                    return(None)



            for location in alignment['locations']:



                mask_bases = 'X' * ( location[1] - location[0])


                minus_strand = minus_strand[:location[0]] + mask_bases + minus_strand[location[1]:]

                hits += 1
github Genomon-Project / GenomonSV / genomon_sv / edlibFunction.py View on Github external
# [2]=ref1,[3]=ref1_rev,
                # [4]=ref2,[5]=ref2_rev,
                # [6]=ref, [7]=ref_rev
                edlib_ret = [100,100,100,100,100,100,100,100]

                # alt
                ret = edlib.align(line, fa_alt, mode="HW", task="path")
                edlib_ret[0] = ret["editDistance"]
                ret = edlib.align(rc_line, fa_alt, mode="HW", task="path")
                edlib_ret[1] = ret["editDistance"]

                if fa_ref == "":
                    # ref1
                    ret = edlib.align(line, fa_ref1, mode="HW", task="path")
                    edlib_ret[2] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref1, mode="HW", task="path")
                    edlib_ret[3] = ret["editDistance"]
                    # ref2
                    ret = edlib.align(line, fa_ref2, mode="HW", task="path")
                    edlib_ret[4] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref2, mode="HW", task="path")
                    edlib_ret[5] = ret["editDistance"]
                else:
                    # ref
                    ret = edlib.align(line, fa_ref, mode="HW", task="path")
                    edlib_ret[6] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref, mode="HW", task="path")
                    edlib_ret[7] = ret["editDistance"]

                if read_pair == 1:
                    d_edlib_read1[name] = edlib_ret
                else:
github Genomon-Project / GenomonSV / genomon_sv / edlibFunction.py View on Github external
edlib_ret[1] = ret["editDistance"]

                if fa_ref == "":
                    # ref1
                    ret = edlib.align(line, fa_ref1, mode="HW", task="path")
                    edlib_ret[2] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref1, mode="HW", task="path")
                    edlib_ret[3] = ret["editDistance"]
                    # ref2
                    ret = edlib.align(line, fa_ref2, mode="HW", task="path")
                    edlib_ret[4] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref2, mode="HW", task="path")
                    edlib_ret[5] = ret["editDistance"]
                else:
                    # ref
                    ret = edlib.align(line, fa_ref, mode="HW", task="path")
                    edlib_ret[6] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref, mode="HW", task="path")
                    edlib_ret[7] = ret["editDistance"]

                if read_pair == 1:
                    d_edlib_read1[name] = edlib_ret
                else:
                    d_edlib_read2[name] = edlib_ret

    numRef = 0 
    numAlt = 0
    numOther = 0
    for name in d_edlib_read1.keys():
        ret1 = d_edlib_read1[name]
        ret2 = d_edlib_read2[name]
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
multihits = 0
    findForPrimer = 0
    findRevPrimer = 0
    with open(outR1, 'w') as outfile1:
        with open(outR2, 'w') as outfile2:
            for read1, read2 in zip(file1, file2):
                Total += 1
                ffp = False
                frp = False
                R1Seq = read1[1][:RL]
                R1Qual = read1[2][:RL]
                R2Seq = read2[1][:RL]
                R2Qual = read2[2][:RL]
                ForTrim, RevTrim = (0,)*2
                #look for forward primer in forward read
                R1foralign = edlib.align(fwdprimer, R1Seq, mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                if R1foralign['editDistance'] < 0:
                    if require_primer == 'on' or full_length: #not found
                        continue
                else:
                    if len(R1foralign['locations']) > 1: #multiple hits
                        multihits += 1
                        continue
                    try:
                        ForTrim = R1foralign["locations"][0][1]+1
                        findForPrimer += 1
                        ffp = True
                    except IndexError:
                        pass
                R1revalign = edlib.align(RevComp(revprimer), R1Seq, mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                if R1revalign['editDistance'] < 0:
                    R1RevCut = RL
github nextgenusfs / amptk / amptk / process_ont.py View on Github external
def findFwdPrimer(primer, sequence, mismatch, equalities):
    # search for match
    align = edlib.align(primer, sequence, mode="HW",
                        k=int(mismatch), additionalEqualities=equalities)
    if align["editDistance"] >= 0:  # we found a hit
        TrimPos = align["locations"][0][1]+1
        return(TrimPos, align['editDistance'])
    else:  # return position will be False if not found
        return (False, False)
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
def findRevPrimer(primer, sequence, mismatch, equalities):
    #trim position
    TrimPos = None
    #search for match
    align = edlib.align(primer, sequence, mode="HW", task="locations", k=int(mismatch), additionalEqualities=equalities)
    if align["editDistance"] >= 0: #we found a hit
        TrimPos = align["locations"][0][0]
    #return position will be None if not found
    return TrimPos

edlib

Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance.

MIT
Latest version published 16 days ago

Package Health Score

82 / 100
Full package analysis

Popular edlib functions

Similar packages