How to use errant - 6 common examples

To help you get started, we’ve selected a few errant examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chrisjbryant / errant / errant / alignment.py View on Github external
def get_all_merge_edits(self):
        """Build one Edit per maximal run of consecutive non-"M" alignments.

        The alignment sequence is partitioned into runs according to whether
        each step's first element equals "M". Runs of "M" steps are dropped;
        every other run is passed to ``self.merge_edits`` and the first item
        of its result (minus its leading element) is wrapped in an ``Edit``.

        Returns:
            list of ``Edit`` objects, in alignment order.
        """
        collected = []
        runs = groupby(self.align_seq, lambda step: step[0] == "M")
        for is_match, run in runs:
            # Matching stretches never produce an edit.
            if is_match:
                continue
            combined = self.merge_edits(list(run))
            # Drop the leading element (presumably the op label - confirm
            # against merge_edits) and keep the remaining span fields.
            collected.append(Edit(self.orig, self.cor, combined[0][1:]))
        return collected
github chrisjbryant / errant / errant / alignment.py View on Github external
def get_all_split_edits(self):
        """Build a separate Edit for every alignment step that is not "M".

        No merging is performed: each step whose first element differs from
        "M" becomes its own ``Edit``, constructed from the step with its
        leading element removed.

        Returns:
            list of ``Edit`` objects, in alignment order.
        """
        return [
            Edit(self.orig, self.cor, step[1:])
            for step in self.align_seq
            if step[0] != "M"
        ]
github chrisjbryant / errant / errant / en / merger.py View on Github external
# Split alignment into groups of M, T and rest. (T has a number after it)
    for op, group in groupby(alignment.align_seq, 
            lambda x: x[0][0] if x[0][0] in {"M", "T"} else False):
        group = list(group)
        # Ignore M
        if op == "M": continue
        # T is always split
        elif op == "T":
            for seq in group:
                edits.append(Edit(alignment.orig, alignment.cor, seq[1:]))
        # Process D, I and S subsequence
        else:
            processed = process_seq(group, alignment)
            # Turn the processed sequence into edits
            for seq in processed: 
                edits.append(Edit(alignment.orig, alignment.cor, seq[1:]))
    return edits
github chrisjbryant / errant / errant / alignment.py View on Github external
def get_all_equal_edits(self):
        """Build one Edit per run of consecutive alignments with the same op.

        Adjacent alignment steps are grouped by their first element. Runs
        whose label is "M" are skipped; every other run is handed to
        ``self.merge_edits`` and the first item of its result (minus its
        leading element) is wrapped in an ``Edit``.

        Returns:
            list of ``Edit`` objects, in alignment order.
        """
        runs_by_label = groupby(self.align_seq, lambda step: step[0])
        # Each run is consumed inside the same iteration that yields it, as
        # groupby requires before it advances to the next group.
        return [
            Edit(self.orig, self.cor, self.merge_edits(list(run))[0][1:])
            for label, run in runs_by_label
            if label != "M"
        ]
github chrisjbryant / errant / errant / commands / parallel_to_m2.py View on Github external
def main():
#    pr = cProfile.Profile()
#    pr.enable()

    # Parse command line args
    args = parse_args()
    print("Loading resources...")
    # Load Errant
    annotator = errant.load("en")
    # Open output m2 file
    out_m2 = open(args.out, "w")

    print("Processing parallel files...")
    # Process an arbitrary number of files line by line simultaneously. Python 3.3+
    # See https://tinyurl.com/y4cj4gth
    with ExitStack() as stack:
        in_files = [stack.enter_context(open(i)) for i in [args.orig]+args.cor]
        # Process each line of all input files
        for line in zip(*in_files):
            # Get the original and all the corrected texts
            orig = line[0].strip()
            cors = line[1:]
            # Skip the line if orig is empty
            if not orig: continue
            # Parse orig with spacy
github chrisjbryant / errant / errant / __init__.py View on Github external
if lang not in supported:
        raise Exception("%s is an unsupported or unknown language" % lang)

    # Load spacy
    nlp = nlp or spacy.load(lang, disable=["ner"])

    # Load language edit merger
    merger = import_module("errant.%s.merger" % lang)

    # Load language edit classifier
    classifier = import_module("errant.%s.classifier" % lang)
    # The English classifier needs spacy
    if lang == "en": classifier.nlp = nlp

    # Return a configured ERRANT annotator
    return Annotator(lang, nlp, merger, classifier)

errant

The ERRor ANnotation Toolkit (ERRANT). Automatically extract and classify edits in parallel sentences.

MIT
Latest version published 11 months ago

Package Health Score

59 / 100
Full package analysis