How to use the conllu.compat.text function in conllu

To help you get started, we’ve selected a few conllu examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EmilStenstrom / conllu / tests / test_init.py View on Github external
def test_multiple_sentences(self):
        # Fixture: the same three-token sentence written twice, with a blank
        # line after each copy. dedent() strips the common leading indent.
        two_sentences = dedent("""\
            1   The     the    DET    DT   Definite=Def|PronType=Art   4   det     _   _
            2   dog     dog    NOUN   NN   Number=Sing                 5   nmod    _   SpaceAfter=No
            3  .       .      PUNCT  .    _                           5   punct   _   _

            1   The     the    DET    DT   Definite=Def|PronType=Art   4   det     _   _
            2   dog     dog    NOUN   NN   Number=Sing                 5   nmod    _   SpaceAfter=No
            3  .       .      PUNCT  .    _                           5   punct   _   _

        """)

        # Parse first, then render the parsed result through text(); the
        # rendered form is expected to list one TokenList per sentence.
        parsed = parse(two_sentences)
        rendered = text(parsed)

        self.assertEqual(rendered, "[TokenList, TokenList]")
github EmilStenstrom / conllu / tests / test_integration.py View on Github external
('feats', Token([
                    ("Mood", "Ind"),
                    ("Number", "Sing"),
                    ("Person", "3"),
                    ("Tense", "Pres"),
                    ("VerbForm", "Fin"),
                ])),
                ('head', 0),
                ('deprel', 'root'),
                ('deps', None),
                ('misc', None)
            ])
        )

        self.assertEqual(
            [text(child) for child in root.children],
            [
                "TokenTree",
                "TokenTree",
                "TokenTree",
            ]
        )

        self.assertEqual(
            root.metadata["text"],
            "The quick brown fox jumps over the lazy dog."
        )

        self.assertEqual(root.serialize(), data)

        self.assertEqual(
            capture_print(root.print_tree),
github EmilStenstrom / conllu / conllu / parser.py View on Github external
for i, field in enumerate(fields):
        # Allow parsing CoNLL-U files with fewer columns
        if i >= len(line):
            break

        if field in field_parsers:
            try:
                value = field_parsers[field](line, i)
            except ParseException as e:
                raise ParseException("Failed parsing field '{}': ".format(field) + str(e))

        else:
            value = line[i]

        data[text(field)] = value

    return data
github EmilStenstrom / conllu / conllu / parser.py View on Github external
new_metadata_parsers = DEFAULT_METADATA_PARSERS.copy()
        new_metadata_parsers.update(metadata_parsers)
        metadata_parsers = new_metadata_parsers

    custom_result = None
    if key in metadata_parsers:
        custom_result = metadata_parsers[key](key, value)
    elif "__fallback__" in metadata_parsers:
        custom_result = metadata_parsers["__fallback__"](key, value)

    # Allow returning pair instead of list of pairs from metadata parsers
    if custom_result:
        if isinstance(custom_result, tuple):
            key, value = custom_result
            return [(text(key), value)]
        return [(text(key), value) for key, value in custom_result]

    if not key or not value:
        # Lines without value are invalid by default
        return []

    return [(text(key), value)]
github EmilStenstrom / conllu / conllu / parser.py View on Github external
custom_result = metadata_parsers[key](key, value)
    elif "__fallback__" in metadata_parsers:
        custom_result = metadata_parsers["__fallback__"](key, value)

    # Allow returning pair instead of list of pairs from metadata parsers
    if custom_result:
        if isinstance(custom_result, tuple):
            key, value = custom_result
            return [(text(key), value)]
        return [(text(key), value) for key, value in custom_result]

    if not key or not value:
        # Lines without value are invalid by default
        return []

    return [(text(key), value)]