Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_simple(self):
    """Check the field names read from a ``global.columns`` header line.

    The expected result is the header's column names, lower-cased and in
    their original order.
    """
    expected_fields = ["id", "form", "upos", "head", "deprel", "misc", "parseme:mwe"]

    # dedent() strips the shared leading indentation, so the text handed to
    # the parser starts at column 0.
    source_text = dedent("""\
        # global.columns = ID FORM UPOS HEAD DEPREL MISC PARSEME:MWE
        1\tDer\tDET\t2\tdet\t_\t*
    """)

    parsed_fields = parse_conllu_plus_fields(string_to_file(source_text))
    self.assertEqual(parsed_fields, expected_fields)
def test_empty(self):
    """Check that no fields are reported for an empty string or ``None`` input.

    Both inputs are wrapped with ``string_to_file`` before being passed to
    ``parse_conllu_plus_fields``; the expected result in each case is ``None``.
    """
    # assertIsNone checks identity with None and gives a clearer failure
    # message than assertEqual(..., None).
    self.assertIsNone(parse_conllu_plus_fields(string_to_file("")))
    self.assertIsNone(parse_conllu_plus_fields(string_to_file(None)))
def test_empty_columns(self):
    """Check that a ``global.columns`` header listing no columns gives ``None``."""
    # dedent() strips the shared leading indentation, so the text handed to
    # the parser starts at column 0.
    data = dedent("""\
        # global.columns =
        1\tDer\tDET\t2\tdet\t_\t*
    """)
    # assertIsNone checks identity with None and gives a clearer failure
    # message than assertEqual(..., None).
    self.assertIsNone(parse_conllu_plus_fields(string_to_file(data)))
def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    """Lazily parse ``in_file`` and yield one ``TokenList`` per sentence.

    Args:
        in_file: An object with a ``read`` attribute (an opened file).
        fields: Column names to use. When falsy, they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)`` on first iteration.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded to ``parse_conllu_plus_fields`` and
            ``parse_token_and_metadata``.

    Returns:
        A generator of ``TokenList`` objects, one for each item produced by
        ``parse_sentences(in_file)``.

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute. This is
            raised by the call itself, not deferred to the first iteration.
    """
    # Validate here rather than inside the generator body: a generator
    # function runs no code until the first next(), which would postpone
    # this error to wherever the result happens to be consumed.
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError("Invalid file, 'parse_incr' needs an opened file as input")

    def _token_lists():
        # Reading the header stays lazy so nothing is consumed from in_file
        # until the caller starts iterating.
        columns = fields or parse_conllu_plus_fields(
            in_file, metadata_parsers=metadata_parsers
        )
        for sentence in parse_sentences(in_file):
            yield TokenList(*parse_token_and_metadata(
                sentence,
                fields=columns,
                field_parsers=field_parsers,
                metadata_parsers=metadata_parsers,
            ))

    return _token_lists()