Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_simple(self):
    # Two sentences separated by a single blank line must come back as two
    # separate strings, each without a trailing newline.
    # NOTE(review): the blank separator line was missing from this copy of
    # the test (six contiguous token lines cannot match the two expected
    # sentences); it is restored here -- confirm against the upstream test.
    data = dedent("""\
        1\thej
        2\tdå
        3\thej

        1\thej
        2\tdå
        3\thej
    """)
    sentences = list(parse_sentences(string_to_file(data)))
    self.assertEqual(sentences, [
        '1\thej\n2\tdå\n3\thej',
        '1\thej\n2\tdå\n3\thej',
    ])
def test_ends_without_newline(self):
    # Input whose last line has no terminating newline must still be
    # returned as one complete sentence.
    raw_text = "1\thej\n2\tdå"
    expected = ['1\thej\n2\tdå']
    parsed = list(parse_sentences(string_to_file(raw_text)))
    self.assertEqual(parsed, expected)
def test_empty(self):
    # Neither an empty string nor None may produce any sentences.
    # Inputs are checked in the same order as before: "" first, then None.
    for blank_input in ("", None):
        parsed = list(parse_sentences(string_to_file(blank_input)))
        self.assertEqual(parsed, [])
def test_multiple_newlines(self):
    # Runs of several blank lines between sentences must be treated as a
    # single separator: three sentences are expected, with no empty entries.
    # NOTE(review): the blank lines were missing from this copy of the test
    # (the input was six contiguous token lines, which contradicts both the
    # test name and the three expected sentences); runs of two and three
    # blank lines are restored here -- confirm the counts against upstream.
    data = dedent("""\
        1\thej
        2\tdå


        1\thej
        2\tdå



        1\thej
        2\tdå
    """)
    sentences = list(parse_sentences(string_to_file(data)))
    self.assertEqual(sentences, [
        '1\thej\n2\tdå',
        '1\thej\n2\tdå',
        '1\thej\n2\tdå',
    ])
def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    """Incrementally parse an opened file, yielding one ``TokenList`` per sentence.

    The argument check runs immediately when ``parse_incr`` is called, so a
    wrong argument is reported at the call site. Reading and parsing remain
    lazy: nothing is consumed from ``in_file`` until the returned generator
    is iterated.

    Args:
        in_file: An object exposing a ``read`` attribute (an opened file).
        fields: Column names to use. When falsy, they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)`` on first iteration.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded unchanged to ``parse_conllu_plus_fields``
            and ``parse_token_and_metadata``.

    Returns:
        A generator of ``TokenList`` objects, one per sentence produced by
        ``parse_sentences(in_file)``.

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute.
    """
    # Validate eagerly: inside a generator body this check would be deferred
    # until the first next() call, far away from the faulty call site.
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError("Invalid file, 'parse_incr' needs an opened file as input")

    return _iter_token_lists(in_file, fields, field_parsers, metadata_parsers)


def _iter_token_lists(in_file, fields, field_parsers, metadata_parsers):
    """Lazily yield a ``TokenList`` for each sentence read from ``in_file``."""
    if not fields:
        # No explicit columns given: let the file itself declare them.
        fields = parse_conllu_plus_fields(in_file, metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield TokenList(*parse_token_and_metadata(
            sentence,
            fields=fields,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers
        ))