How to use the conllu.models.TokenList class in conllu

To help you get started, we’ve selected a few conllu examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EmilStenstrom / conllu / tests / test_models.py View on Github external
tokenlist4 = TokenList([{"id": 1}, {"id": 2}, {"id": 3}], {"meta1": "data1"})
        tokenlist5 = TokenList([{"id": 4}, {"id": 5}, {"id": 6}], {"meta2": "data2"})
        tokenlist4.extend(tokenlist5)
        tokenlist6 = TokenList([{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}],
                               {"meta1": "data1", "meta2": "data2"})
        self.assertEqual(tokenlist4, tokenlist6)

        tokenlist7 = TokenList([{"id": 1}, {"id": 2}, {"id": 3}], "abc")
        tokenlist8 = TokenList([{"id": 4}, {"id": 5}, {"id": 6}], "de")
        tokenlist7.extend(tokenlist8)
        tokenlist9 = TokenList([{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}], "abcde")
        self.assertEqual(tokenlist7, tokenlist9)

        tokenlist7 = TokenList([{"id": 1}, {"id": 2}, {"id": 3}], "abc")
        tokenlist8 = TokenList([{"id": 4}, {"id": 5}, {"id": 6}], {"meta2": "data2"})
        tokenlist7.extend(tokenlist8)
        tokenlist9 = TokenList([{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}],
                               ["abc", {"meta2": "data2"}])
        self.assertEqual(tokenlist7, tokenlist9)
github EmilStenstrom / conllu / tests / test_models.py View on Github external
def test_clear(self):
        """Check that ``clear()`` leaves no tokens and resets metadata to ``None``."""
        cleared = TokenList([{"id": 1}, {"id": 2}, {"id": 3}], {"meta": "data"})
        cleared.clear()

        remaining = len(cleared.tokens)
        self.assertEqual(remaining, 0)
        self.assertEqual(cleared.metadata, None)
github EmilStenstrom / conllu / tests / test_models.py View on Github external
def test_copy(self):
        """Check that ``copy()`` yields a distinct object that compares equal to its source."""
        source = TokenList([{"id": 1}, {"id": 2}, {"id": 3}], {"meta": "data"})
        duplicate = source.copy()

        # Different identity, same content.
        self.assertIsNot(source, duplicate)
        self.assertEqual(source, duplicate)
github EmilStenstrom / conllu / tests / test_models.py View on Github external
def test_and_filtering(self):
        """Check that several keyword arguments to ``filter`` must all match."""
        sentence = TokenList([
            {"id": 1, "form": "a", "field": "x"},
            {"id": 2, "form": "dog", "field": "x"},
            {"id": 3, "form": "dog", "field": "y"},
        ])

        # Only token 2 has both field == "x" and id == 2.
        both_match = sentence.filter(field="x", id=2)
        expected = TokenList([{"id": 2, "form": "dog", "field": "x"}])
        self.assertEqual(both_match, expected)

        # Token 3 has field == "y", so nothing satisfies both conditions.
        none_match = sentence.filter(field="x", id=3)
        self.assertEqual(none_match, TokenList([]))
github EmilStenstrom / conllu / tests / test_models.py View on Github external
def test_nested_filtering(self):
        """Check filtering on a nested key chained with filtering on a top-level key.

        ``feats__Degree`` addresses the ``Degree`` entry inside each token's
        ``feats`` value. Both chaining orders are expected to leave only the
        "brown" token.
        """
        tokenlist = TokenList([
            {"form": "The", "feats": Token([('Definite', 'Def'), ('PronType', 'Art')])},
            {"form": "quick", "feats": Token([('Degree', 'Pos')])},
            {"form": "brown", "feats": Token([('Degree', 'Pos')])},
            {"form": "fox", "feats": Token([('Number', 'Sing')])},
        ])
        # Nested filter first, then the top-level filter.
        self.assertEqual(
            tokenlist.filter(feats__Degree="Pos").filter(form="brown"),
            TokenList([
                {"form": "brown", "feats": Token([('Degree', 'Pos')])},
            ])
        )
        # Same filters in the opposite order must give the same result.
        self.assertEqual(
            tokenlist.filter(form="brown").filter(feats__Degree="Pos"),
            TokenList([
                {"form": "brown", "feats": Token([('Degree', 'Pos')])},
            ])
        )
github EmilStenstrom / conllu / tests / test_models.py View on Github external
def test_extend(self):
        """Exercise ``extend`` with a plain list and with several metadata combinations."""

        def id_tokens(first, last):
            # Fresh ``{"id": n}`` dicts for n in first..last (inclusive).
            return [{"id": n} for n in range(first, last + 1)]

        # Extending with a plain list; neither side carries metadata.
        plain = TokenList(id_tokens(1, 3))
        extra = id_tokens(4, 6)
        plain.extend(extra)
        self.assertEqual(plain, TokenList(id_tokens(1, 6)))

        # Dict metadata on both sides: expected result holds the keys of both dicts.
        dict_left = TokenList(id_tokens(1, 3), {"meta1": "data1"})
        dict_right = TokenList(id_tokens(4, 6), {"meta2": "data2"})
        dict_left.extend(dict_right)
        dict_expected = TokenList(id_tokens(1, 6),
                                  {"meta1": "data1", "meta2": "data2"})
        self.assertEqual(dict_left, dict_expected)

        # String metadata on both sides: expected result is the concatenation.
        str_left = TokenList(id_tokens(1, 3), "abc")
        str_right = TokenList(id_tokens(4, 6), "de")
        str_left.extend(str_right)
        str_expected = TokenList(id_tokens(1, 6), "abcde")
        self.assertEqual(str_left, str_expected)

        # Mixed metadata types: expected result is a list holding both values.
        mixed_left = TokenList(id_tokens(1, 3), "abc")
        mixed_right = TokenList(id_tokens(4, 6), {"meta2": "data2"})
        mixed_left.extend(mixed_right)
        mixed_expected = TokenList(id_tokens(1, 6),
                                   ["abc", {"meta2": "data2"}])
        self.assertEqual(mixed_left, mixed_expected)
github EmilStenstrom / conllu / conllu / models.py View on Github external
def __init__(self, tokens, metadata=None):
        """Create a TokenList from a list of tokens and optional metadata.

        Args:
            tokens: The tokens to store; must be a ``list`` instance.
            metadata: Value stored unchanged on ``self.metadata``
                (defaults to ``None``).

        Raises:
            ParseException: If ``tokens`` is not a list.
        """
        # Validate before delegating to the parent constructor, so that invalid
        # input (e.g. ``None``) is reported as ParseException instead of
        # whatever error the parent constructor would raise on it first.
        if not isinstance(tokens, list):
            raise ParseException("Can't create TokenList, tokens is not a list.")

        super(TokenList, self).__init__(tokens)
        self.metadata = metadata
github EmilStenstrom / conllu / conllu / models.py View on Github external
def serialize(self):
        """Serialize this tree by flattening it into a TokenList ordered by ``id``.

        The tokens of this node and all of its descendants are collected,
        sorted by their ``'id'`` value, wrapped in a ``TokenList`` together
        with ``self.metadata``, and handed to the module-level ``serialize``.

        Returns:
            The result of the module-level ``serialize`` for the flattened list.

        Raises:
            ParseException: If this node has no token, or its token has no
                ``"id"`` field.
        """
        if not self.token or "id" not in self.token:
            raise ParseException("Could not serialize tree, missing 'id' field.")

        # Explicit accumulator instead of a mutable default argument on the helper.
        tokens = []

        def collect(node):
            # Pre-order walk: the node's own token first, then each subtree.
            tokens.append(node.token)
            for child in node.children:
                collect(child)

        collect(self)
        # list.sort is stable, so tokens with equal ids keep their traversal order.
        tokens.sort(key=lambda t: t['id'])
        tokenlist = TokenList(tokens, self.metadata)

        return serialize(tokenlist)
github EmilStenstrom / conllu / conllu / models.py View on Github external
def filter(self, **kwargs):
        """Return a new TokenList holding the tokens that match every keyword query.

        Each keyword name is passed to ``traverse_dict`` as the query for a
        token, and the result is compared to the keyword's value with ``==``.
        Queries are applied one after another, so a token must satisfy all of
        them. With no keyword arguments, all tokens are kept. The returned
        TokenList is constructed without metadata.
        """
        remaining = self.tokens.copy()

        for query, value in kwargs.items():
            # Narrow the candidates down to those that satisfy this query.
            remaining = [
                candidate
                for candidate in remaining
                if traverse_dict(candidate, query) == value
            ]

        return TokenList(remaining)