How to use the nltk.Tree class in nltk

To help you get started, we’ve selected a few nltk.Tree examples, based on popular ways it is used in public projects.
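
For orientation, here is a minimal sketch of the core nltk.Tree API that the examples below build on; every call shown is standard NLTK 3:

from nltk import Tree

# build a tree from a bracketed string, or construct one by hand
t = Tree.fromstring('(S (NP (DT the) (NN cat)) (VP (VBD slept)))')
t2 = Tree('VP', [Tree('VBD', ['slept'])])

print(t.label())    # 'S'
print(t.leaves())   # ['the', 'cat', 'slept']
print(t[0])         # first child: the NP subtree
t.pretty_print()    # ASCII rendering of the tree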

github relwell / corenlp-xml-lib / test / test_document.py View on GitHub
def test_subtrees_for_phrase(self):
        t = self._sentence.subtrees_for_phrase("NP")[0]
        self.assertIsInstance(t, Tree)
        # assertEqual, not the deprecated assertEquals alias
        self.assertEqual("property", t[-1].leaves()[0])
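
subtrees_for_phrase is a corenlp-xml-lib helper, not part of NLTK; with a plain nltk.Tree, the filter argument of Tree.subtrees gives the same effect, as in this minimal sketch:

from nltk import Tree

t = Tree.fromstring('(S (NP (DT the) (NN property)) (VP (VBD sold)))')
# first NP subtree, analogous to subtrees_for_phrase("NP")[0]
np = next(t.subtrees(filter=lambda st: st.label() == 'NP'))
print(np.leaves())   # ['the', 'property']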
github baoy-nlp / FAParser / previous_repo / utils / distance_helper.py View on GitHub
        # merge the two subtrees; a node with an empty label is dissolved
        # and its children spliced in directly
        if node0.label() != '' and node1.label() != '':
            tr = [node0, node1]
        elif node0.label() == '' and node1.label() != '':
            tr = [c for c in node0] + [node1]
        elif node0.label() != '' and node1.label() == '':
            tr = [node0] + [c for c in node1]
        elif node0.label() == '' and node1.label() == '':
            tr = [c for c in node0] + [c for c in node1]

        # composite arc labels such as 'S+VP' are split and applied
        # innermost-first; each Tree(a, tr) adds one level of structure
        arc_list = str(arcdict[arc[idx]]).split('+')
        arc_list.reverse()
        for a in arc_list:
            if isinstance(tr, nltk.Tree):
                tr = [tr]
            tr = nltk.Tree(a, tr)

        return tr
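
The nltk.Tree usage to note is the closing loop: Tree(label, children) adds one level of structure, so reversing a '+'-separated composite label and wrapping repeatedly nests the children under the full label chain. The same pattern in isolation:

from nltk import Tree

tr = Tree('NP', ['dog'])
for label in reversed('S+VP'.split('+')):   # apply 'VP' first, then 'S'
    tr = Tree(label, [tr])
print(tr)   # (S (VP (NP dog)))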
github WladimirSidorenko / DiscourseSenser / dsenser / wang / implicit.py View on GitHub
set:
            set of syntactic productions

        """
        ret = set()
        # obtain token indices for each arg sentence
        snt_id = None
        snt2tok = self._get_snt2tok(a_rel[a_arg][TOK_LIST])
        # obtain set of leaves corresponding to that argument
        arg_leaves = set()
        subt_leaves = set()
        processed_leaves = set()
        itree = itree_str = inode_path = None
        for snt_id, toks in snt2tok.iteritems():  # Python 2; use .items() in Python 3
            itree_str = a_parses[a_doc_id][SENTENCES][snt_id][PARSE_TREE]
            itree = Tree.fromstring(itree_str)
            if not itree.leaves():
                print("Invalid parse tree for sentence {:d}".format(snt_id),
                      file=sys.stderr)
                continue
            # obtain all terminal syntactic nodes from the arg
            for itok in toks:
                inode_path = itree.leaf_treeposition(itok)
                arg_leaves.add(itree[inode_path])
            # check all subtrees (not efficient, but easy to implement)
            for s_t in itree.subtrees():
                subt_leaves.update(s_t.leaves())
                if subt_leaves.issubset(arg_leaves) and \
                   not subt_leaves.issubset(processed_leaves):
                    ret.update(str(p) for p in itree.productions()
                               if any(is_nonterminal(n)
                                      for n in p.rhs()))
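
Stripped of the project-specific bookkeeping, the nltk.Tree calls here are Tree.fromstring, leaves, leaf_treeposition, subtrees, and productions. A minimal sketch of the productions-collecting step on its own:

from nltk import Tree
from nltk.grammar import is_nonterminal

t = Tree.fromstring('(S (NP (DT the) (NN cat)) (VP (VBD sat)))')
for prod in t.productions():
    # keep syntactic rules only; skip lexical rules such as NN -> 'cat'
    if any(is_nonterminal(sym) for sym in prod.rhs()):
        print(prod)   # S -> NP VP, then NP -> DT NN, then VP -> VBD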
github NidhiSalian / DeepLearning-Chatbot / dependency_tree.py View on GitHub
def to_nltk_tree2(node):
    # tokens with dependents become Tree nodes labelled by tok_format;
    # leaf tokens are returned as plain strings
    if node.n_lefts + node.n_rights > 0:
        return Tree(tok_format(node), [to_nltk_tree2(child) for child in node.children])
    else:
        return tok_format(node)
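
Running the converter needs spaCy and a loaded model; tok_format is not shown in the extract, so the stand-in below (token text joined with its dependency label) is an assumption:

import spacy
from nltk import Tree

def tok_format(tok):
    # hypothetical stand-in for the repo's tok_format helper
    return "_".join([tok.orth_, tok.dep_])

nlp = spacy.load("en_core_web_sm")   # assumes this model is installed
doc = nlp("The quick brown fox jumps over the lazy dog.")
tree = to_nltk_tree2(next(doc.sents).root)
if isinstance(tree, Tree):
    tree.pretty_print()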
github dperezrada / keywords2vec / keywords_tokenizer.py View on GitHub
import re

import nltk
import unidecode

def get_nodes_for_ntlk(parent, stopwords):
    keywords = []
    for node in parent:
        if isinstance(node, nltk.Tree):
            # leaves of a chunk subtree are (word, tag) pairs
            phrase = " ".join([key.lower() for key, value in node.leaves()])
            phrase = unidecode.unidecode(phrase)
            if phrase not in stopwords:
                pattern = re.compile(r'([^\s\w-]|_)+')   # strip punctuation
                phrase = pattern.sub('', phrase).strip()
                keywords.append(phrase)
    return keywords
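
The (key, value) unpacking in node.leaves() implies parent is a chunk tree over POS-tagged tokens. One way to produce such input, using nltk.RegexpParser with an illustrative grammar and sentence:

import nltk

# requires the punkt and averaged_perceptron_tagger NLTK data packages
chunker = nltk.RegexpParser("NP: {<JJ>*<NN.*>+}")
tagged = nltk.pos_tag(nltk.word_tokenize("Deep learning models need large data sets."))
chunked = chunker.parse(tagged)   # an nltk.Tree whose NP chunks are subtrees
print(get_nodes_for_ntlk(chunked, stopwords=set()))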
github andreasvc / disco-dop / estimates.py View on GitHub
def main():
	from treebank import NegraCorpusReader
	from grammar import induce_srcg
	from plcfrs import parse, pprint_chart
	from containers import Grammar
	from nltk import Tree
	from math import exp
	corpus = NegraCorpusReader(".", "sample2.export", encoding="iso-8859-1")
	trees = list(corpus.parsed_sents())
	for a in trees: a.chomsky_normal_form(vertMarkov=1, horzMarkov=1)
	grammar = Grammar(induce_srcg(trees, corpus.sents()))
	trees = [Tree.parse("(ROOT (A (a 0) (b 1)))", parse_leaf=int),
			Tree.parse("(ROOT (a 0) (B (c 2) (b 1)))", parse_leaf=int),
			Tree.parse("(ROOT (a 0) (B (c 2) (b 1)))", parse_leaf=int),
			Tree.parse("(ROOT (C (b 0) (a 1)) (c 2))", parse_leaf=int),
			Tree.parse("(ROOT (C (b 0) (a 1)) (c 2))", parse_leaf=int),
			]
	sents =[["a","b"],
			["a","c","b"],
			["a","c","b"],
			["b","a","c"],
			["b","a","a"]]
	print "treebank:"
	for a in trees: print a
	print "\ngrammar:"
	grammar = induce_srcg(trees, sents)
	for (r,yf),w in sorted(grammar):
		print r[0], "-->", " ".join(r[1:]), yf, exp(w)
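
Note that this extract is Python 2 and NLTK 2-era code: print is a statement, and Tree.parse with its parse_leaf argument became Tree.fromstring with read_leaf in NLTK 3. The modern equivalent of the tree-building lines:

from nltk import Tree

t = Tree.fromstring("(ROOT (A (a 0) (b 1)))", read_leaf=int)
print(t.leaves())   # [0, 1]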
github plasticityai / magnitude / pymagnitude / third_party / allennlp / models / constituency_parser.py View on GitHub
                return [tree]

            argmax_split = start + 1
            # Find the next largest subspan such that
            # the left hand side is a constituent.
            for split in range(end - 1, start, -1):
                if (start, split) in spans_to_labels:
                    argmax_split = split
                    break

            left_trees = assemble_subtree(start, argmax_split)
            right_trees = assemble_subtree(argmax_split, end)
            children = left_trees + right_trees
            if labels is not None:
                # wrap the children in the (possibly unary) chain of labels
                # predicted for this span, innermost label first
                while labels:
                    children = [Tree(labels.pop(), children)]
            return children
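
The closing while loop is the span-labelling idiom: labels holds the chain of constituent labels predicted for one span (more than one when the span has unary productions above it), and popping them wraps the children one level at a time. In isolation:

from nltk import Tree

children = [Tree('NN', ['cat'])]
labels = ['S', 'NP']   # outermost label last, since pop() takes from the end
while labels:
    children = [Tree(labels.pop(), children)]
print(children[0])   # (S (NP (NN cat)))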
github uclnlp / inferbeddings / inferbeddings / nli / generate / operators.py View on GitHub
import nltk

def _remove_subtree_from_tree(tree, subtree_to_remove):
    # Tree subclasses list, so ``remove`` deletes the first child equal
    # to ``subtree_to_remove``; recursion catches nested occurrences
    for st in tree:
        if isinstance(st, nltk.Tree):
            if st == subtree_to_remove:
                tree.remove(st)
            _remove_subtree_from_tree(st, subtree_to_remove)
    return
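
Because nltk.Tree subclasses list, equality and remove compare subtrees structurally. A quick usage sketch with an illustrative sentence:

import nltk

t = nltk.Tree.fromstring('(S (NP (DT the) (NN cat)) (VP (VBD slept)))')
_remove_subtree_from_tree(t, t[0])   # drop the NP subtree
print(t)   # (S (VP (VBD slept)))

One caveat inherited from Python lists: removing a child while iterating over its parent can skip the following sibling, so adjacent duplicate matches may survive a single pass.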
github uclnlp / inferbeddings / inferbeddings / nli / util.py View on GitHub
def to_instance(d, tokenize=None):
        # SNLI-style records carry PTB-bracketed parse strings that
        # Tree.fromstring turns back into nltk.Tree objects
        sentence1 = d['sentence1']
        sentence1_parse = d['sentence1_parse']
        sentence1_tree = nltk.Tree.fromstring(sentence1_parse)
        sentence1_parse_tokens = sentence1_tree.leaves()
        sentence1_tokens = tokenize(sentence1) if tokenize else None

        sentence2 = d['sentence2']
        sentence2_parse = d['sentence2_parse']
        sentence2_tree = nltk.Tree.fromstring(sentence2_parse)
        sentence2_parse_tokens = sentence2_tree.leaves()
        sentence2_tokens = tokenize(sentence2) if tokenize else None

        gold_label = d['gold_label']

        instance = {
            'sentence1': sentence1,
            'sentence1_parse': sentence1_parse,
            'sentence1_parse_tokens': sentence1_parse_tokens,
            'sentence1_tokens': sentence1_tokens,
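
Besides leaves(), Tree.pos() recovers (token, tag) pairs from the same parse string, which is handy when no separate tokenizer is configured:

import nltk

tree = nltk.Tree.fromstring("(ROOT (S (NP (PRP I)) (VP (VBP like) (NP (NN tea)))))")
print(tree.leaves())   # ['I', 'like', 'tea']
print(tree.pos())      # [('I', 'PRP'), ('like', 'VBP'), ('tea', 'NN')]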
github nltk / nltk / nltk / tree.py View on GitHub
def demo():
    """
    A demonstration showing how Trees and Trees can be
    used.  This demonstration creates a Tree, and loads a
    Tree from the Treebank corpus,
    and shows the results of calling several of their methods.
    """

    from nltk import Tree, ProbabilisticTree

    # Demonstrate tree parsing.
    s = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))'
    t = Tree.fromstring(s)
    print("Convert bracketed string into tree:")
    print(t)
    print(t.__repr__())

    print("Display tree properties:")
    print(t.label())  # tree's constituent type
    print(t[0])  # tree's first child
    print(t[1])  # tree's second child
    print(t.height())
    print(t.leaves())
    print(t[1])
    print(t[1, 1])
    print(t[1, 1, 0])

    # Demonstrate tree modification.
    the_cat = t[0]