Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get_chunked_words(self, grid, chunk_types, tagset=None):
# Walks the word, POS-tag and chunk-tag columns of ``grid`` in parallel
# while tracking the currently open chunk on ``stack``.
# NOTE(review): the remainder of this method is not visible in this
# excerpt; the comments below describe only the part that is shown.
# n.b.: this method is very similar to conllstr2tree.
words = self._get_column(grid, self._colmap['words'])
pos_tags = self._get_column(grid, self._colmap['pos'])
# Only remap the POS tags when a tagset was requested and it differs
# from the tagset stored on this object.
if tagset and tagset != self._tagset:
pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
chunk_tags = self._get_column(grid, self._colmap['chunk'])
# The stack starts out holding just the (empty) root tree.
# Presumably an open chunk is pushed on top of it further down, which
# is why a length of 2 is read as "a chunk is open" -- TODO confirm.
stack = [Tree(self._root_label, [])]
for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
if chunk_tag == 'O':
state, chunk_type = 'O', ''
else:
# The two-name unpacking requires exactly one '-' in the tag;
# any other count makes this line raise ValueError.
(state, chunk_type) = chunk_tag.split('-')
# If it's a chunk we don't care about, treat it as O.
if chunk_types is not None and chunk_type not in chunk_types:
state = 'O'
# Treat a mismatching I like a B.
if state == 'I' and chunk_type != stack[-1].label():
state = 'B'
# For B or O: close any open chunks
if state in 'BO' and len(stack) == 2:
stack.pop()
# For B: start a new chunk.
def trees(self, edge, tree_class=Tree, complete=False):
    """
    Iterate over the tree structures associated with ``edge``.

    When ``edge`` is incomplete, each unexpanded child is encoded as
    a childless subtree whose node value is the corresponding
    terminal or nonterminal.

    :param edge: The edge whose trees are requested.
    :param tree_class: Passed through to ``self._trees`` as its
        ``tree_class`` keyword argument.
    :param complete: Passed through to ``self._trees`` as its second
        positional argument.
    :return: An iterator over whatever ``self._trees`` produces.
    :rtype: iter(Tree)

    :note: Trees that have a subtree in common may use one and the
        same Tree object to encode it.  Make a deep copy of each
        tree if this sharing must be eliminated.
    """
    # Every call hands a brand-new, empty memo dict to the helper.
    found = self._trees(edge, complete, memo={}, tree_class=tree_class)
    return iter(found)
def _selectprod_cb(self, production):
    """
    Highlight ``production`` in the production list and redraw the
    treelet so that it shows the one-level tree for ``production``.

    Any previously drawn treelet is destroyed first.  The new treelet
    is made draggable and centred on the treelet canvas.

    :param production: The selected production; its ``lhs()`` and
        ``rhs()`` are read to build the tree.
    """
    canvas = self._treelet_canvas

    self._prodlist.highlight(production)
    if self._treelet is not None:
        self._treelet.destroy()

    # Convert the production to a tree.  The child list is built
    # explicitly: assigning to a loop variable while iterating over
    # the right-hand side would leave the right-hand side untouched.
    # Nonterminals become childless subtrees; every other element is
    # kept as a leaf.
    children = [
        Tree(elt.symbol(), []) if isinstance(elt, Nonterminal) else elt
        for elt in production.rhs()
    ]
    tree = Tree(production.lhs().symbol(), children)

    # Draw the tree in the treelet area.
    fontsize = int(self._size.get())
    node_font = ('helvetica', -(fontsize + 4), 'bold')
    leaf_font = ('helvetica', -(fontsize + 2))
    self._treelet = tree_to_treesegment(
        canvas, tree, node_font=node_font, leaf_font=leaf_font
    )
    self._treelet['draggable'] = 1

    # Center the treelet: shift it so that the midpoint of its
    # bounding box lands on the midpoint of the canvas.
    (x1, y1, x2, y2) = self._treelet.bbox()
    w, h = int(canvas['width']), int(canvas['height'])
    self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)
delimited by whitespace, and each word should have the form
``text/tag``. Words that do not contain a slash are
assigned a ``tag`` of None.
:param s: The string to be converted
:type s: str
:param chunk_node: The label to use for chunk nodes
:type chunk_node: str
:param top_node: The label to use for the root of the tree
:type top_node: str
:rtype: Tree
"""
WORD_OR_BRACKET = re.compile(r'\[|\]|[^\[\]\s]+')
stack = [Tree(top_node, [])]
for match in WORD_OR_BRACKET.finditer(s):
text = match.group()
if text[0] == '[':
if len(stack) != 1:
raise ValueError('Unexpected [ at char %d' % match.start())
chunk = Tree(chunk_node, [])
stack[-1].append(chunk)
stack.append(chunk)
elif text[0] == ']':
if len(stack) != 2:
raise ValueError('Unexpected ] at char %d' % match.start())
stack.pop()
else:
if sep is None:
stack[-1].append(text)
else:
chunks.append(Tree(label, np_nodes))
appended = True
elif node['ctag'] == 'ADJ' and node['rel'] == 'POSDEP' and tree.nodes[node['head']]['ctag'] != 'CONJ':
np_nodes = [item]
i = n - node['head']
while i > 0:
label = 'ADJP'
if type(chunks[-1]) == Tree:
label = chunks[-1].label()
leaves = chunks.pop().leaves()
i -= len(leaves)
np_nodes = leaves + np_nodes
else:
i -= 1
np_nodes.insert(0, chunks.pop())
chunks.append(Tree(label, np_nodes))
appended = True
for d in node_deps(node):
if d == n - 1 and type(chunks[-1]) == Tree and chunks[
-1].label() != 'PP' and appended is not True:
label = chunks[-1].label()
if node['rel'] == 'ADV':
label = 'ADVP'
elif label in {'ADJP', 'ADVP'}:
if node['ctag'] == 'N':
label = 'NP'
elif node['ctag'] == 'ADJ':
label = 'ADJP'
leaves = chunks.pop().leaves()
leaves.append(item)
chunks.append(Tree(label, leaves))
appended = True
@type remaining_text: C{list} of C{Token}
@param remaining_text: The portion of the text that is not yet
covered by C{stack}.
"""
if production is None: productions = self._grammar.productions()
else: productions = [production]
# Try each production, in order.
for production in productions:
rhslen = len(production.rhs())
# check if the RHS of a production matches the top of the stack
if self._match_rhs(production.rhs(), stack[-rhslen:]):
# combine the tree to reflect the reduction
tree = Tree(production.lhs().symbol(), stack[-rhslen:])
stack[-rhslen:] = [tree]
# We reduced something
if self._trace:
self._trace_reduce(stack, production, remaining_text)
return production
# We didn't reduce anything
return None
def get_frazier_score(treestrings):
    """
    Return the average Frazier score over the trees in ``treestrings``.

    Blank entries are skipped.  Every other entry is parsed with
    ``Tree.fromstring`` and scored with ``calc_frazier_score``; its
    word score comes from ``get_word_score``.  A tree whose word score
    raises ``ZeroDivisionError`` is reported with a warning on stdout
    and contributes to neither total.

    :param treestrings: Iterable of strings, one tree per entry.
    :return: The summed Frazier scores divided by the summed word
        scores, or ``0.0`` when the summed word score is zero (for
        example when the input is empty or contains only blank lines).
    :rtype: float
    """
    total_frazier_score, total_word_count = 0, 0
    for tree_line in treestrings:
        if not tree_line.strip():
            continue
        tree = Tree.fromstring(tree_line)
        raw_frazier_score = calc_frazier_score(tree, 0, "")
        # Only the word-score call is guarded; the additions below
        # cannot raise ZeroDivisionError themselves.
        try:
            word_count = get_word_score(tree)
        except ZeroDivisionError:
            print('WARNING: ZeroDivisionError for the tree: ' + str(tree))
            continue
        total_word_count += word_count
        total_frazier_score += raw_frazier_score
    # Without any counted words there is nothing to average over.
    if not total_word_count:
        return 0.0
    return float(total_frazier_score) / float(total_word_count)
def parse(self, tokens):
    # Deliberately no docstring: the documentation is inherited
    # from ParserI.
    #
    # The tokens are materialised into a list, the grammar is asked
    # to check that it covers them, and the search is then delegated
    # to ``self._parse``, whose result is returned unchanged.
    tokens = list(tokens)
    self._grammar.check_coverage(tokens)

    # The recursive descent search begins with a childless tree that
    # is labelled with the grammar's start symbol, and with a frontier
    # holding a single empty tuple.
    root = Tree(self._grammar.start().symbol(), [])
    frontier = [()]

    if self._trace:
        self._trace_start(root, frontier, tokens)

    return self._parse(tokens, root, frontier)
chunks.append(Tree(label, np_nodes))
appended = True
for d in node_deps(node):
if d == n - 1 and type(chunks[-1]) == Tree and chunks[
-1].label() != 'PP' and appended is not True:
label = chunks[-1].label()
if node['rel'] == 'ADV':
label = 'ADVP'
elif label in {'ADJP', 'ADVP'}:
if node['ctag'] == 'N':
label = 'NP'
elif node['ctag'] == 'ADJ':
label = 'ADJP'
leaves = chunks.pop().leaves()
leaves.append(item)
chunks.append(Tree(label, leaves))
appended = True
elif tree.nodes[d]['rel'] == 'NPREMOD' and appended is not True:
np_nodes = [item]
i = n - d
while i > 0:
if type(chunks[-1]) == Tree:
leaves = chunks.pop().leaves()
i -= len(leaves)
np_nodes = leaves + np_nodes
else:
i -= 1
np_nodes.insert(0, chunks.pop())
chunks.append(Tree('NP', np_nodes))
appended = True
if not appended:
label = 'NP'
Return a tree token that has a node C{M{lhs}.symbol}, and
C{M{n}} children. For each nonterminal element
C{M{elt[i]}} in the production, the tree token has a
childless subtree with node value C{M{elt[i]}.symbol}; and
for each terminal element C{M{elt[j]}}, the tree token has
a leaf token with type C{M{elt[j]}}.
@param production: The CFG production that licenses the tree
token that should be returned.
@type production: C{CFGProduction}
"""
LEAF = self.property('LEAF')
children = []
for elt in production.rhs():
if isinstance(elt, Nonterminal):
children.append(Tree(elt.symbol(), []))
else:
# This will be matched.
children.append(Token({LEAF: elt}))
return Tree(production.lhs().symbol(), children)