def _term(g):
    """Applies the TERM rule on 'g' (see top comment)."""
    # Create one proxy rule '__T_<t> -> t' per terminal appearing in the grammar.
    all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
    t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
    new_rules = []
    for rule in g.rules:
        if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
            # On a mixed right-hand side, replace each terminal with its proxy
            # non-terminal and emit the proxy rules this rule actually uses.
            new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
            new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
            new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
        else:
            new_rules.append(rule)
    return Grammar(new_rules)
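
# Illustrative sketch only (not lark's code): the TERM rule factors every terminal that
# appears on a right-hand side of length > 1 out into its own proxy rule, so long
# right-hand sides contain only non-terminals. ToyNT/ToyT/ToyRule below are hypothetical
# stand-ins for lark's NT/T/Rule classes.
from collections import namedtuple

ToyNT = namedtuple('ToyNT', 'name')
ToyT = namedtuple('ToyT', 'name')
ToyRule = namedtuple('ToyRule', 'lhs rhs')

toy_rules = [
    ToyRule(ToyNT('expr'), [ToyNT('expr'), ToyT('+'), ToyNT('term')]),  # mixed rhs, len > 1
    ToyRule(ToyNT('expr'), [ToyNT('term')]),                            # left unchanged
]

# One proxy rule '__T_<t> -> t' per terminal, mirroring t_rules above.
proxies = {t: ToyRule(ToyNT('__T_%s' % t.name), [t])
           for r in toy_rules for t in r.rhs if isinstance(t, ToyT)}

transformed = []
for r in toy_rules:
    if len(r.rhs) > 1 and any(isinstance(x, ToyT) for x in r.rhs):
        new_rhs = [proxies[x].lhs if isinstance(x, ToyT) else x for x in r.rhs]
        transformed.append(ToyRule(r.lhs, new_rhs))
        transformed.extend(p for t, p in proxies.items() if t in r.rhs)
    else:
        transformed.append(r)

for r in transformed:
    print(r.lhs.name, '->', ' '.join(s.name for s in r.rhs))
# expr -> expr __T_+ term
# __T_+ -> +
# expr -> term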
if len(rules) != len(set(rules)):
    duplicates = [item for item, count in Counter(rules).items() if count > 1]
    raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

for r in rules:
    for sym in r.expansion:
        if not (sym.is_term or sym in self.rules_by_origin):
            raise GrammarError("Using an undefined rule: %s" % sym)  # TODO test validation

self.start_states = {start: self.expand_rule(root_rule.origin)
                     for start, root_rule in root_rules.items()}

self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
                   for start, root_rule in root_rules.items()}

lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                  for start in parser_conf.start}

lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
assert len(lr0_rules) == len(set(lr0_rules))

self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

# cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)],
                                           self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                         for start, root_rule in lr0_root_rules.items()}

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
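
# Illustrative sketch only (not lark's calculate_sets): FIRST/NULLABLE are computed by
# fixed-point iteration -- keep sweeping the rules until nothing changes. Toy grammar,
# with plain strings for symbols:  S -> A B | 'c';  A -> 'a' | <empty>;  B -> 'b'
toy_grammar = {
    'S': [['A', 'B'], ['c']],
    'A': [['a'], []],
    'B': [['b']],
}
toy_nonterms = set(toy_grammar)

toy_nullable = set()
toy_first = {nt: set() for nt in toy_grammar}

changed = True
while changed:
    changed = False
    for nt, alternatives in toy_grammar.items():
        for rhs in alternatives:
            # nt is NULLABLE if every symbol of some alternative is nullable.
            if nt not in toy_nullable and all(s in toy_nullable for s in rhs):
                toy_nullable.add(nt)
                changed = True
            # FIRST(nt) gathers FIRST of each leading symbol, up to the first
            # non-nullable one.
            for s in rhs:
                new = toy_first[s] if s in toy_nonterms else {s}
                if not new <= toy_first[nt]:
                    toy_first[nt] |= new
                    changed = True
                if s not in toy_nullable:
                    break

print(sorted(toy_nullable))     # ['A']
print(sorted(toy_first['S']))   # ['a', 'b', 'c']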
def parse(self, stream, start):
    assert start, start   # 'start' must be a non-empty rule name
    start_symbol = NonTerminal(start)

    columns = [set()]
    to_scan = set()     # The scan buffer. 'Q' in E.Scott's paper.

    ## Predict for the start_symbol.
    # Add predicted items to the first Earley set if they expect a non-terminal,
    # or to the scan buffer if they expect a terminal.
    for rule in self.predictions[start_symbol]:
        item = Item(rule, 0, 0)
        if item.expect in self.TERMINALS:
            to_scan.add(item)
        else:
            columns[0].add(item)

    to_scan = self._parse(stream, columns, to_scan, start_symbol)
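
# Illustrative sketch only (not lark's Item class): the predictor's split above routes an
# item by the symbol to the right of its dot -- terminals go to the scan buffer 'Q',
# non-terminals stay in the Earley column. ToyItem below is a hypothetical stand-in.
from collections import namedtuple

ToyItem = namedtuple('ToyItem', 'lhs rhs dot start')

def expects(item):
    """Symbol to the right of the dot, or None if the item is complete."""
    return item.rhs[item.dot] if item.dot < len(item.rhs) else None

TOY_TERMINALS = {'NUMBER', 'PLUS'}

# Toy predictions for a start symbol 'expr':  expr -> expr PLUS expr | NUMBER
toy_predictions = [
    ToyItem('expr', ('expr', 'PLUS', 'expr'), 0, 0),   # expects the non-terminal 'expr'
    ToyItem('expr', ('NUMBER',), 0, 0),                # expects the terminal NUMBER
]

toy_column0, toy_to_scan = set(), set()
for item in toy_predictions:
    (toy_to_scan if expects(item) in TOY_TERMINALS else toy_column0).add(item)

print([i.rhs for i in toy_column0])   # [('expr', 'PLUS', 'expr')]
print([i.rhs for i in toy_to_scan])   # [('NUMBER',)]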
# Empty rule; assert all other attributes are equal
assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

# Remove duplicates
compiled_rules = list(set(compiled_rules))

# Filter out unused rules
while True:
    c = len(compiled_rules)
    used_rules = {s for r in compiled_rules
                  for s in r.expansion
                  if isinstance(s, NonTerminal)
                  and s != r.origin}
    used_rules |= {NonTerminal(s) for s in start}
    compiled_rules = [r for r in compiled_rules if r.origin in used_rules]
    if len(compiled_rules) == c:
        break

# Filter out unused terminals
used_terms = {t.name for r in compiled_rules
              for t in r.expansion
              if isinstance(t, Terminal)}
terminals = [t for t in terminals if t.name in used_terms or t.name in self.ignore]

return terminals, compiled_rules, self.ignore
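
# Illustrative sketch only (not lark's Rule objects): the unused-rule filter above is a
# reachability fixed point -- keep only rules whose origin is referenced by a surviving
# rule or is a start symbol, and repeat until the rule set stops shrinking.
toy_rule_list = [
    ('start',  ['a_rule']),
    ('a_rule', ['"x"']),
    ('b_rule', ['c_rule']),   # never referenced from 'start' -> dropped on the first pass
    ('c_rule', ['"y"']),      # referenced only by b_rule     -> dropped on the second pass
]
toy_start_symbols = {'start'}

while True:
    before = len(toy_rule_list)
    origins = {origin for origin, _ in toy_rule_list}
    used = {sym for origin, expansion in toy_rule_list for sym in expansion
            if sym in origins and sym != origin}
    used |= toy_start_symbols
    toy_rule_list = [(o, e) for o, e in toy_rule_list if o in used]
    if len(toy_rule_list) == before:
        break

print([origin for origin, _ in toy_rule_list])   # ['start', 'a_rule']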