Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def learn(self):
    """Populate ``self.root`` using an explicit work list instead of recursion.

    When ``self.attributes`` is empty, the root is whatever
    ``self._single_node_tree()`` returns and nothing else happens.
    Otherwise a fresh ``DecisionTreeNode`` becomes the root and
    ``(node, examples)`` pairs are popped from a stack seeded with the root
    and ``self.dataset``; for each pair the split returned by
    ``self._max_gain_split(examples)`` is inspected.

    NOTE(review): in the code shown here nothing is ever pushed back onto
    the work list and no children are attached, so only the root is
    visited. The actual splitting step appears to be missing from this
    excerpt -- confirm against the full implementation.
    """
    if not self.attributes:
        self.root = self._single_node_tree()
        return

    self.root = DecisionTreeNode()
    pending = [(self.root, self.dataset)]
    while pending:
        node, examples = pending.pop()
        best_split = self._max_gain_split(examples)
        counts = best_split.get_target_class_counts()
        branches = best_split.get_branches()

        # Base case exception: the root has no parent to hand it results,
        # so its counts are recorded here.
        if node is self.root:
            node.set_results_from_counts(counts)

        # Avoid splitting when there's a single target class or a single
        # child branch.
        if len(counts) == 1 or len(branches) == 1:
            continue
def _single_node_tree(self):
    """Return one ``DecisionTreeNode`` summarising the whole dataset.

    Every example in ``self.dataset`` is added to a ``Counter`` constructed
    from ``self.target``; the filled counter is then handed to a new node
    through ``set_results_from_counts``.

    NOTE(review): ``Counter`` is called with ``self.target`` and uses an
    ``add`` method, so it is presumably a project-defined counter rather
    than ``collections.Counter`` -- confirm against the module's imports.
    """
    tally = Counter(self.target)
    for item in self.dataset:
        tally.add(item)

    leaf = DecisionTreeNode()
    leaf.set_results_from_counts(tally)
    return leaf
def learn(self, examples, attributes, parent_examples):
    """
    A decision tree learner that *strictly* follows the pseudocode given in
    AIMA. In 3rd edition, see Figure 18.5, page 702.

    Parameters:
        examples: the examples reaching this point of the tree.
        attributes: the candidate attributes. Each one is called on an
            example to obtain its value, and the collection must support
            ``-`` with a set (e.g. a ``set``).
        parent_examples: the examples of the parent call, used only when
            ``examples`` is empty.

    Returns:
        The result of ``self.plurality_value(...)`` for the base cases, or
        a ``DecisionTreeNode`` with one branch per attribute value observed
        in ``examples``.
    """
    if not examples:
        return self.plurality_value(parent_examples)
    # Same outcome whether every example shares one target value or there
    # is nothing left to split on.
    if len(set(map(self.target, examples))) == 1 or not attributes:
        return self.plurality_value(examples)

    A = max(attributes, key=lambda a: self.importance(a, examples))
    tree = DecisionTreeNode(attribute=A)
    # Identical for every branch, so build it once instead of per value.
    remaining = attributes - {A}
    for value in set(map(A, examples)):
        exs = [e for e in examples if A(e) == value]
        subtree = self.learn(exs, remaining, examples)
        tree.add_branch(value, subtree)
    return tree
def plurality_value(self, examples):
    """Return a leaf node whose results are the target counts of *examples*.

    Each example is added to a ``Counter`` built from ``self.target`` and
    the counter is passed to a new ``DecisionTreeNode`` through
    ``set_results_from_counts``.

    Raises:
        ValueError: if ``examples`` is empty.
    """
    if not examples:
        raise ValueError("Dataset is empty")

    tally = Counter(self.target)
    for item in examples:
        tally.add(item)

    leaf = DecisionTreeNode()
    # Note that tie is *not* solved randomly here
    leaf.set_results_from_counts(tally)
    return leaf
def learn(self):
if not self.attributes:
self.root = self._single_node_tree()
return
self.root = DecisionTreeNode()
leaves = {self.root: self._new_set_of_gain_counters()}
while leaves:
leaf = None
for example in self.dataset:
leaf = walk_to_leaf(self.root, example)
if leaf not in leaves:
continue # Don't split leaves that where ignored
for gain_counter in leaves[leaf]:
gain_counter.add(example)
if leaf is None:
raise ValueError("Dataset is empty")
old_leaves = leaves
leaves = {}
for leaf, gains in old_leaves.iteritems():
winner = max(gains, key=lambda gain: gain.get_gain())