# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_no_gain(self):
    """Gain must be zero when the attribute and target are both constant.

    With a function that maps every example to None there is nothing to
    discriminate, so the information gain of the counter stays at 0.
    """
    f = lambda x: None
    gain = OnlineInformationGain(f, f)
    # `range` instead of Python-2-only `xrange`: identical behavior for
    # 30 items and keeps the test runnable on Python 3.
    for i in range(30):
        gain.add(i)
    self.assertEqual(gain.get_gain(), 0)
def test_full_gain(self):
    """When the attribute equals the target, gain equals the target entropy.

    A perfectly informative attribute removes all uncertainty, so the
    information gain must match the raw entropy of the target and be
    non-negative.
    """
    target = lambda x: x % 7
    gain = OnlineInformationGain(target, target)
    entropy = OnlineEntropy(target)
    # `range` instead of Python-2-only `xrange`: identical behavior for
    # 50 items and keeps the test runnable on Python 3.
    for i in range(50):
        gain.add(i)
        entropy.add(i)
    self.assertEqual(gain.get_gain(), entropy.get_entropy())
    self.assertGreaterEqual(gain.get_gain(), 0)
def test_starts_in_zero(self):
    """A freshly built counter reports zero gain and empty collections."""
    gain = OnlineInformationGain(lambda x: None, lambda x: None)
    self.assertEqual(gain.get_gain(), 0)
    # Wrap in list(): on Python 3, dict.items() returns a view object that
    # never compares equal to a plain list, so the bare comparison would
    # fail there. On Python 2 list(list) is a no-op, so behavior is kept.
    self.assertEqual(list(gain.get_target_class_counts().items()), [])
    self.assertEqual(gain.get_branches(), [])
def _new_set_of_gain_counters(self):
    """
    Build and return a fresh OnlineInformationGain counter for every
    attribute in ``self.attributes``, each tied to ``self.target``.
    """
    counters = []
    for attribute in self.attributes:
        counters.append(OnlineInformationGain(attribute, self.target))
    return counters
def importance(self, attribute, examples):
    """
    AIMA implies that importance should be information gain.
    Since AIMA only defines it for binary features this implementation
    was based on the wikipedia article:
    http://en.wikipedia.org/wiki/Information_gain_in_decision_trees
    """
    counter = OnlineInformationGain(attribute, self.target)
    # Feed every example into the online counter, then read the gain.
    add = counter.add
    for sample in examples:
        add(sample)
    return counter.get_gain()