ps.append(self.C[class_])
hypotheses.append((sum(ps), class_))
if hypotheses:
logprob, best = max(hypotheses)
Z = numpy.logaddexp.reduce([p for p, class_ in hypotheses])
logprob = logprob - Z
else: # Something not at all seen in training, return best a priori
logprob, best = max((p, class_) for class_, p
in self.C.iteritems())
p = numpy.exp(logprob)
        assert 0.0 <= p <= 1.0
return best, p
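# A minimal, self-contained sketch (not part of simpleai; the scores are
# made-up illustrative numbers) of the log-space normalization used above:
# each class score is a sum of log-probabilities, numpy.logaddexp.reduce
# computes the normalization constant Z without underflow, and exponentiating
# the normalized score yields a posterior probability in [0, 1].
def _logspace_posterior_sketch():
    import numpy
    hypotheses = [(-3.2, 'spam'), (-1.1, 'ham')]  # (sum of log-probs, class)
    logprob, best = max(hypotheses)
    Z = numpy.logaddexp.reduce([p for p, class_ in hypotheses])
    p = numpy.exp(logprob - Z)
    assert 0.0 <= p <= 1.0
    return best, p  # ('ham', ~0.89)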
class KNearestNeighbors(Classifier):
"""
Classifies objects based on closest training example.
    Uses the k nearest examples from the training set and
    takes the most common classification among them.
    To use this classifier the problem must define a `distance`
    method to measure the distance between two examples.
"""
def __init__(self, dataset, problem, k=1):
self.k = k
super(KNearestNeighbors, self).__init__(dataset, problem)
    def learn(self):
        try:
            next(iter(self.dataset))
        except StopIteration:
            # Cannot learn from an empty dataset
            raise ValueError("Dataset is empty")
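# A minimal, self-contained sketch of the `distance` requirement described in
# the KNearestNeighbors docstring (illustrative only; the function name and
# the (features, label) tuple format are assumptions, not simpleai's API):
# Euclidean distance between feature vectors, then a majority vote over the
# k closest training examples.
def _knn_sketch(train, example, k=3):
    import math
    from collections import Counter

    def distance(a, b):
        return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))

    # `train` is a list of (features, label) pairs; `example` is a feature tuple.
    neighbors = sorted(train, key=lambda pair: distance(example, pair[0]))[:k]
    votes = Counter(label for _, label in neighbors)
    return votes.most_common(1)[0][0]  # most common class among the k nearest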
def load(cls, filepath):
"""
Loads a pickled version of the classifier saved in `filepath`
"""
        with open(filepath, 'rb') as filehandler:
classifier = pickle.load(filehandler)
if not isinstance(classifier, Classifier):
raise ValueError("Pickled object is not a Classifier")
return classifier
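# A minimal, self-contained sketch of the pickling round-trip behind `load`
# (illustrative only; the file name and the stand-in object are made up).
# Pickle data is bytes, so files are written with 'wb' and read back with
# 'rb', matching the mode used by `load` above.
def _pickle_roundtrip_sketch(filepath='classifier.pickle'):
    import pickle
    obj = {'k': 1, 'target': 'class'}  # stand-in for a trained classifier
    with open(filepath, 'wb') as filehandler:
        pickle.dump(obj, filehandler)
    with open(filepath, 'rb') as filehandler:
        restored = pickle.load(filehandler)
    assert restored == obj
    return restored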
* K-Nearest Neighbor: See http://en.wikipedia.org/wiki/K-nearest_neighbor
"""
import numpy
from collections import defaultdict
from simpleai.machine_learning.models import Classifier
from simpleai.machine_learning.metrics import Counter, OnlineInformationGain, \
OnlineLogProbability
try:
import cPickle as pickle
except ImportError:
import pickle
class DecisionTreeLearner(Classifier):
"""
This implementation features an algorithm that *strictly* follows the
pseudocode given in AIMA.
    It's obviously inefficient in too many ways (perhaps incomplete too), but
    it's intended to be used pedagogically.
    See the other implementations in this same file for a discussion of the
    issues they solve.
This algorithm is equivalent to ID3.
"""
def __init__(self, dataset, problem):
self.dataset = dataset
self.problem = problem
def set_results_from_counts(self, counts):
self.counts = counts
total = sum(counts.itervalues())
majority = max(counts, key=counts.get) # Max frequency
self.result = (majority, counts[majority] / float(total))
def add_branch(self, value, branch=None):
        assert value not in self.branches
if branch is None:
branch = self.__class__()
self.branches[value] = branch
branch.parent = self
return branch
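# A minimal, self-contained sketch of what `set_results_from_counts` computes:
# the majority class and its relative frequency, which tree nodes use as a
# "best so far" answer. The counts below are made-up illustrative numbers.
def _majority_result_sketch():
    counts = {'yes': 7, 'no': 3}
    total = sum(counts.values())
    majority = max(counts, key=counts.get)  # class with the highest count
    return majority, counts[majority] / float(total)  # ('yes', 0.7)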
class DecisionTreeLearner_Queued(Classifier):
"""
    This implementation has a few improvements over the one based on the book:
    - It uses a queue instead of recursion, so the Python stack limit is
      never reached.
    - In case an attribute has a value not seen in training, the intermediate
      nodes can give a "best so far" classification.
    - Repeated iteration over the training examples is avoided by calculating
      all the information gains of a single node split at the same time.
This algorithm is equivalent to ID3.
"""
def learn(self):
if not self.attributes:
self.root = self._single_node_tree()
return
AIMA implies that importance should be information gain.
        Since AIMA only defines it for binary features, this implementation
        was based on the Wikipedia article:
        http://en.wikipedia.org/wiki/Information_gain_in_decision_trees
"""
gain_counter = OnlineInformationGain(attribute, self.target)
for example in examples:
gain_counter.add(example)
return gain_counter.get_gain()
def classify(self, example):
node = walk_to_leaf(self.root, example)
return node.result
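# A minimal, self-contained sketch (plain Python, not simpleai's
# OnlineInformationGain) of the information-gain "importance" measure
# described in the docstring above: the entropy of the target labels minus
# the entropy remaining after splitting the examples by an attribute's
# values. The (value, label) pairs below are made-up illustrative data.
def _information_gain_sketch():
    import math
    from collections import Counter, defaultdict

    def entropy(labels):
        total = float(len(labels))
        return -sum((n / total) * math.log(n / total, 2)
                    for n in Counter(labels).values())

    examples = [('sunny', 'no'), ('sunny', 'no'), ('rainy', 'yes'),
                ('rainy', 'yes'), ('overcast', 'yes'), ('overcast', 'yes')]
    labels = [label for _, label in examples]
    by_value = defaultdict(list)
    for value, label in examples:
        by_value[value].append(label)
    remainder = sum(len(group) / float(len(examples)) * entropy(group)
                    for group in by_value.values())
    return entropy(labels) - remainder  # ~0.918 bits for this data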
class NaiveBayes(Classifier):
"""
    Implements a classifier that uses Bayes' theorem.
"""
def learn(self):
# Frequency count of target classes
self.C = OnlineLogProbability()
# Frequency count of P(Fi|C):
self.Fi = defaultdict(lambda: # For each class,
defaultdict(lambda: # For each attribute,
OnlineLogProbability())) # For each value, count it
for example in self.dataset:
class_ = self.target(example)
self.C.add(class_)
for attribute in self.attributes: