Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Tail of a train/evaluate script: `input_files`, `testindexes`, `train` and
# `problem` are defined outside this excerpt.
# The predicate is true only for indexes contained in `testindexes`
# (presumably the reader uses it to pick which examples to yield -- TODO confirm).
test = OnlineCorpusReader(input_files, lambda idx: idx in testindexes)

# Naive Bayes: build the classifier from `train`, then score it on `test`.
# Single-argument print(...) emits the same text as the print statement.
print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

# Decision tree: same protocol; `classifier` and `p` are rebound here.
print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))  # show the learned tree before scoring it
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Final statement of the example-counting step; the loop that drives it is
# outside this excerpt (NOTE(review): confirm this line belongs inside it).
N += 1
print("Corpus has {} examples".format(N))

# Hold out min(N / 10, 1000) randomly sampled example indexes for testing.
# NOTE(review): the previous comment here said the cap was 10000, but the code
# caps at 1000 -- confirm which value is intended.
# `N / 10` assumes N is an int under Python 2 (integer division), so M is an
# int as random.sample requires.
M = min(N / 10, 1000)
testindexes = set(random.sample(xrange(N), M))

# Two corpora over the same files, split by membership in `testindexes`
# (presumably the predicate selects which examples each corpus exposes).
corpus = ProConsCorpus(input_files, lambda idx: not (idx in testindexes))
test = ProConsCorpus(input_files, lambda idx: idx in testindexes)
print("Corpuses created")

# Build the problem and the classifier from the training corpus only.
problem = OpinionProblem(corpus)
classifier = NaiveBayes(corpus, problem)
print("Classifier created")

# Score the classifier on the held-out corpus.
p = precision(classifier, test)
print("Precision = {}".format(p))
# `N` (the number of examples) and `input_files` are defined outside this excerpt.
print("Corpus has {} examples".format(N))

# Choose test set, either 10% or 10000 examples, whatever is less.
# `N / 10` assumes N is an int under Python 2 (integer division), so M is an
# int as random.sample requires.
M = min(N / 10, 10000)
testindexes = set(random.sample(xrange(N), M))
print("Keeping {} examples for testing".format(M))

problem = LanguageClassificationProblem()

# Two readers over the same files, split by membership in `testindexes`
# (presumably the predicate selects which examples each reader yields).
train = OnlineCorpusReader(input_files, lambda idx: not (idx in testindexes))
test = OnlineCorpusReader(input_files, lambda idx: idx in testindexes)

# Naive Bayes: build the classifier from `train`, then score it on `test`.
print("Training Naive Bayes...")
classifier = NaiveBayes(train, problem)
print("Testing...")
p = precision(classifier, test)
print("Precision Naive Bayes = {}".format(p))

# Decision tree: same protocol; `classifier` and `p` are rebound here.
print("Training Decision Tree (large data)...")
classifier = DecisionTreeLearner_LargeData(train, problem, minsample=500)
print("Final tree:")
print(tree_to_str(classifier.root))  # show the learned tree before scoring it
print("Testing...")
p = precision(classifier, test)
print("Precision Decision Tree = {}".format(p))
# Compare several classifiers on the Iris data.
# `IRIS_PATH` and `testindexes` are defined outside this excerpt.
# The two datasets read the same file and are split by membership in
# `testindexes` (presumably the predicate selects which rows are kept).
dataset = IrisDataset(IRIS_PATH, lambda i: i not in testindexes)
testset = IrisDataset(IRIS_PATH, lambda i: i in testindexes)
problem = VectorDataClassificationProblem(dataset, dataset.target_index)

# Distance without target: the last component of each vector is dropped.
# NOTE(review): this assumes the target is the last column, i.e. that
# dataset.target_index refers to the final position -- confirm.
problem.distance = lambda x, y: euclidean_vector_distance(x[:-1], y[:-1])

# Display name -> learner callable; each is invoked as method(dataset, problem).
classifiers = {
    "K-Nearest Neighbours": KNearestNeighbors,
    "Naive Bayes": NaiveBayes,
    "Decision Tree": DecisionTreeLearner_Queued,
}

print("Precision:\n")
# The loop body is indented under the `for`; with the three statements at
# column 0 the loop was not valid Python. `.items()` and single-argument
# print(...) behave the same on Python 2 and also run on Python 3.
for name, method in classifiers.items():
    classifier = method(dataset, problem)
    p = precision(classifier, testset)
    # Right-align the name in 20 columns; show precision with 2 significant digits.
    print("{:>20} = {:.2}".format(name, p))