        # count correct predictions per class value and examples per fold
        for res in results.results:
            prediction[res.actualClass] += res.classes[0] == res.actualClass
            countsByFold[res.iterationNumber] += 1
        prediction = [val * 100.0 for val in prediction]
    elif self.qualityMeasure == AUC:
        aucResult = orngStat.AUC(results)
        if aucResult:
            return aucResult[0], None
        else:
            return 0, None

    # compute accuracy only for the classes selected as interesting;
    # other class values do not participate in the projection evaluation
    acc = sum(prediction) / float(max(1, len(results.results)))    # accuracy over all class values
    classes = self.selectedClasses or range(len(self.graph.data_domain.classVar.values))
    val = sum([prediction[index] for index in classes])            # accuracy over the selected classes only
    currentClassDistribution = [int(v) for v in orange.Distribution(table.domain.classVar, table)]
    s = sum([currentClassDistribution[index] for index in classes])
    prediction = [prediction[i] / float(max(1, currentClassDistribution[i])) for i in range(len(prediction))]  # turn correct-prediction counts into per-class accuracies (percent)
    return val / max(1, float(s)), (acc, prediction, list(currentClassDistribution))
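
# A minimal, self-contained sketch of the scoring idea above, using plain
# Python lists instead of the legacy Orange result objects. All names here
# (predicted, actual, selected_classes) are illustrative, not part of Orange.
def selected_class_accuracy(predicted, actual, n_classes, selected_classes=None):
    correct = [0] * n_classes            # correct predictions per class value
    counts = [0] * n_classes             # examples per class value
    for p, a in zip(predicted, actual):
        correct[a] += (p == a)
        counts[a] += 1
    selected = selected_classes or range(n_classes)
    hits = sum(correct[i] for i in selected)
    total = sum(counts[i] for i in selected)
    return hits / float(max(1, total))   # accuracy over the selected classes only

# example: only class 1 is marked as "interesting"
print(selected_class_accuracy([0, 1, 1, 0], [0, 1, 0, 1], 2, selected_classes=[1]))  # 0.5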
    def updateHistogramColors(self):
        if self.parent().drawPies():
            return
        attr = self.somMap.examples.domain.variables[self.parent().attribute]
        for n in self.canvasObj:
            if n.hasNode:
                if attr.varType == orange.VarTypes.Discrete:
                    # discrete attribute: color each node by its majority value
                    if self.parent().inputSet:
                        dist = orange.Distribution(attr, n.node.mappedExamples)
                    else:
                        dist = orange.Distribution(attr, n.node.examples)
                    colors = OWColorPalette.ColorPaletteHSV(len(dist))
                    maxProb = max(dist)
                    majValInd = filter(lambda i: dist[i] == maxProb, range(len(dist)))[0]  # index of the majority value
                    if self.parent().discHistMode == 1:
                        n.histObj[0].setBrush(QBrush(colors[majValInd]))
                    elif self.parent().discHistMode == 2:
                        # lighten the color less the more dominant the majority value is
                        light = 180 - 80 * float(dist[majValInd]) / max(sum(dist), 1)
                        n.histObj[0].setBrush(QBrush(colors[majValInd].light(light)))
                else:
                    # continuous attribute: compare the node's distribution with the full one
                    if self.parent().inputSet:
                        dist = orange.Distribution(attr, n.node.mappedExamples)
                        fullDist = orange.Distribution(attr, self.parent().examples)
                    else:
                        dist = orange.Distribution(attr, n.node.examples)
                        fullDist = orange.Distribution(attr, self.somMap.examples)
                    if len(dist) == 0:
                        continue
                    if self.parent().contHistMode == 0:
                        n.histObj[0].setBrush(QBrush(DefColor))
                    if self.parent().contHistMode == 1:
                        # shade by the standardized difference of the node mean from the overall mean
                        std = (dist.average() - fullDist.average()) / max(fullDist.dev(), 1)
                        std = min(max(std, -1), 1)
                        n.histObj[0].setBrush(QBrush(QColor(70 * (std + 1) + 50, 70 * (std + 1) + 50, 0)))
                    if self.parent().contHistMode == 2:
                        # shade by the node variance relative to the overall variance
                        light = 300 - 200 * dist.var() / fullDist.var()
                        n.histObj[0].setBrush(QBrush(QColor(0, 0, 20).light(light)))
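
# Sketch of the discrete shading rule used above, without the Qt and Orange
# dependencies: a node is painted in the color of its majority value, and in
# mode 2 the color is lightened less the more dominant that value is (the
# 180/80 constants mirror the widget code; Qt's .light() treats 100 as "unchanged").
def majority_shade(dist):
    """Return (index of the majority value, lightness factor on Qt's .light() scale)."""
    max_prob = max(dist)
    maj_ind = [i for i, v in enumerate(dist) if v == max_prob][0]
    light = 180 - 80 * float(dist[maj_ind]) / max(sum(dist), 1)
    return maj_ind, light

print(majority_shade([2, 6, 2]))   # -> (1, 132.0): value 1 dominates, so little lightening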
        # create a dataset that has only this new feature and the class attribute
        d = self.data.select([newFeature, self.data.domain.classVar])
        if not self.cvIndices:
            if self.testingMethod == PROPORTION_TEST:
                pick = orange.MakeRandomIndices2(stratified=orange.MakeRandomIndices.StratifiedIfPossible, p0=0.7, randomGenerator=0)
                self.cvIndices = [pick(d) for i in range(10)]
            elif self.testingMethod == CROSSVALIDATION:
                ind = orange.MakeRandomIndicesCV(d, 10, randomGenerator=0, stratified=orange.MakeRandomIndices.StratifiedIfPossible)
                self.cvIndices = [[val == i for val in ind] for i in range(10)]

        acc = 0.0; count = 0
        for ind in self.cvIndices:
            learnset = d.selectref(ind, 0)
            testset = d.selectref(ind, 1)
            learnDist = orange.Distribution(d.domain.classVar, learnset)
            newFeatureDist = orange.Distribution(newFeature, testset)
            learnConts = orange.ContingencyAttrClass(newFeature, learnset)
            testConts = orange.ContingencyAttrClass(newFeature, testset)
            for val in testConts.keys():
                s = sum(learnConts[val])
                if not s:
                    continue
                learnClassProb = [v / float(s) for v in learnConts[val]]  # class distribution for each feature value (learning set)
                testClassDist = [v for v in testConts[val]]               # number of examples per class value (testing set)
                for i in range(len(testClassDist)):
                    acc += learnClassProb[i] * testClassDist[i]
                    count += testClassDist[i]
        retVal = 100 * acc / max(1, float(count))
        del newFeature, quality
        return retVal
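
# A runnable sketch of the same evaluation without the legacy orange module:
# estimate P(class | feature value) on a learning split and score it on a
# testing split. The data layout (a list of (feature_value, class_index)
# pairs) is an assumption made for illustration.
from collections import defaultdict

def feature_accuracy(learnset, testset, n_classes):
    conts = defaultdict(lambda: [0] * n_classes)       # per-value class counts (learning set)
    for value, cls in learnset:
        conts[value][cls] += 1
    acc, count = 0.0, 0
    for value, cls in testset:
        s = sum(conts[value])
        if not s:
            continue                                   # feature value unseen during learning
        acc += conts[value][cls] / float(s)            # P(actual class | feature value)
        count += 1
    return 100 * acc / max(1, float(count))

learn = [("a", 0), ("a", 0), ("b", 1)]
test = [("a", 0), ("b", 1), ("b", 0)]
print(feature_accuracy(learn, test, 2))                # ~66.7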
def entropy(x, data):
    """Entropy of an attribute x (or joint entropy of a list of attributes) from dataset data."""
    if type(x) == orange.EnumVariable:
        return _entropy(p2f(orange.Distribution(x, data)))
    if type(x) == list:
        if len(x) == 2:  # joint entropy of a pair of attributes
            c = orange.ContingencyAttrAttr(x[0], x[1], data)
            return _entropy(p2f(flatten(c)))
        else:  # joint entropy of a set of attributes: not implemented
            pass
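
# Self-contained versions of the helpers the function above relies on
# (_entropy and p2f are defined elsewhere in the module); this is a sketch of
# what they compute, using only math.log from the standard library.
from math import log

def _entropy(probs):
    """Shannon entropy (in bits) of a list of probabilities."""
    return -sum(p * log(p, 2) for p in probs if p > 0)

def p2f(counts):
    """Normalize a list of counts into frequencies."""
    total = float(sum(counts))
    return [c / total for c in counts]

print(_entropy(p2f([50, 50])))   # 1.0 bit for a uniform binary distribution
print(_entropy(p2f([90, 10])))   # ~0.47 bits for a skewed one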
            diffClass.append(d)
        diffClass.sort()
        dist = sum(diffClass[:5]) / float(len(diffClass[:5]))   # average of the five smallest values in diffClass

        """
        # one way of computing the value
        area = sqrt(sqrt(areaDict[key]))
        if area > 0: value = points * dist / area
        else:        value = 0
        """
        # another way of computing the value
        #value = points * dist / aveDistDict[key]

        if self.distributionScale:
            d = orange.Distribution(graph.objects.domain.classVar, graph.objects)
            v = d[graph.objects[polygonVerticesDict[key][0]].getclass()]
            if v == 0: continue
            # turn the number of points into the percentage of all points that belong
            # to this class value, then multiply by the total number of data points
            points *= sum(d) / float(v)

        # and another
        #dist = sqrt(dist*1000.0)/sqrt(aveDistDict[key]*1000.0)
        dist = sqrt(dist * 1000.0)
        value = points
        if self.considerDistance: value *= dist
        valueDict[key] = value
        #enlargedClosureDict[key] = enlargeClosure(graph, closureDict[key], aveDistDict[key])
        enlargedClosureDict[key] = []
        #otherDict[key] = (graph.objects[polygonVerticesDict[key][0]].getclass(), value, points, dist, area)
        #otherDict[key] = (graph.objects[polygonVerticesDict[key][0]].getclass().value, value, points, dist, aveDistDict[key])
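
# A rough sketch of how the pieces above appear to combine: a cluster's value
# is its point count, optionally rescaled by the inverse frequency of its
# class (distributionScale) and by a distance term (considerDistance). All
# argument names are illustrative; only the arithmetic mirrors the code above.
from math import sqrt

def cluster_value(points, class_count, total_count, dist,
                  distribution_scale=True, consider_distance=True):
    if distribution_scale and class_count > 0:
        points *= total_count / float(class_count)   # fraction of the class, rescaled to all points
    value = points
    if consider_distance:
        value *= sqrt(dist * 1000.0)
    return value

print(cluster_value(points=30, class_count=50, total_count=150, dist=0.2))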
        if not results.results or not results.results[0].probabilities[0]:
            return 0, 0
        for res in results.results:
            val += res.probabilities[0].density(res.actualClass)
        if len(results.results) > 0:
            val /= float(len(results.results))
        return 100.0 * val, (100.0 * val)

    # ###############################
    # do we want to use the very fast heuristic
    # ###############################
    elif self.evaluationAlgorithm == ALGORITHM_HEURISTIC:
        # if the input attributes are continuous (they may be discrete when evaluating
        # scatterplots, where the whole domain is discretized...)
        if testTable.domain[0].varType == orange.VarTypes.Continuous and testTable.domain[1].varType == orange.VarTypes.Continuous:
            discX = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals=NUMBER_OF_INTERVALS)
            discY = orange.EquiDistDiscretization(testTable.domain[1], testTable, numberOfIntervals=NUMBER_OF_INTERVALS)
            testTable = testTable.select([discX, discY, testTable.domain.classVar])

        currentClassDistribution = [int(v) for v in orange.Distribution(testTable.domain.classVar, testTable)]
        prediction = [0.0 for i in range(len(testTable.domain.classVar.values))]

        # create a new attribute that is a cartesian product of the two visualized attributes
        nattr = orange.EnumVariable(values=[str(i) for i in range(NUMBER_OF_INTERVALS * NUMBER_OF_INTERVALS)])
        nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1])
        for i in range(len(nattr.getValueFrom.lookupTable)):
            nattr.getValueFrom.lookupTable[i] = i

        for dist in orange.ContingencyAttrClass(nattr, testTable):
            dist = list(dist)
            if sum(dist) == 0:
                continue
            m = max(dist)
            prediction[dist.index(m)] += m * m / float(sum(dist))

        prediction = [val * 100.0 for val in prediction]         # turn the prediction array into percents
        acc = sum(prediction) / float(max(1, len(testTable)))    # compute accuracy over all classes
        val = 0.0; s = 0.0
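
# A compact sketch of the heuristic above: discretize both attributes into a
# grid, and in every cell reward the majority class by m*m/n (m = majority
# count, n = cell size), which favors pure, well-populated cells. Plain lists
# of per-cell class counts stand in for the Orange contingency tables.
def heuristic_accuracy(cells, n_classes, n_examples):
    prediction = [0.0] * n_classes
    for dist in cells:                       # dist = per-class counts in one grid cell
        n = sum(dist)
        if n == 0:
            continue
        m = max(dist)
        prediction[dist.index(m)] += m * m / float(n)
    prediction = [v * 100.0 for v in prediction]
    return sum(prediction) / float(max(1, n_examples))

cells = [[8, 2], [1, 9], [5, 5]]             # three grid cells, two classes
print(heuristic_accuracy(cells, 2, 30))      # ~56.7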