pyvision.analysis.classifier.ConfusionMatrix

1 # PyVision License 2 # 3 # Copyright (c) 2006-2008 David S. Bolme 4 # All rights reserved. 5 # 6 # Redistribution and use in source and binary forms, with or without 7 # modification, are permitted provided that the following conditions 8 # are met: 9 # 10 # 1. Redistributions of source code must retain the above copyright 11 # notice, this list of conditions and the following disclaimer. 12 # 13 # 2. Redistributions in binary form must reproduce the above copyright 14 # notice, this list of conditions and the following disclaimer in the 15 # documentation and/or other materials provided with the distribution. 16 # 17 # 3. Neither name of copyright holders nor the names of its contributors 18 # may be used to endorse or promote products derived from this software 19 # without specific prior written permission. 20 # 21 # 22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 26 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 27 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 28 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 29 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 34 import unittest 35 36 #from pyvision.analysis.stats import cibinom 37 #from pyvision.analysis.Table import Table 38 39 import pyvision as pv 40 41 42

class ConfusionMatrix(pv.Table):
    '''
    A table of classification outcomes indexed by (truth, prediction),
    together with running counts of correct and incorrect predictions.

    Data is added with addData(); computeRates() then annotates the table
    with per-class and overall recognition rates.
    '''

    def __init__(self, algorithm_name=None, test_name=None):
        '''
        Create an empty confusion matrix.

        @param algorithm_name: optional label, stored on the instance.
        @param test_name: optional label, stored on the instance.
        '''
        # Missing cells read back as 0 so that sparse matrices can be summed.
        pv.Table.__init__(self, default_value=0)

        self.algorithm_name = algorithm_name
        self.test_name = test_name

        self.classes = set()   # every label seen as truth or prediction
        self.successes = 0     # total weight where truth == prediction
        self.failures = 0      # total weight where truth != prediction
        self.total = 0         # successes + failures

        self.setColumnFormat('Rate', "%0.4f")
        self.setColumnFormat('Lower', "%0.4f")
        self.setColumnFormat('Upper', "%0.4f")
        self.setColumnFormat('Bar', "%-10s")

    def addData(self, truth, prediction, weight=1):
        '''
        Add data to a confusion matrix.

        Typically the matrix is built one test at a time. To add multiple
        tests at once, use the weight to populate the data more quickly.

        @param truth: the true/correct label.
        @param prediction: the label output by the classifier.
        @param weight: amount added to the (truth, prediction) cell and to
                       the success/failure counters.
        '''
        self.classes.add(truth)
        self.classes.add(prediction)

        self.accumulateData(truth, prediction, weight)
        if truth == prediction:
            self.successes += weight
        else:
            self.failures += weight
        self.total += weight

    def update_rate(self):
        '''
        Returns a point estimate of the probability of success.

        Raises ZeroDivisionError if no data has been added yet.
        '''
        return float(self.successes) / float(self.total)

    def confidenceInterval(self, alpha=0.05):
        '''
        Returns an estimated confidence interval for the success rate by
        modeling the number of successes as a binomial distribution.

        @param alpha: passed through to pv.cibinom.
        '''
        return pv.cibinom(self.total, self.successes, alpha=alpha)

    def computeRates(self, alpha=0.05):
        '''
        Populates the confusion matrix with more information such as
        recognition rates for each row.  Call this only after all of the
        data has been added.

        For every class the 'Rate', 'Bar', 'Lower' and 'Upper' columns are
        filled from that class's row, and the 'Total' row receives the
        per-column rate.  The ('Total', 'Rate'/'Bar'/'Lower'/'Upper') cells
        describe the matrix as a whole.

        A row or column whose counts sum to zero (a class that only ever
        appeared as a prediction, or only as truth) has no defined rate and
        is skipped instead of raising ZeroDivisionError.  Likewise the
        overall summary is skipped when no data has been added.

        @param alpha: passed through to pv.cibinom for the intervals.
        '''
        self.row_headers.sort()
        self.col_headers.sort()

        # Per-row rates: diagonal count over the row sum.
        for row in self.classes:
            successes = 0
            total = 0
            for col in self.classes:
                total += self.element(row, col)
                if row == col:
                    successes += self.element(row, col)
            if not total:
                # Empty row: rate is undefined, leave the cells untouched.
                continue
            rate = float(successes) / total
            interval = pv.cibinom(total, successes, alpha)
            self.setData(row, 'Rate', rate)
            # Ten-character bar, rounded to the nearest tenth.
            self.setData(row, 'Bar', "#" * int(10 * rate + 0.5))
            self.setData(row, 'Lower', interval[0])
            self.setData(row, 'Upper', interval[1])

        # Per-column rates: diagonal count over the column sum.
        for col in self.classes:
            successes = 0
            total = 0
            for row in self.classes:
                total += self.element(row, col)
                if row == col:
                    successes += self.element(row, col)
            if not total:
                # Empty column: rate is undefined, leave the cell untouched.
                continue
            rate = float(successes) / total
            self.setData('Total', col, "%0.4f" % rate)

        # Overall summary; update_rate() would divide by zero with no data.
        if self.total:
            overall = self.update_rate()
            interval = self.confidenceInterval(alpha)
            self.setData('Total', 'Rate', overall)
            self.setData('Total', 'Bar', "#" * int(10 * overall + 0.5))
            self.setData('Total', 'Lower', interval[0])
            self.setData('Total', 'Upper', interval[1])

128 129 130 131 132 133 134 135

class _TestConfusionMatrix(unittest.TestCase):
    '''
    Checks the overall success rate and its confidence interval on two
    fixtures: a small multi-class colour problem and a large weighted
    accept/reject problem.
    '''

    def setUp(self):
        '''Build both fixture matrices and compute their rates.'''
        color = ConfusionMatrix()
        color.addData('red', 'red')
        color.addData('red', 'red')
        color.addData('red', 'red')
        color.addData('blue', 'blue')
        color.addData('blue', 'blue')
        color.addData('blue', 'blue')
        color.addData('blue', 'blue')
        color.addData('pink', 'pink')
        color.addData('pink', 'pink')
        color.addData('pink', 'pink')
        color.addData('pink', 'pink')
        color.addData('pink', 'pink')
        color.addData('pink', 'red')
        color.addData('pink', 'red')
        color.addData('blue', 'red')
        color.addData('blue', 'red')
        color.addData('red', 'blue')
        color.addData('green', 'green')
        color.addData('red', 'green')
        color.computeRates()
        self.color = color

        # Simulate a face recognition problem with a
        # probe set of 1000 and a gallery set of 1000
        # 0.001 FAR and 0.100 FRR
        sim_face = ConfusionMatrix()
        sim_face.addData('accept', 'accept', 900)
        sim_face.addData('reject', 'reject', 998001)
        sim_face.addData('accept', 'reject', 100)
        sim_face.addData('reject', 'accept', 999)
        sim_face.computeRates()
        self.sim_face = sim_face

    def test_color(self):
        '''13 of 19 colour samples are correct (rate 0.6842).'''
        # assertAlmostEqual is used because the assertAlmostEquals alias
        # was removed from unittest in Python 3.12.
        self.assertAlmostEqual(self.color.update_rate(), 0.6842, places=4)
        self.assertAlmostEqual(self.color.confidenceInterval()[0], 0.4345, places=4)
        self.assertAlmostEqual(self.color.confidenceInterval()[1], 0.8742, places=4)

    def test_verification(self):
        '''Weighted accept/reject data: 998901 of 1000000 correct.'''
        self.assertAlmostEqual(self.sim_face.update_rate(), 0.99890100000000004, places=4)
        self.assertAlmostEqual(self.sim_face.confidenceInterval()[0], 0.99883409247930877, places=4)
        self.assertAlmostEqual(self.sim_face.confidenceInterval()[1], 0.99896499025635421, places=4)

183

Source Code for Module pyvision.analysis.classifier.ConfusionMatrix