pyvision.vector.VectorClassifier

64 65 66 ## 67 # Configure some defaults for the classifier value normalization. 68 # 69 # <p>This configures some defaults for the classifier such as the 70 # type of classifier, and how values are normalized.

def __init__(self, classifer_type, normalization=NORM_AUTO, reg_norm=REG_NORM_VALUE, pca_basis=0.95, pca_drop=0):
    """Record the classifier configuration and reset the training state.

    Args:
        classifer_type: Kind of problem being solved; the other methods
            compare it against TYPE_TWOCLASS, TYPE_MULTICLASS and
            TYPE_REGRESSION.
        normalization: Feature normalization mode; compared against the
            NORM_* constants. NORM_AUTO is resolved to a concrete mode in
            trainNormalization().
        reg_norm: Stored as ``self.reg_norm``; it is not read by any of the
            code visible in this listing.
        pca_basis: Forwarded to ``PCA.train`` as ``number`` when greater
            than 1, otherwise as ``energy``.
        pca_drop: Forwarded to ``PCA.train`` as ``drop_front``.
    """
    # Training samples are accumulated here by addTraining().
    self.labels, self.vectors = [], []
    self.vector_length = None

    # Identity scaling for regression targets until trainNormalization()
    # replaces these with values learned from the labels.
    self.reg_mean, self.reg_std = 0.0, 1.0

    # Basic configuration, stored verbatim.
    self.type = classifer_type
    self.norm = normalization
    self.reg_norm = reg_norm
    self.pca_basis = pca_basis
    self.pca_drop = pca_drop

86 87 88 ## 89 # Learn the range of values that are expected for labels and data. 90 # Then setup for normalization.

def trainNormalization(self):
    """Learn label and feature normalization from the accumulated samples.

    For class problems the labels are mapped to consecutive integers in
    first-seen order (``self.class_map``) and the inverse mapping is kept in
    ``self.class_inv``. For regression the labels are shifted and scaled by
    their mean and standard deviation. The feature vectors are then
    normalized according to ``self.norm`` and stored back into
    ``self.vectors``; ``self.labels`` is replaced by the normalized labels.

    Raises:
        AssertionError: if fewer than two samples were added, if the number
            of distinct classes does not fit the classifier type, or if the
            vectors are not all one dimensional with the same shape.
    """
    assert len(self.labels) >= 2

    if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
        # Learn the classes: each new label gets the next integer id.
        self.class_map = {}
        for label in self.labels:
            # ``in`` works on Python 2 and 3; dict.has_key() is Python 2 only.
            if label not in self.class_map:
                self.class_map[label] = len(self.class_map)
        n_classes = len(self.class_map)

        if self.type == TYPE_MULTICLASS:
            assert n_classes >= 2
        if self.type == TYPE_TWOCLASS:
            assert n_classes == 2

        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        self.class_inv = dict((value, key) for key, value in self.class_map.items())
        self.labels = [self.class_map[each] for each in self.labels]

    if self.type == TYPE_REGRESSION:
        self.reg_mean = mean(self.labels)
        self.reg_std = std(self.labels)
        # NOTE(review): identical labels give reg_std == 0 and therefore a
        # division by zero here -- confirm whether callers can hit this.
        self.labels = [(each - self.reg_mean) / self.reg_std for each in self.labels]

    # Every sample must be a 1-D vector with the same shape as the first.
    shape = self.vectors[0].shape
    assert len(shape) == 1
    for each in self.vectors:
        assert shape == each.shape

    # Create a data matrix with one row per sample.
    data = array(self.vectors, 'd')

    if self.norm == NORM_AUTO:
        # Automatic choice: PCA for more than 128 features, otherwise
        # per-feature value normalization.
        self.norm = NORM_VALUE
        if data.shape[1] > 128:
            self.norm = NORM_PCA

    # Set up value normalization.
    if self.norm == NORM_VALUE:
        self.dmean = data.mean(axis=0)
        self.dstd = data.std(axis=0)
        self.vectors = (data - self.dmean) / self.dstd

    elif self.norm == NORM_PCA:
        self.pca = PCA()
        for vec in self.vectors:
            self.pca.addFeature(vec)

        # pca_basis > 1 is handed over as ``number``, anything else as
        # ``energy``.
        if self.pca_basis > 1:
            self.pca.train(drop_front=self.pca_drop, number=self.pca_basis)
        else:
            self.pca.train(drop_front=self.pca_drop, energy=self.pca_basis)

        self.vectors = array(
            [self.pca.project(each, whiten=True) for each in self.vectors], 'd')

162 163 164 165 ## 166 # Normalize the values in a data vector to be mean zero.

def normalizeVector(self,data):
    """Apply the normalization learned by trainNormalization() to ``data``.

    Args:
        data: A single feature vector.

    Returns:
        ``data`` unchanged for NORM_NONE, ``(data - dmean) / dstd`` for
        NORM_VALUE, or the whitened PCA projection for NORM_PCA.

    Raises:
        NotImplementedError: if ``self.norm`` is none of the above.
    """
    if self.norm == NORM_NONE:
        return data
    if self.norm == NORM_VALUE:
        return (data - self.dmean) / self.dstd
    if self.norm == NORM_PCA:
        return self.pca.project(data, whiten=True)

    # %-formatting instead of "+" so that a non-string normalization code
    # still produces the intended NotImplementedError rather than a
    # TypeError from the string concatenation.
    raise NotImplementedError("Could not determine nomalization type: %s" % (self.norm,))

176 177 178 ## 179 # Add a training sample. Data must be a vector of numbers.

def addTraining(self,label,data,ilog=None):
    """Append one labelled sample to the training set.

    Args:
        label: Target for the sample; converted with ``float`` for
            regression classifiers, stored as given otherwise.
        data: A ``pv.Image`` or anything ``array(data, 'd')`` accepts. It
            is flattened into a one dimensional vector of doubles.
        ilog: Accepted but not used by this method.
    """
    # The label is stored first, exactly as the original statement order did.
    is_regression = self.type == TYPE_REGRESSION
    self.labels.append(float(label) if is_regression else label)

    sample = data
    if isinstance(sample, pv.Image):
        # Images are converted to a matrix and flattened first.
        sample = sample.asMatrix2D().flatten()

    self.vectors.append(array(sample, 'd').flatten())

191 192 193 ## 194 # Predict the class or the value for the input data. 195 # 196 # <p>This function will perform value normalization and then 197 # delegate to the subclass to perform classification or 198 # regression.

def predict(self,data,ilog=None):
    """Predict the class label or regression value for one sample.

    The sample is flattened into a vector of doubles, normalized with
    normalizeVector(), handed to predictValue(), and the raw result is
    mapped back with invertClass() or invertReg() depending on the
    classifier type.

    Args:
        data: A ``pv.Image`` or anything ``array(data, 'd')`` accepts.
        ilog: Forwarded to predictValue() as a keyword argument.

    Returns:
        The original class label or the de-normalized regression value;
        ``None`` when ``self.type`` matches none of the known types.
    """
    sample = data.asMatrix2D().flatten() if isinstance(data, pv.Image) else data
    sample = self.normalizeVector(array(sample, 'd').flatten())

    raw = self.predictValue(sample, ilog=ilog)

    if self.type in (TYPE_TWOCLASS, TYPE_MULTICLASS):
        return self.invertClass(raw)
    if self.type == TYPE_REGRESSION:
        return self.invertReg(raw)
    return None

212 213 214 ## 215 # Override this method in subclasses. 216 # Input should be a numpy array of doubles 217 # 218 # For a classifier the output is an int 219 # For regression the output is a float

def predictValue(self,data,ilog=None):
    """Abstract hook: predict the normalized value for one normalized vector.

    predict() calls this as ``self.predictValue(data, ilog=ilog)``, so the
    signature accepts ``ilog``; without it the base method would fail with
    a TypeError instead of the intended NotImplementedError.

    Args:
        data: The normalized feature vector.
        ilog: Optional value forwarded from predict(); unused here.

    Raises:
        NotImplementedError: always, subclasses must override this method.
    """
    raise NotImplementedError("This is an abstract method")

222 223 224 ## 225 # Train the classifier on the training data. 226 # 227 # This normalizes the data and the labels, and then passes the 228 # results to the subclass for training.

def train(self,ilog=None,**kwargs):
    """Normalize the collected samples and train the subclass on them.

    Args:
        ilog: Forwarded to trainClassifer() as a keyword argument.
        **kwargs: Forwarded unchanged to trainClassifer().

    After training, the ``labels`` and ``vectors`` attributes are deleted
    from the instance.
    """
    self.trainNormalization()

    labels, vectors = self.labels, self.vectors
    self.trainClassifer(labels, vectors, ilog=ilog, **kwargs)

    # Remove the training data from the instance.
    for attribute in ('labels', 'vectors'):
        delattr(self, attribute)

237 238 239 ## 240 # This abstract method should be overridden by subclasses. 241 # 242 # <p> This method is called from {@link train}. The vectors and values 243 # passed to this method will have been normalized. This method should 244 # train a classifier or regression algorithm for that normalized data. 245 # 246 # <p> Any keyword arguments passed to train will also be passed on to train 247 # classifier. This could allow variations in training or for verbose 248 # output.

def trainClassifer(self,labels,vectors,ilog=None, **kwargs):
    """Abstract hook: fit the model on normalized labels and vectors.

    Args:
        labels: The labels passed in by train().
        vectors: The feature vectors passed in by train().
        ilog: Optional value forwarded from train().
        **kwargs: Extra keyword arguments forwarded from train().

    Raises:
        NotImplementedError: always, subclasses must override this method.
    """
    message = "This is an abstract method"
    raise NotImplementedError(message)

251 252 ## 253 # Convert a normalized regression value back to the original scale

def invertReg(self,value):
    """Map a normalized regression value back to the original label scale.

    Args:
        value: A value in normalized units.

    Returns:
        ``value`` multiplied by ``self.reg_std`` plus ``self.reg_mean``.
    """
    rescaled = value * self.reg_std
    return rescaled + self.reg_mean

256 257 258 ## 259 # Convert an integer class value back to the original label values.

def invertClass(self,value):
    """Return the original class label stored for the integer id ``value``.

    Raises:
        KeyError: if ``value`` is not a key of ``self.class_inv``.
    """
    id_to_label = self.class_inv
    return id_to_label[value]

274

def setUp(self):
    """Build the three classifiers shared by the tests.

    ``self.xor`` is a two class problem on the four XOR points,
    ``self.rega`` a regression problem read from the synthetic data file,
    and ``self.gender`` a two class image problem read from the
    csuScrapShots listing. The data files are opened with ``with`` so the
    handles are closed even when parsing a line fails.
    """
    data_root = os.path.join(pv.__path__[0], 'data')

    # A simple binary two class problem.
    xor = VectorClassifier(TYPE_TWOCLASS)
    xor.addTraining(0, [0, 0])
    xor.addTraining(0, [1, 1])
    xor.addTraining(1, [0, 1])
    xor.addTraining(1, [1, 0])
    self.xor = xor

    # Synthetic linear regression: column 0 is the target, columns 3-5
    # are the features.
    rega = VectorClassifier(TYPE_REGRESSION)
    filename = os.path.join(data_root, 'synthetic', 'regression.dat')
    with open(filename, 'r') as reg_file:
        for line in reg_file:
            datapoint = line.split()
            rega.addTraining(
                float(datapoint[0]),
                [float(datapoint[3]), float(datapoint[4]), float(datapoint[5])])
    self.rega = rega

    # Image classification: each line holds an image name and its class.
    gender = VectorClassifier(TYPE_TWOCLASS)
    scrap_dir = os.path.join(data_root, 'csuScrapShots')
    filename = os.path.join(scrap_dir, 'gender.txt')
    with open(filename, 'r') as f:
        for line in f:
            im_name, class_name = line.split()
            im_name = os.path.join(scrap_dir, im_name)
            im = pv.Image(im_name)
            im = pv.Image(im.asPIL().resize((200, 200)))
            gender.addTraining(class_name, im)
    self.gender = gender

305

def test_vc_create(self):
    """Check that a classifier can be constructed for every problem type."""
    for kind in (TYPE_TWOCLASS, TYPE_MULTICLASS, TYPE_REGRESSION):
        _ = VectorClassifier(kind)

310

def test_vc_normalize(self):
    """Check the normalization learned for the three fixture classifiers.

    Uses assertTrue/assertEqual rather than the deprecated ``assert_``
    alias, which newer unittest versions no longer provide.
    """
    # This should test class normalization.
    self.xor.trainNormalization()
    self.assertEqual(self.xor.norm, NORM_VALUE)
    self.assertTrue(_mse(self.xor.dmean, [0.5, 0.5]) < 0.0001)
    self.assertTrue(_mse(self.xor.dstd, [0.5, 0.5]) < 0.0001)
    self.assertEqual(self.xor.class_map, {0: 0, 1: 1})
    self.assertEqual(self.xor.class_inv, {0: 0, 1: 1})

    # This should test value normalization.
    self.rega.trainNormalization()
    self.assertEqual(self.rega.norm, NORM_VALUE)
    self.assertAlmostEqual(self.rega.reg_mean, 85.49472, places=4)
    self.assertAlmostEqual(self.rega.reg_std, 12.20683, places=4)
    self.assertTrue(_mse(self.rega.dmean, [29.082505, 29.9741642, 30.4516687]) < 0.0001)
    self.assertTrue(_mse(self.rega.dstd, [11.08164301, 11.983678, 11.18806686]) < 0.0001)

    # This should test PCA normalization.
    self.gender.trainNormalization()
    self.assertEqual(self.gender.norm, NORM_PCA)
    self.assertEqual(len(self.gender.pca.getValues()), 73)
    self.assertEqual(self.gender.class_map, {'M': 1, 'F': 0})
    self.assertEqual(self.gender.class_inv, {0: 'F', 1: 'M'})

Source Code for Module pyvision.vector.VectorClassifier