1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36 from numpy import array,mean,std
37 import pyvision as pv
38
39 from pyvision.vector.PCA import PCA
40 import unittest
41 import os.path
42
43
# Feature-vector normalization modes (compared against ``self.norm``).
NORM_NONE = "NONE"            # vectors are returned unchanged
NORM_PCA = "PCA_WHITEN"       # project onto a trained PCA basis with whitening
NORM_VALUE = "VALUE"          # subtract the per-feature mean, divide by the std
NORM_AUTO = "AUTO"            # resolved to VALUE or PCA when normalization is trained

# Normalization modes for regression targets (stored as ``self.reg_norm``).
REG_NORM_NONE = "NONE"
REG_NORM_VALUE = "VALUE"

# Kind of learning problem (compared against ``self.type``).
TYPE_TWOCLASS = "TWOCLASS"
TYPE_MULTICLASS = "MULTICLASS"
TYPE_REGRESSION = "REGRESSION"
55
56
57
58
59
60
61
62
64
65
66
67
68
69
70
72
73
# NOTE(review): the `def` line and docstring that own these statements are
# missing from this listing (presumably the constructor), so parameter order
# and defaults cannot be confirmed from here. The leading numbers on the code
# lines are the listing's own line numbers.
#
# Configuration, stored exactly as passed in:
#   type      - compared against the TYPE_* constants when normalization is trained
#   norm      - compared against the NORM_* constants
#   reg_norm  - stored only; not read anywhere in the visible code
#   pca_basis - forwarded to PCA.train() as `number` when > 1, otherwise as `energy`
#   pca_drop  - forwarded to PCA.train() as `drop_front`
74 self.type = classifer_type
75 self.norm = normalization
76 self.reg_norm = reg_norm
77 self.pca_basis = pca_basis
78 self.pca_drop = pca_drop
79
# Training labels and feature vectors; both start empty.
80 self.labels = []
81 self.vectors = []
# Not read or written again in the visible code.
82 self.vector_length = None
83
# Identity scaling for regression targets (mean 0, std 1) until real
# statistics are computed from the training labels.
84 self.reg_mean = 0.0
85 self.reg_std = 1.0
86
87
88
89
90
92
# NOTE(review): the `def` line was absent from the listing this was restored
# from; the name and the argument-free signature are taken from the
# `trainNormalization()` calls made by the tests in this file.
def trainNormalization(self):
    '''
    Learn the label encoding and the feature normalization from the
    training data held in self.labels and self.vectors.

    Classification (TYPE_TWOCLASS / TYPE_MULTICLASS):
        Distinct labels are numbered 0, 1, ... in first-seen order.
        self.class_map maps label -> index, self.class_inv maps
        index -> label, and self.labels is replaced by the indices.
        TYPE_TWOCLASS requires exactly two distinct labels and
        TYPE_MULTICLASS at least two.

    Regression (TYPE_REGRESSION):
        self.reg_mean / self.reg_std are set to the mean and standard
        deviation of the labels, and self.labels is replaced by
        (label - reg_mean) / reg_std.

    Features:
        Every vector must be one dimensional with the same shape.
        NORM_AUTO is resolved to NORM_PCA when there are more than 128
        features and to NORM_VALUE otherwise.  NORM_VALUE stores the
        per-feature mean/std in self.dmean / self.dstd and standardizes
        the vectors; NORM_PCA trains self.pca and replaces the vectors by
        their whitened projections.  Any other mode leaves self.vectors
        untouched.

    Raises AssertionError when there are fewer than two labels, when the
    number of classes does not fit self.type, or when the vectors are not
    one dimensional and equally shaped.
    '''
    assert len(self.labels) >= 2

    if self.type in (TYPE_TWOCLASS, TYPE_MULTICLASS):
        # Number the distinct labels in the order they are first seen.
        # `in` replaces dict.has_key(), which does not exist on Python 3.
        self.class_map = {}
        for label in self.labels:
            if label not in self.class_map:
                self.class_map[label] = len(self.class_map)
        n_classes = len(self.class_map)

        if self.type == TYPE_MULTICLASS:
            assert n_classes >= 2
        if self.type == TYPE_TWOCLASS:
            assert n_classes == 2

        # Inverse lookup.  items() works on Python 2 and 3; iteritems() is
        # Python 2 only.
        self.class_inv = dict((index, label) for label, index in self.class_map.items())

        # From here on the labels are the integer class indices.
        self.labels = [self.class_map[label] for label in self.labels]

    if self.type == TYPE_REGRESSION:
        self.reg_mean = mean(self.labels)
        self.reg_std = std(self.labels)

        # NOTE(review): when every target is identical reg_std is 0 and this
        # division yields nan/inf; behavior intentionally left unchanged.
        self.labels = [(label - self.reg_mean) / self.reg_std for label in self.labels]

    # All feature vectors must be 1-D and share one shape.
    shape = self.vectors[0].shape
    assert len(shape) == 1
    for vector in self.vectors:
        assert shape == vector.shape

    data = array(self.vectors, 'd')

    if self.norm == NORM_AUTO:
        # Wide vectors get PCA whitening, narrow ones plain standardization.
        if data.shape[1] > 128:
            self.norm = NORM_PCA
        else:
            self.norm = NORM_VALUE

    if self.norm == NORM_VALUE:
        self.dmean = data.mean(axis=0)
        self.dstd = data.std(axis=0)
        # NOTE(review): a constant feature has std 0 and produces nan/inf
        # here; behavior intentionally left unchanged.
        self.vectors = (data - self.dmean) / self.dstd

    elif self.norm == NORM_PCA:
        self.pca = PCA()
        for vector in self.vectors:
            self.pca.addFeature(vector)

        # pca_basis > 1 is passed as `number`, anything else as `energy`.
        if self.pca_basis > 1:
            self.pca.train(drop_front=self.pca_drop, number=self.pca_basis)
        else:
            self.pca.train(drop_front=self.pca_drop, energy=self.pca_basis)

        self.vectors = array(
            [self.pca.project(vector, whiten=True) for vector in self.vectors],
            'd')
162
163
164
165
166
# NOTE(review): the `def` line that owns these statements is missing from this
# listing; `data` is presumably its argument. The leading numbers on the code
# lines are the listing's own line numbers.
#
# Apply the normalization selected by self.norm to `data` and return the result.
# NORM_NONE: the input is handed back unchanged.
168 if self.norm == NORM_NONE:
169 return data
# NORM_VALUE: standardize with the mean/std stored when normalization was trained.
170 elif self.norm == NORM_VALUE:
171 return (data-self.dmean)/self.dstd
# NORM_PCA: whitened projection onto the trained PCA basis.
172 elif self.norm == NORM_PCA:
173 return self.pca.project(data,whiten=True)
# Anything else (including NORM_AUTO that has not been resolved yet) is rejected.
# NOTE(review): `+ self.norm` raises TypeError instead of NotImplementedError
# when self.norm is not a string; the message also misspells "normalization".
174 else:
175 raise NotImplementedError("Could not determine nomalization type: "+ self.norm)
176
177
178
179
191
192
193
194
195
196
197
198
212
213
214
215
216
217
218
219
# NOTE(review): the `def` line that owns this statement is missing from this
# listing. It raises unconditionally; per its message the enclosing method is
# abstract - presumably concrete classifiers override it.
221 raise NotImplementedError("This is an abstract method")
222
223
224
225
226
227
228
# NOTE(review): only the signature of train() survives in this listing ("229"
# and "-" are listing residue). Its docstring and body are not visible, so its
# behavior cannot be documented from here.
229 - def train(self,ilog=None,**kwargs):
237
238
239
240
241
242
243
244
245
246
247
248
# NOTE(review): the listing skips its own line 249 just before this statement,
# so it presumably belongs to a definition whose `def` line was dropped rather
# than to train(). It raises unconditionally; per its message the enclosing
# method is abstract.
250 raise NotImplementedError("This is an abstract method")
251
252
253
# NOTE(review): the `def` line that owns this statement is missing from this
# listing; `value` is presumably its argument.
# Inverse of the target standardization (label - reg_mean) / reg_std:
# scale by the stored std, then add the stored mean.
255 return value*self.reg_std + self.reg_mean
256
257
258
259
# NOTE(review): the `def` line that owns these statements is missing from this
# listing; `value` is presumably its argument.
261 '''Map an integer back into a class label'''
# self.class_inv is the index -> label dict built when normalization is
# trained; an index that is not in it raises KeyError.
262 return self.class_inv[value]
263
264
266 assert len(a) == len(b)
267 ss = 0.0
268 for i in range(len(a)):
269 d = float(a[i])-float(b[i])
270 ss += d*d
271 return ss/len(a)
272
274
276
277
# NOTE(review): body of the test fixture; the owning `def` line is not present
# in the listing this was restored from. It builds the three classifiers used
# by the normalization assertions.

# XOR problem: the label is 1 exactly when the two inputs differ.
xor = VectorClassifier(TYPE_TWOCLASS)
xor.addTraining(0, [0, 0])
xor.addTraining(0, [1, 1])
xor.addTraining(1, [0, 1])
xor.addTraining(1, [1, 0])
self.xor = xor

# Synthetic regression data: whitespace-separated columns, column 0 is the
# target and columns 3-5 are the features.
rega = VectorClassifier(TYPE_REGRESSION)
filename = os.path.join(pv.__path__[0], 'data', 'synthetic', 'regression.dat')
# `with` closes the file even if a line fails to parse (it was never closed before).
with open(filename, 'r') as reg_file:
    for line in reg_file:
        datapoint = line.split()
        rega.addTraining(
            float(datapoint[0]),
            [float(datapoint[3]), float(datapoint[4]), float(datapoint[5])])
self.rega = rega

# Gender data: each line is "<image file> <class label>"; every image is
# resized to 200x200 before being added as a training sample.
gender = VectorClassifier(TYPE_TWOCLASS)
image_dir = os.path.join(pv.__path__[0], 'data', 'csuScrapShots')
filename = os.path.join(image_dir, 'gender.txt')
with open(filename, 'r') as gender_file:
    for line in gender_file:
        im_name, class_name = line.split()
        im = pv.Image(os.path.join(image_dir, im_name))
        im = pv.Image(im.asPIL().resize((200, 200)))
        gender.addTraining(class_name, im)
self.gender = gender
305
310
312
# NOTE(review): body of the normalization test; the owning `def` line is not
# present in the listing this was restored from. `assert_` is a deprecated
# unittest alias (removed in Python 3.12), so the checks use assertTrue /
# assertEqual, which also report the compared values on failure.

# XOR: two features, so NORM_AUTO resolves to value normalization.
self.xor.trainNormalization()
self.assertEqual(self.xor.norm, NORM_VALUE)
self.assertTrue(_mse(self.xor.dmean, [0.5, 0.5]) < 0.0001)
self.assertTrue(_mse(self.xor.dstd, [0.5, 0.5]) < 0.0001)
self.assertEqual(self.xor.class_map, {0: 0, 1: 1})
self.assertEqual(self.xor.class_inv, {0: 0, 1: 1})

# Regression: the target statistics and per-feature statistics are learned.
self.rega.trainNormalization()
self.assertEqual(self.rega.norm, NORM_VALUE)
self.assertAlmostEqual(self.rega.reg_mean, 85.49472, places=4)
self.assertAlmostEqual(self.rega.reg_std, 12.20683, places=4)
self.assertTrue(_mse(self.rega.dmean, [29.082505, 29.9741642, 30.4516687]) < 0.0001)
self.assertTrue(_mse(self.rega.dstd, [11.08164301, 11.983678, 11.18806686]) < 0.0001)

# Gender images: NORM_AUTO is expected to resolve to PCA whitening.
self.gender.trainNormalization()
self.assertEqual(self.gender.norm, NORM_PCA)
self.assertEqual(len(self.gender.pca.getValues()), 73)
self.assertEqual(self.gender.class_map, {'M': 1, 'F': 0})
self.assertEqual(self.gender.class_inv, {0: 'F', 1: 'M'})
334