1 from pyvision.vector.VectorClassifier import VectorClassifier,TYPE_REGRESSION
2 import numpy as np
3
4
5 from pyvision.analysis.Table import Table
6 import pyvision
7 import random
8 import os
9 import unittest
10
11
13 '''
14 Basic Radial Basis Function kernel
15 '''
16
19
21
22 tmp = X-y
23 return np.exp(-self.gamma*(tmp*tmp).sum(axis=0))
24
26 return "RBF(%f)"%self.gamma
27
29 '''
30 Basic linear kernel.
31 '''
32
35
37 return np.dot(X.transpose(),y).flatten()
38
41
42
44 '''
45 Ridge Regression algorithm based on Max Welling's Tutorial,
46 Univerity of Toronto.
47
48 http://www.ics.uci.edu/~welling/classnotes/papers_class/Kernel-Ridge.pdf
49
50 Implemented by David Bolme.
51
52 '''
57
59 '''
60 Do not call this function instead call train.
61 '''
62 self.training_size = len(labels)
63
64 c = len(labels)
65 r = len(vectors[0])
66
67 y = np.array(labels,'d')
68
69 X = np.zeros((r,c),'d')
70
71 for i in range(len(vectors)):
72 X[:,i] = vectors[i]
73
74 tmp1 = np.linalg.inv(self.lam*np.eye(r) + np.dot(X,X.transpose()))
75
76 tmp2 = np.dot(y,X.transpose())
77
78 self.w = np.dot(tmp1,tmp2)
79
81 '''
82 Please call predict instead.
83 '''
84 x = np.array(data,'d')
85
86 return np.dot(self.w,x)
87
88
90 '''
91 Ridge Regression algorithm based on Max Welling's Tutorial,
92 Univerity of Toronto.
93
94 http://www.ics.uci.edu/~welling/classnotes/papers_class/Kernel-Ridge.pdf
95
96 Implemented by David Bolme.
97
98 '''
99 - def __init__(self,
100 training_size=0.67,
101 validation_size=None,
102 kernels= [RBF(gamma=2**i) for i in range(-15,4)],
103 lams = [2.0**i for i in range(-8,9)],
104 random_seed = None,
105 **kwargs):
106
107 if isinstance(lams,list):
108 self.lams = lams
109 else:
110 self.lams = [lams]
111
112 if isinstance(kernels,list):
113 self.kernels = kernels
114 else:
115 self.kernels = [kernels]
116
117 self.training_size = training_size
118 self.validation_size = validation_size
119
120
121 self.mse = None
122 self.lam = None
123 self.kernel = None
124 self.training_info = []
125
126 self.rng = random.Random(random_seed)
127
128 VectorClassifier.__init__(self,TYPE_REGRESSION,**kwargs)
129
131 '''
132 Do not call this function instead call train.
133 '''
134
135 if len(self.lams) > 1 or len(self.kernels) > 1:
136
137
138 if self.training_size <= 1.0:
139 self.training_size = int(self.training_size*len(labels))
140 else:
141 self.training_size = int(self.training_size)
142
143 if self.validation_size == None:
144 self.validation_size = len(labels) - int(self.training_size)
145 else:
146 self.validation_size = int(self.validation_size)
147
148 train_labels = []
149 train_vectors = []
150 verify_labels = []
151 verify_vectors = []
152
153 order = list(range(len(labels)))
154 self.rng.shuffle(order)
155 for i in order[:self.training_size]:
156 train_labels.append(labels[i])
157 train_vectors.append(vectors[i])
158 for i in order[self.training_size:self.training_size+self.validation_size]:
159 verify_labels.append(labels[i])
160 verify_vectors.append(vectors[i])
161
162 best_mse = None
163 best_lam = None
164 best_kernel = None
165
166 self.training_size = len(train_labels)
167
168 c = len(train_labels)
169 r = len(train_vectors[0])
170
171 self.c = c
172 self.r = r
173 y = np.array(train_labels,'d')
174
175 X = np.zeros((r,c),'d')
176 for i in range(len(train_vectors)):
177 X[:,i] = train_vectors[i]
178
179 self.X = X
180
181 for kernel in self.kernels:
182 self.kernel = kernel
183
184 kernel_matrix = np.zeros((c,c),'d')
185 for i in range(c):
186 kernel_matrix[i,:] = self.kernel(X,X[:,i:i+1])
187
188
189 for lam in self.lams:
190 self.lam = lam
191
192 self.w = np.dot(y,np.linalg.inv(kernel_matrix + self.lam*np.eye(c)))
193
194 n = len(verify_labels)
195 mse = 0.0
196 for i in range(n):
197 e = verify_labels[i] - self.predictValue(verify_vectors[i])
198 mse += e*e
199 mse = mse/n
200
201 self.training_info.append([lam,kernel,mse])
202
203 if verbose: print "KRR Trianing: %s %10.5f %s %10.5f %s"%(kernel_matrix.shape,lam,kernel,mse,best_mse)
204
205 if best_mse == None or best_mse > mse:
206 best_mse = mse
207 best_lam = lam
208 best_kernel = kernel
209
210 self.mse = best_mse
211 self.lam = best_lam
212 self.kernel = best_kernel
213
214 else:
215 self.lam = self.lams[0]
216 self.kernel = self.kernels[0]
217
218 self.trainRidgeRegression(labels,vectors,verbose=verbose)
219
221 self.training_size = len(labels)
222
223 c = len(labels)
224 r = len(vectors[0])
225
226 self.c = c
227 self.r = r
228 y = np.array(labels,'d')
229
230 X = np.zeros((r,c),'d')
231 for i in range(len(vectors)):
232 X[:,i] = vectors[i]
233
234 self.X = X
235
236 kernel_matrix = np.zeros((c,c),'d')
237 for i in range(c):
238 kernel_matrix[:,i] = self.kernel(X,X[:,i:i+1])
239
240 self.w = np.dot(y,np.linalg.inv(kernel_matrix + self.lam*np.eye(c)))
241
242
244 '''
245 Please call predict instead.
246 '''
247 x = np.array(data)
248 x = x.reshape((self.r,1))
249 k = self.kernel(self.X,x)
250
251 return np.dot(self.w,k)
252
253
254
256 ''' Unit tests for SVM '''
257
260
261
263 rega = KernelRidgeRegression(lams=0.1,kernels=LINEAR())
264 filename = os.path.join(pyvision.__path__[0],'data','synthetic','regression.dat')
265 reg_file = open(filename,'r')
266 labels = []
267 vectors = []
268 for line in reg_file:
269 datapoint = line.split()
270 labels.append(float(datapoint[0]))
271 vectors.append([float(datapoint[3]),float(datapoint[4]),float(datapoint[5])])
272
273 for i in range(50):
274 rega.addTraining(labels[i],vectors[i])
275 rega.train()
276
277 mse = 0.0
278 for i in range(50,len(labels)):
279 p = rega.predict(vectors[i])
280 e = p - labels[i]
281 mse += e*e
282 mse = mse/(len(labels)-50)
283
284 self.assertAlmostEqual(mse,0.24301122718491874,places=4)
285
286
288 rega = KernelRidgeRegression(lams=0.1,kernels=LINEAR())
289 filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_lin.txt')
290 reg_file = open(filename,'r')
291 labels = []
292 vectors = []
293 truth = []
294 for line in reg_file:
295 datapoint = line.split()
296 labels.append(float(datapoint[1]))
297 vectors.append([float(datapoint[0])])
298 truth.append(float(datapoint[2]))
299
300 for i in range(100):
301 rega.addTraining(labels[i],vectors[i])
302 rega.train()
303
304 mse = 0.0
305 ase = 0.0
306 for i in range(50,len(labels)):
307 p = rega.predict(vectors[i])
308 e = p - labels[i]
309 mse += e*e
310 a = p - truth[i]
311 ase += a*a
312
313 mse = mse/(len(labels)-50)
314 ase = ase/(len(labels)-50)
315
316
317 self.assertAlmostEqual(mse,0.041430725415995143,places=7)
318 self.assertAlmostEqual(ase,0.00029750577054177876,places=7)
319
321 rega = KernelRidgeRegression(lams=0.1,kernels=LINEAR())
322 filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_cos.txt')
323 reg_file = open(filename,'r')
324 labels = []
325 vectors = []
326 truth = []
327 for line in reg_file:
328 datapoint = line.split()
329 labels.append(float(datapoint[1]))
330 vectors.append([float(datapoint[0])])
331 truth.append(float(datapoint[2]))
332
333 for i in range(100):
334 rega.addTraining(labels[i],vectors[i])
335 rega.train()
336
337 mse = 0.0
338 ase = 0.0
339 for i in range(50,len(labels)):
340 p = rega.predict(vectors[i])
341 e = p - labels[i]
342 mse += e*e
343 a = p - truth[i]
344 ase += a*a
345
346 mse = mse/(len(labels)-50)
347 ase = ase/(len(labels)-50)
348
349
350 self.assertAlmostEqual(mse,0.54150787637715125,places=7)
351 self.assertAlmostEqual(ase,0.50489208165416299,places=7)
352
354 rega = KernelRidgeRegression(lams=0.1,kernels=RBF())
355 filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_cos.txt')
356 reg_file = open(filename,'r')
357 labels = []
358 vectors = []
359 truth = []
360 for line in reg_file:
361 datapoint = line.split()
362 labels.append(float(datapoint[1]))
363 vectors.append([float(datapoint[0])])
364 truth.append(float(datapoint[2]))
365
366 for i in range(100):
367 rega.addTraining(labels[i],vectors[i])
368 rega.train()
369
370 mse = 0.0
371 ase = 0.0
372 table = Table()
373 for i in range(100,len(labels)):
374 p = rega.predict(vectors[i])
375 e = p - labels[i]
376 mse += e*e
377 a = p - truth[i]
378 ase += a*a
379 table.setElement(i,'x',vectors[i][0])
380 table.setElement(i,'measure',labels[i])
381 table.setElement(i,'truth',truth[i])
382 table.setElement(i,'pred',p)
383 table.setElement(i,'e',e)
384 table.setElement(i,'a',a)
385
386
387 mse = mse/(len(labels)-100)
388 ase = ase/(len(labels)-100)
389
390
391
392
393 self.assertAlmostEqual(mse,0.0462477093266263,places=7)
394 self.assertAlmostEqual(ase,0.0067187523452463625,places=7)
395
397 rega = KernelRidgeRegression(lams=0.1,kernels=RBF())
398 filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_mix.txt')
399 reg_file = open(filename,'r')
400 labels = []
401 vectors = []
402 truth = []
403 for line in reg_file:
404 datapoint = line.split()
405 labels.append(float(datapoint[1]))
406 vectors.append([float(datapoint[0])])
407 truth.append(float(datapoint[2]))
408
409 for i in range(100):
410 rega.addTraining(labels[i],vectors[i])
411 rega.train()
412
413
414 mse = 0.0
415 ase = 0.0
416 table = Table()
417 for i in range(100,len(labels)):
418 p = rega.predict(vectors[i])
419 e = p - labels[i]
420 mse += e*e
421 a = p - truth[i]
422 ase += a*a
423 table.setElement(i,'x',vectors[i][0])
424 table.setElement(i,'measure',labels[i])
425 table.setElement(i,'truth',truth[i])
426 table.setElement(i,'pred',p)
427 table.setElement(i,'e',e)
428 table.setElement(i,'a',a)
429
430
431 mse = mse/(len(labels)-100)
432 ase = ase/(len(labels)-100)
433
434
435
436
437 self.assertAlmostEqual(mse,0.563513669235162,places=7)
438 self.assertAlmostEqual(ase,0.51596869146460422,places=7)
439
441 rega = KernelRidgeRegression(random_seed=28378)
442 filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_mix.txt')
443 reg_file = open(filename,'r')
444 labels = []
445 vectors = []
446 truth = []
447 for line in reg_file:
448 datapoint = line.split()
449 labels.append(float(datapoint[1]))
450 vectors.append([float(datapoint[0])])
451 truth.append(float(datapoint[2]))
452
453 for i in range(100):
454 rega.addTraining(labels[i],vectors[i])
455 rega.train(verbose=False)
456
457
458 mse = 0.0
459 ase = 0.0
460 table = Table()
461 for i in range(100,len(labels)):
462 p = rega.predict(vectors[i])
463 e = p - labels[i]
464 mse += e*e
465 a = p - truth[i]
466 ase += a*a
467 table.setElement(i,'x',vectors[i][0])
468 table.setElement(i,'measure',labels[i])
469 table.setElement(i,'truth',truth[i])
470 table.setElement(i,'pred',p)
471 table.setElement(i,'e',e)
472 table.setElement(i,'a',a)
473
474
475 mse = mse/(len(labels)-100)
476 ase = ase/(len(labels)-100)
477
478
479
480
481 self.assertAlmostEqual(mse,0.047179521440302921,places=7)
482 self.assertAlmostEqual(ase,0.0052453297596735905,places=7)
483
485 rega = KernelRidgeRegression(random_seed=28378)
486 filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_quad.txt')
487 reg_file = open(filename,'r')
488 labels = []
489 vectors = []
490 truth = []
491 for line in reg_file:
492 datapoint = line.split()
493 labels.append(float(datapoint[1]))
494 vectors.append([float(datapoint[0])])
495 truth.append(float(datapoint[2]))
496
497 for i in range(100):
498 rega.addTraining(labels[i],vectors[i])
499 rega.train(verbose=False)
500
501
502 mse = 0.0
503 ase = 0.0
504 table = Table()
505 for i in range(100,len(labels)):
506 p = rega.predict(vectors[i])
507 e = p - labels[i]
508 mse += e*e
509 a = p - truth[i]
510 ase += a*a
511 table.setElement(i,'x',vectors[i][0])
512 table.setElement(i,'measure',labels[i])
513 table.setElement(i,'truth',truth[i])
514 table.setElement(i,'pred',p)
515 table.setElement(i,'e',e)
516 table.setElement(i,'a',a)
517
518
519 mse = mse/(len(labels)-100)
520 ase = ase/(len(labels)-100)
521
522
523
524
525 self.assertAlmostEqual(mse,0.063883259792847411,places=7)
526 self.assertAlmostEqual(ase,0.028811752673991175,places=7)
527
529
530 rega = RidgeRegression()
531 filename = os.path.join(pyvision.__path__[0],'data','synthetic','regression.dat')
532 reg_file = open(filename,'r')
533 labels = []
534 vectors = []
535 for line in reg_file:
536 datapoint = line.split()
537 labels.append(float(datapoint[0]))
538 vectors.append([float(datapoint[3]),float(datapoint[4]),float(datapoint[5])])
539
540 for i in range(50):
541 rega.addTraining(labels[i],vectors[i])
542 rega.train()
543
544 mse = 0.0
545 table = Table()
546 for i in range(50,len(labels)):
547 p = rega.predict(vectors[i])
548 e = p - labels[i]
549 table.setElement(i,'truth',labels[i])
550 table.setElement(i,'pred',p)
551 table.setElement(i,'Residual',e)
552
553 mse += e*e
554
555
556
557
558 mse = mse/(len(labels)-50)
559 self.assertAlmostEqual(mse,0.24301122718491874,places=4)
560