1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 '''
36 This module contains code from Nicolas Pinto. Added to PyVision by David Bolme
37
38 Pinto N, Cox DD, DiCarlo JJ (2008) Why is Real-World Visual Object Recognition Hard?
39 PLoS Computational Biology 4(1): e27 doi:10.1371/journal.pcbi.0040027
40
41 Created on May 29, 2011
42
43 @author: bolme
44 '''
45
46
47 import numpy as np
48 import scipy.signal
49 conv = scipy.signal.convolve
50
51
53
55 '''
56 @param params: representation parameters
57 @type params: dict
58 @param featsel: features to include in the vector
59 @type featsel: dict
60 '''
61 self.params = params
62 self.featsel = featsel
63
65 '''
66 Extract V1 like features from an image.
67
68 @param im: the image
69 @type im: pv.Image
70 '''
71
72 mat = im.asMatrix3D()
73 mat = mat.transpose((1,2,0))
74 out = v1like_fromarray(mat,self.params)
75 return out
76
77
78
79
81
82 V1LIKE_PARAMS_A = {
83 'preproc': {
84 'max_edge': 150,
85 'lsum_ksize': 3,
86 'resize_method': 'bicubic',
87 },
88 'normin': {
89 'kshape': (3,3),
90 'threshold': 1.0,
91 },
92 'filter': {
93 'kshape': (43,43),
94 'orients': [ o*np.pi/16 for o in xrange(16) ],
95 'freqs': [ 1./n for n in [2, 3, 4, 6, 11, 18] ],
96 'phases': [0],
97 },
98 'activ': {
99 'minout': 0,
100 'maxout': 1,
101 },
102 'normout': {
103 'kshape': (3,3),
104 'threshold': 1.0,
105 },
106 'pool': {
107 'lsum_ksize': 17,
108 'outshape': (30,30),
109 },
110 }
111
112 V1LIKE_FEATURES_A = {
113 'output': True,
114 'input_gray': None,
115 'input_colorhists': None,
116 'normin_hists': None,
117 'filter_hists': None,
118 'activ_hists': None,
119 'normout_hists': None,
120 'pool_hists': None,
121 }
122
123
125
126
127 out = 0.2989*arr[:,:,0] + \
128 0.5870*arr[:,:,1] + \
129 0.1141*arr[:,:,2]
130
131 return out
132
133
134
136
137 out = np.empty_like(arr)
138
139
140 out[:,:,0] = arr[:,:,0] - arr[:,:,1]
141
142 out[:,:,1] = arr[:,:,2] - arr[:,:,[0,1]].min(2)
143
144 out[:,:,2] = arr.max(2)
145
146 return out
147
148
150
151
152 arr = arr.astype('float32')
153 out = np.empty(arr.shape[:2]+(2,), dtype='float32')
154
155 print out.shape
156
157
158 out[:,:,0] = arr[:,:,0] - arr[:,:,1]
159
160 out[:,:,1] = arr[:,:,2] - arr[:,:,[0,1]].min(2)
161
162 denom = arr.max(2)
163
164 mask = denom < threshold
165
166 out[:,:,0] /= denom
167 out[:,:,1] /= denom
168
169 np.putmask(out[:,:,0], mask, 0)
170 np.putmask(out[:,:,1], mask, 0)
171
172 return out
173
174
176
177
178 opp = opp_convert(arr)
179 out = np.empty_like(opp[:,:,[0,1]])
180
181 rg = opp[:,:,0]
182 by = opp[:,:,1]
183 intensity = opp[:,:,2]
184
185 lowi = intensity < 0.1*intensity.max()
186 rg[lowi] = 0
187 by[lowi] = 0
188
189 denom = intensity
190 denom[denom==0] = 1
191 out[:,:,0] = rg / denom
192 out[:,:,1] = by / denom
193
194 return out
195
196
198
199
200 out = np.empty_like(arr[:,:,[0,1]])
201
202 red = arr[:,:,0]
203 green = arr[:,:,1]
204
205 intensity = arr.mean(2)
206
207 lowi = intensity < 0.1*intensity.max()
208 arr[lowi] = 0
209
210 denom = arr.sum(2)
211 denom[denom==0] = 1
212 out[:,:,0] = red / denom
213 out[:,:,1] = green / denom
214
215 return out
216
217
219 """ fast rgb_to_hsv using numpy array """
220
221
222
223
224
225
226
227
228 arr = arr.astype("float32")
229 out = np.empty_like(arr)
230
231 arr_max = arr.max(-1)
232 delta = arr.ptp(-1)
233 s = delta / arr_max
234
235 s[delta==0] = 0
236
237
238 idx = (arr[:,:,0] == arr_max)
239 out[idx, 0] = (arr[idx, 1] - arr[idx, 2]) / delta[idx]
240
241
242 idx = (arr[:,:,1] == arr_max)
243 out[idx, 0] = 2. + (arr[idx, 2] - arr[idx, 0] ) / delta[idx]
244
245
246 idx = (arr[:,:,2] == arr_max)
247 out[idx, 0] = 4. + (arr[idx, 0] - arr[idx, 1] ) / delta[idx]
248
249 out[:,:,0] = (out[:,:,0]/6.0) % 1.0
250 out[:,:,1] = s
251 out[:,:,2] = arr_max
252
253
254
255
256
257 out[np.isnan(out)] = 0
258
259 return out
260
262
263
264
265 if arr.ndim == 2 or arr.shape[2] == 1:
266 arr_new = np.empty(arr.shape[:2] + (3,), dtype="float32")
267 arr_new[:,:,0] = arr.copy()
268 arr_new[:,:,1] = arr.copy()
269 arr_new[:,:,2] = arr.copy()
270 arr = arr_new
271
272 return arr
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
315 """ Fast Euclidean Norm (L2)
316
317 This version should be faster than numpy.linalg.norm if
318 the dot function uses blas.
319
320 Inputs:
321 x -- numpy array
322
323 Output:
324 L2 norm from 1d representation of x
325
326 """
327 xv = x.ravel()
328 return np.dot(xv, xv)**(1/2.)
329
330
331
332 -def gabor2d(gw, gh, gx0, gy0, wfreq, worient, wphase, shape):
333 """ Generate a gabor 2d array
334
335 Inputs:
336 gw -- width of the gaussian envelope
337 gh -- height of the gaussian envelope
338 gx0 -- x indice of center of the gaussian envelope
339 gy0 -- y indice of center of the gaussian envelope
340 wfreq -- frequency of the 2d wave
341 worient -- orientation of the 2d wave
342 wphase -- phase of the 2d wave
343 shape -- shape tuple (height, width)
344
345 Outputs:
346 gabor -- 2d gabor with zero-mean and unit-variance
347
348 """
349
350 height, width = shape
351 y, x = np.mgrid[0:height, 0:width]
352
353 X = x * np.cos(worient) * wfreq
354 Y = y * np.sin(worient) * wfreq
355
356 env = np.exp( -np.pi * ( ((x-gx0)**2./gw**2.) + ((y-gy0)**2./gh**2.) ) )
357 wave = np.exp( 1j*(2*np.pi*(X+Y) + wphase) )
358 gabor = np.real(env * wave)
359
360 gabor -= gabor.mean()
361 gabor /= fastnorm(gabor)
362
363 return gabor
364
365
367 """ Simple 3d array resampling
368
369 Inputs:
370 src -- a ndimensional array (dim>2)
371 outshape -- fixed output shape for the first 2 dimensions
372
373 Outputs:
374 hout -- resulting n-dimensional array
375
376 """
377
378 inh, inw = src.shape[:2]
379 outh, outw = outshape
380 hslice = (np.arange(outh) * (inh-1.)/(outh-1.)).round().astype(int)
381 wslice = (np.arange(outw) * (inw-1.)/(outw-1.)).round().astype(int)
382 hout = src[hslice, :][:, wslice]
383 return hout.copy()
384
385
386
387 -def v1like_pool(hin, conv_mode, lsum_ksize, outshape=None, order=1):
388 """ V1LIKE Pooling
389 Boxcar Low-pass filter featuremap-wise
390
391 Inputs:
392 hin -- a 3-dimensional array (width X height X n_channels)
393 lsum_ksize -- kernel size of the local sum ex: 17
394 outshape -- fixed output shape (2d slices)
395 order -- XXX
396
397 Outputs:
398 hout -- resulting 3-dimensional array
399
400 """
401
402 order = float(order)
403 assert(order >= 1)
404
405
406 if lsum_ksize is not None:
407 hin_h, hin_w, hin_d = hin.shape
408 dtype = hin.dtype
409 if conv_mode == "valid":
410 aux_shape = hin_h-lsum_ksize+1, hin_w-lsum_ksize+1, hin_d
411 aux = np.empty(aux_shape, dtype)
412 else:
413 aux = np.empty(hin.shape, dtype)
414 k1d = np.ones((lsum_ksize), 'f')
415
416 krow = k1d[None,:]
417 kcol = k1d[:,None]
418 for d in xrange(aux.shape[2]):
419 if order == 1:
420 aux[:,:,d] = conv(conv(hin[:,:,d], krow, conv_mode), kcol, conv_mode)
421 else:
422 aux[:,:,d] = conv(conv(hin[:,:,d]**order, krow, conv_mode), kcol, conv_mode)**(1./order)
423
424 else:
425 aux = hin
426
427
428 if outshape is None or outshape == aux.shape:
429 hout = aux
430 else:
431 hout = sresample(aux, outshape)
432
433 return hout
434
435
436
437 fft_cache = {}
438
440 """ V1LIKE linear filtering
441 Perform separable convolutions on an image with a set of filters
442
443 Inputs:
444 hin -- input image (a 2-dimensional array)
445 filterbank -- TODO list of tuples with 1d filters (row, col)
446 used to perform separable convolution
447 use_cache -- Boolean, use internal fft_cache (works _well_ if the input
448 shapes don't vary much, otherwise you'll blow away the memory)
449
450 Outputs:
451 hout -- a 3-dimensional array with outputs of the filters
452 (width X height X n_filters)
453
454 """
455
456 nfilters = len(filterbank)
457
458 filt0 = filterbank[0]
459 fft_shape = np.array(hin.shape) + np.array(filt0.shape) - 1
460 hin_fft = np.fft.fftn(hin, fft_shape)
461
462 if conv_mode == "valid":
463 hout_shape = list( np.array(hin.shape[:2]) - np.array(filt0.shape[:2]) + 1 ) + [nfilters]
464 hout_new = np.empty(hout_shape, 'f')
465 begy = filt0.shape[0]
466 endy = begy + hout_shape[0]
467 begx = filt0.shape[1]
468 endx = begx + hout_shape[1]
469 elif conv_mode == "same":
470 hout_shape = hin.shape[:2] + (nfilters,)
471 hout_new = np.empty(hout_shape, 'f')
472 begy = filt0.shape[0] / 2
473 endy = begy + hout_shape[0]
474 begx = filt0.shape[1] / 2
475 endx = begx + hout_shape[1]
476 else:
477 raise NotImplementedError
478
479 for i in xrange(nfilters):
480 filt = filterbank[i]
481
482 if use_cache:
483 key = (filt.tostring(), tuple(fft_shape))
484 if key in fft_cache:
485 filt_fft = fft_cache[key]
486 else:
487 filt_fft = scipy.fft.fftn(filt, fft_shape)
488 fft_cache[key] = filt_fft
489 else:
490 filt_fft = np.fft.fftn(filt, fft_shape)
491
492 res_fft = np.fft.ifftn(hin_fft*filt_fft)
493 res_fft = res_fft[begy:endy, begx:endx]
494 hout_new[:,:,i] = np.real(res_fft)
495
496 hout = hout_new
497
498 return hout
499
500
501 filt_l = None
502
504 """ Return a Gabor filterbank (generate it if needed)
505
506 Inputs:
507 params -- filters parameters (dict)
508
509 Outputs:
510 filt_l -- filterbank (list)
511
512 """
513
514 global filt_l
515
516 if filt_l is not None:
517 return filt_l
518
519
520 fh, fw = params['kshape']
521 orients = params['orients']
522 freqs = params['freqs']
523 phases = params['phases']
524
525
526 xc = fw/2
527 yc = fh/2
528 filt_l = []
529
530
531 for freq in freqs:
532 for orient in orients:
533 for phase in phases:
534
535 filt = gabor2d(xc,yc,xc,yc,
536 freq,orient,phase,
537 (fw,fh))
538 filt_l += [filt]
539
540 return filt_l
541
542
543
545 """ V1LIKE local normalization
546
547 Each pixel in the input image is divisively normalized by the L2 norm
548 of the pixels in a local neighborhood around it, and the result of this
549 division is placed in the output image.
550
551 Inputs:
552 hin -- a 3-dimensional array (width X height X rgb)
553 kshape -- kernel shape (tuple) ex: (3,3) for a 3x3 normalization
554 neighborhood
555 threshold -- magnitude threshold, if the vector's length is below
556 it doesn't get resized ex: 1.
557
558 Outputs:
559 hout -- a normalized 3-dimensional array (width X height X rgb)
560
561 """
562 eps = 1e-5
563 kh, kw = kshape
564 dtype = hin.dtype
565 hsrc = hin[:].copy()
566
567
568 hin_h, hin_w, hin_d = hin.shape
569 hout_h = hin_h
570 hout_w = hin_w
571
572 if conv_mode != "same":
573 hout_h = hout_h - kh + 1
574 hout_w = hout_w - kw + 1
575
576 hout_d = hin_d
577 hout = np.empty((hout_h, hout_w, hout_d), 'float32')
578
579
580
581 hin_d = hin.shape[-1]
582 kshape3d = list(kshape) + [hin_d]
583 ker = np.ones(kshape3d, dtype=dtype)
584 size = ker.size
585
586
587 hsq = hsrc ** 2.
588
589 kerH = ker[:,0,0][:, None]
590 kerW = ker[0,:,0][None, :]
591 kerD = ker[0,0,:][None, None, :]
592
593 hssq = conv(
594 conv(
595 conv(hsq, kerD, 'valid')[:,:,0].astype(dtype),
596 kerW,
597 conv_mode),
598 kerH,
599 conv_mode).astype(dtype)
600 hssq = hssq[:,:,None]
601
602
603 ys = kh / 2
604 xs = kw / 2
605 hout_h, hout_w, hout_d = hout.shape[-3:]
606 hs = hout_h
607 ws = hout_w
608
609 hsum = conv(
610 conv(
611 conv(hsrc,
612 kerD, 'valid')[:,:,0].astype(dtype),
613 kerW,
614 conv_mode),
615 kerH,
616 conv_mode).astype(dtype)
617
618 hsum = hsum[:,:,None]
619 if conv_mode == 'same':
620 hnum = hsrc - (hsum/size)
621 else:
622 hnum = hsrc[ys:ys+hs, xs:xs+ws] - (hsum/size)
623 val = (hssq - (hsum**2.)/size)
624 val[val<0] = 0
625 hdiv = val ** (1./2) + eps
626
627
628
629 np.putmask(hdiv, hdiv < (threshold+eps), 1.)
630 result = (hnum / hdiv)
631
632
633 hout[:] = result
634
635 return hout
636
637
638
639
641 """ Applies a simple V1-like model and generates a feature vector from
642 its outputs.
643
644 Inputs:
645 arr -- image's array
646 params -- representation parameters (dict)
647 featsel -- features to include to the vector (dict)
648
649 Outputs:
650 fvector -- corresponding feature vector
651
652 """
653
654 if 'conv_mode' not in params:
655 params['conv_mode'] = 'same'
656 if 'color_space' not in params:
657 params['color_space'] = 'gray'
658
659 arr = np.atleast_3d(arr)
660
661 smallest_edge = min(arr.shape[:2])
662
663 rep = params
664
665 preproc_lsum = rep['preproc']['lsum_ksize']
666 if preproc_lsum is None:
667 preproc_lsum = 1
668 smallest_edge -= (preproc_lsum-1)
669
670 normin_kshape = rep['normin']['kshape']
671 smallest_edge -= (normin_kshape[0]-1)
672
673 filter_kshape = rep['filter']['kshape']
674 smallest_edge -= (filter_kshape[0]-1)
675
676 normout_kshape = rep['normout']['kshape']
677 smallest_edge -= (normout_kshape[0]-1)
678
679 pool_lsum = rep['pool']['lsum_ksize']
680 smallest_edge -= (pool_lsum-1)
681
682 arrh, arrw, _ = arr.shape
683
684 if smallest_edge <= 0 and rep['conv_mode'] == 'valid':
685 if arrh > arrw:
686 new_w = arrw - smallest_edge + 1
687 new_h = int(np.round(1.*new_w * arrh/arrw))
688 print new_w, new_h
689 raise
690 elif arrh < arrw:
691 new_h = arrh - smallest_edge + 1
692 new_w = int(np.round(1.*new_h * arrw/arrh))
693 print new_w, new_h
694 raise
695 else:
696 pass
697
698
699 assert min(arr.shape[:2]) > 0
700
701
702 orig_imga = arr.astype("float32")[:,:,:3]
703
704
705 if orig_imga.shape[2] == 3 \
706 and (orig_imga[:,:,0]-orig_imga.mean(2) < 0.1*orig_imga.max()).all() \
707 and (orig_imga[:,:,1]-orig_imga.mean(2) < 0.1*orig_imga.max()).all() \
708 and (orig_imga[:,:,2]-orig_imga.mean(2) < 0.1*orig_imga.max()).all():
709 orig_imga = np.atleast_3d(orig_imga[:,:,0])
710
711
712
713 if orig_imga.min() == orig_imga.max():
714 raise MinMaxError("[ERROR] orig_imga.min() == orig_imga.max() "
715 "orig_imga.min() = %f, orig_imga.max() = %f"
716 % (orig_imga.min(), orig_imga.max())
717 )
718
719 orig_imga -= orig_imga.min()
720 orig_imga /= orig_imga.max()
721
722
723
724
725 if orig_imga.ndim == 2 or orig_imga.shape[2] == 1:
726 orig_imga_new = np.empty(orig_imga.shape[:2] + (3,), dtype="float32")
727 orig_imga.shape = orig_imga_new[:,:,0].shape
728 orig_imga_new[:,:,0] = 0.2989*orig_imga
729 orig_imga_new[:,:,1] = 0.5870*orig_imga
730 orig_imga_new[:,:,2] = 0.1141*orig_imga
731 orig_imga = orig_imga_new
732
733
734 if params['color_space'] == 'rgb':
735 orig_imga_conv = orig_imga
736
737
738 elif params['color_space'] == 'rg2':
739 orig_imga_conv = rg2_convert(orig_imga)
740 elif params['color_space'] == 'gray':
741 orig_imga_conv = gray_convert(orig_imga)
742 orig_imga_conv.shape = orig_imga_conv.shape + (1,)
743 elif params['color_space'] == 'opp':
744 orig_imga_conv = opp_convert(orig_imga)
745 elif params['color_space'] == 'oppnorm':
746 orig_imga_conv = oppnorm_convert(orig_imga)
747 elif params['color_space'] == 'chrom':
748 orig_imga_conv = chrom_convert(orig_imga)
749
750
751
752
753 elif params['color_space'] == 'hsv':
754 orig_imga_conv = hsv_convert(orig_imga)
755 else:
756 raise ValueError, "params['color_space'] not understood"
757
758
759
760 for cidx in xrange(orig_imga_conv.shape[2]):
761 imga0 = orig_imga_conv[:,:,cidx]
762
763 assert(imga0.min() != imga0.max())
764
765
766
767
768
769 if 'flip_lr' in params['preproc'] and params['preproc']['flip_lr']:
770 imga0 = imga0[:,::-1]
771
772 if 'flip_ud' in params['preproc'] and params['preproc']['flip_ud']:
773 imga0 = imga0[::-1,:]
774
775
776 lsum_ksize = params['preproc']['lsum_ksize']
777 conv_mode = params['conv_mode']
778 if lsum_ksize is not None:
779 k = np.ones((lsum_ksize), 'f') / lsum_ksize
780 imga0 = conv(conv(imga0, k[np.newaxis,:], conv_mode),
781 k[:,np.newaxis], conv_mode)
782
783
784 if 'whiten' not in params['preproc'] or params['preproc']['whiten']:
785 imga0 -= imga0.mean()
786 if imga0.std() != 0:
787 imga0 /= imga0.std()
788
789
790 imga1 = v1like_norm(imga0[:,:,np.newaxis], conv_mode, **params['normin'])
791
792
793 filt_l = get_gabor_filters(params['filter'])
794 imga2 = v1like_filter(imga1[:,:,0], conv_mode, filt_l)
795
796
797 minout = params['activ']['minout']
798 maxout = params['activ']['maxout']
799 imga3 = imga2.clip(minout, maxout)
800
801
802 imga4 = v1like_norm(imga3, conv_mode, **params['normout'])
803
804
805 if "sparsify" in params and params["sparsify"]:
806 imga4 = (imga4.max(2)[:,:,None] == imga4)
807
808
809
810 imga5 = v1like_pool(imga4, conv_mode, **params['pool'])
811 output = imga5
812
813 return output
814