Package pyvision :: Package features :: Module v1like
[hide private]
[frames] | no frames]

Source Code for Module pyvision.features.v1like

  1  # PyVision License 
  2  # 
  3  # Copyright (c) 2006-2008 David S. Bolme 
  4  # All rights reserved. 
  5  # 
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions 
  8  # are met: 
  9  #  
 10  # 1. Redistributions of source code must retain the above copyright 
 11  # notice, this list of conditions and the following disclaimer. 
 12  #  
 13  # 2. Redistributions in binary form must reproduce the above copyright 
 14  # notice, this list of conditions and the following disclaimer in the 
 15  # documentation and/or other materials provided with the distribution. 
 16  #  
 17  # 3. Neither name of copyright holders nor the names of its contributors 
 18  # may be used to endorse or promote products derived from this software 
 19  # without specific prior written permission. 
 20  #  
 21  #  
 22  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 23  # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 24  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 25  # A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR 
 26  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 27  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 28  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 29  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 30  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 31  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 32  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 33   
 34   
 35  ''' 
 36  This module contains code from Nicolas Pinto.  Added to PyVision by David Bolme 
 37   
 38  Pinto N, Cox DD, DiCarlo JJ (2008) Why is Real-World Visual Object Recognition Hard? 
 39  PLoS Computational Biology 4(1): e27 doi:10.1371/journal.pcbi.0040027 
 40   
 41  Created on May 29, 2011 
 42   
 43  @author: bolme 
 44  ''' 
 45   
 46  #import scipy as sp 
 47  import numpy as np 
 48  import scipy.signal 
 49  conv = scipy.signal.convolve 
 50  #import pyvision as pv 
 51   
52 -class V1Processing:
53
54 - def __init__(self,params,featsel):
55 ''' 56 @param params: representation parameters 57 @type params: dict 58 @param featsel: features to include in the vector 59 @type featsel: dict 60 ''' 61 self.params = params 62 self.featsel = featsel
63
64 - def extractFromImage(self,im):
65 ''' 66 Extract V1 like features from an image. 67 68 @param im: the image 69 @type im: pv.Image 70 ''' 71 #image should have channels as third axis 72 mat = im.asMatrix3D() 73 mat = mat.transpose((1,2,0)) 74 out = v1like_fromarray(mat,self.params) 75 return out
76 77 #============================================================================ 78 # Pinto's code below here 79 #============================================================================
80 -class MinMaxError(Exception): pass
81 82 V1LIKE_PARAMS_A = { 83 'preproc': { 84 'max_edge': 150, 85 'lsum_ksize': 3, 86 'resize_method': 'bicubic', 87 }, 88 'normin': { 89 'kshape': (3,3), 90 'threshold': 1.0, 91 }, 92 'filter': { 93 'kshape': (43,43), 94 'orients': [ o*np.pi/16 for o in xrange(16) ], 95 'freqs': [ 1./n for n in [2, 3, 4, 6, 11, 18] ], 96 'phases': [0], 97 }, 98 'activ': { 99 'minout': 0, 100 'maxout': 1, 101 }, 102 'normout': { 103 'kshape': (3,3), 104 'threshold': 1.0, 105 }, 106 'pool': { 107 'lsum_ksize': 17, 108 'outshape': (30,30), 109 }, 110 } 111 112 V1LIKE_FEATURES_A = { 113 'output': True, 114 'input_gray': None, 115 'input_colorhists': None, 116 'normin_hists': None, 117 'filter_hists': None, 118 'activ_hists': None, 119 'normout_hists': None, 120 'pool_hists': None, 121 } 122 123 # ------------------------------------------------------------------------------
124 -def gray_convert(arr):
125 #assert(arr.min()>=0 and arr.max()<=1) 126 # grayscale conversion 127 out = 0.2989*arr[:,:,0] + \ 128 0.5870*arr[:,:,1] + \ 129 0.1141*arr[:,:,2] 130 #out.shape = out.shape + (1,) 131 return out
132 133 134 # ------------------------------------------------------------------------------
135 -def opp_convert(arr):
136 #assert(arr.min()>=0 and arr.max()<=1) 137 out = np.empty_like(arr) 138 139 # red-green 140 out[:,:,0] = arr[:,:,0] - arr[:,:,1] 141 # blue-yellow 142 out[:,:,1] = arr[:,:,2] - arr[:,:,[0,1]].min(2) 143 # intensity 144 out[:,:,2] = arr.max(2) 145 146 return out
147 148 # ------------------------------------------------------------------------------
149 -def oppnorm_convert(arr, threshold=0.1):
150 #assert(arr.min()>=0 and arr.max()<=1) 151 #out = sp.empty_like(arr) 152 arr = arr.astype('float32') 153 out = np.empty(arr.shape[:2]+(2,), dtype='float32') 154 155 print out.shape 156 157 # red-green 158 out[:,:,0] = arr[:,:,0] - arr[:,:,1] 159 # blue-yellow 160 out[:,:,1] = arr[:,:,2] - arr[:,:,[0,1]].min(2) 161 # intensity 162 denom = arr.max(2) 163 164 mask = denom < threshold#*denom[:,:,2].mean() 165 166 out[:,:,0] /= denom 167 out[:,:,1] /= denom 168 169 np.putmask(out[:,:,0], mask, 0) 170 np.putmask(out[:,:,1], mask, 0) 171 172 return out
173 174 # ------------------------------------------------------------------------------
175 -def chrom_convert(arr):
176 #assert(arr.min()>=0 and arr.max()<=1) 177 178 opp = opp_convert(arr) 179 out = np.empty_like(opp[:,:,[0,1]]) 180 181 rg = opp[:,:,0] 182 by = opp[:,:,1] 183 intensity = opp[:,:,2] 184 185 lowi = intensity < 0.1*intensity.max() 186 rg[lowi] = 0 187 by[lowi] = 0 188 189 denom = intensity 190 denom[denom==0] = 1 191 out[:,:,0] = rg / denom 192 out[:,:,1] = by / denom 193 194 return out
195 196 # ------------------------------------------------------------------------------
197 -def rg2_convert(arr):
198 #assert(arr.min()>=0 and arr.max()<=1) 199 200 out = np.empty_like(arr[:,:,[0,1]]) 201 202 red = arr[:,:,0] 203 green = arr[:,:,1] 204 #blue = arr[:,:,2] 205 intensity = arr.mean(2) 206 207 lowi = intensity < 0.1*intensity.max() 208 arr[lowi] = 0 209 210 denom = arr.sum(2) 211 denom[denom==0] = 1 212 out[:,:,0] = red / denom 213 out[:,:,1] = green / denom 214 215 return out
216 217 # ------------------------------------------------------------------------------
218 -def hsv_convert(arr):
219 """ fast rgb_to_hsv using numpy array """ 220 221 # adapted from Arnar Flatberg 222 # http://www.mail-archive.com/numpy-discussion@scipy.org/msg06147.html 223 # it now handles NaN properly and mimics colorsys.rgb_to_hsv output 224 225 #assert(arr.min()>=0 and arr.max()<=1) 226 227 #arr = arr/255. 228 arr = arr.astype("float32") 229 out = np.empty_like(arr) 230 231 arr_max = arr.max(-1) 232 delta = arr.ptp(-1) 233 s = delta / arr_max 234 235 s[delta==0] = 0 236 237 # red is max 238 idx = (arr[:,:,0] == arr_max) 239 out[idx, 0] = (arr[idx, 1] - arr[idx, 2]) / delta[idx] 240 241 # green is max 242 idx = (arr[:,:,1] == arr_max) 243 out[idx, 0] = 2. + (arr[idx, 2] - arr[idx, 0] ) / delta[idx] 244 245 # blue is max 246 idx = (arr[:,:,2] == arr_max) 247 out[idx, 0] = 4. + (arr[idx, 0] - arr[idx, 1] ) / delta[idx] 248 249 out[:,:,0] = (out[:,:,0]/6.0) % 1.0 250 out[:,:,1] = s 251 out[:,:,2] = arr_max 252 253 # rescale back to [0, 255] 254 #out *= 255. 255 256 # remove NaN 257 out[np.isnan(out)] = 0 258 259 return out
260
261 -def rgb_convert(arr):
262 #assert(arr.min()>=0 and arr.max()<=1) 263 264 # force 3 dims 265 if arr.ndim == 2 or arr.shape[2] == 1: 266 arr_new = np.empty(arr.shape[:2] + (3,), dtype="float32") 267 arr_new[:,:,0] = arr.copy() 268 arr_new[:,:,1] = arr.copy() 269 arr_new[:,:,2] = arr.copy() 270 arr = arr_new 271 272 return arr
273 274 #def rephists2(hin, division, nfeatures): 275 # """ Compute local feature histograms from a given 3d (width X height X 276 # n_channels) image. 277 # 278 # These histograms are intended to serve as easy-to-compute additional 279 # features that can be concatenated onto the V1-like output vector to 280 # increase performance with little additional complexity. These additional 281 # features are only used in the V1LIKE+ (i.e. + 'easy tricks') version of 282 # the model. 283 # 284 # Inputs: 285 # hin -- 3d image (width X height X n_channels) 286 # division -- granularity of the local histograms (e.g. 2 corresponds 287 # to computing feature histograms in each quadrant) 288 # nfeatures -- desired number of resulting features 289 # 290 # Outputs: 291 # fvector -- feature vector 292 # 293 # """ 294 # 295 # hin_h, hin_w, hin_d = hin.shape 296 # nzones = hin_d * division**2 297 # nbins = nfeatures / nzones 298 # sx = (hin_w-1.)/division 299 # sy = (hin_h-1.)/division 300 # fvector = sp.zeros((nfeatures), 'f') 301 # hists = [] 302 # for d in xrange(hin_d): 303 # h = [sp.histogram(hin[j*sy:(j+1)*sy,i*sx:(i+1)*sx,d], bins=nbins)[0].ravel() 304 # for i in xrange(division) 305 # for j in xrange(division) 306 # ] 307 # hists += [h] 308 # 309 # hists = sp.array(hists, 'f').ravel() 310 # fvector[:hists.size] = hists 311 # return fvector 312 313
314 -def fastnorm(x):
315 """ Fast Euclidean Norm (L2) 316 317 This version should be faster than numpy.linalg.norm if 318 the dot function uses blas. 319 320 Inputs: 321 x -- numpy array 322 323 Output: 324 L2 norm from 1d representation of x 325 326 """ 327 xv = x.ravel() 328 return np.dot(xv, xv)**(1/2.)
329 330 331
332 -def gabor2d(gw, gh, gx0, gy0, wfreq, worient, wphase, shape):
333 """ Generate a gabor 2d array 334 335 Inputs: 336 gw -- width of the gaussian envelope 337 gh -- height of the gaussian envelope 338 gx0 -- x indice of center of the gaussian envelope 339 gy0 -- y indice of center of the gaussian envelope 340 wfreq -- frequency of the 2d wave 341 worient -- orientation of the 2d wave 342 wphase -- phase of the 2d wave 343 shape -- shape tuple (height, width) 344 345 Outputs: 346 gabor -- 2d gabor with zero-mean and unit-variance 347 348 """ 349 350 height, width = shape 351 y, x = np.mgrid[0:height, 0:width] 352 353 X = x * np.cos(worient) * wfreq 354 Y = y * np.sin(worient) * wfreq 355 356 env = np.exp( -np.pi * ( ((x-gx0)**2./gw**2.) + ((y-gy0)**2./gh**2.) ) ) 357 wave = np.exp( 1j*(2*np.pi*(X+Y) + wphase) ) 358 gabor = np.real(env * wave) 359 360 gabor -= gabor.mean() 361 gabor /= fastnorm(gabor) 362 363 return gabor
364 365
366 -def sresample(src, outshape):
367 """ Simple 3d array resampling 368 369 Inputs: 370 src -- a ndimensional array (dim>2) 371 outshape -- fixed output shape for the first 2 dimensions 372 373 Outputs: 374 hout -- resulting n-dimensional array 375 376 """ 377 378 inh, inw = src.shape[:2] 379 outh, outw = outshape 380 hslice = (np.arange(outh) * (inh-1.)/(outh-1.)).round().astype(int) 381 wslice = (np.arange(outw) * (inw-1.)/(outw-1.)).round().astype(int) 382 hout = src[hslice, :][:, wslice] 383 return hout.copy()
384 385 386
387 -def v1like_pool(hin, conv_mode, lsum_ksize, outshape=None, order=1):
388 """ V1LIKE Pooling 389 Boxcar Low-pass filter featuremap-wise 390 391 Inputs: 392 hin -- a 3-dimensional array (width X height X n_channels) 393 lsum_ksize -- kernel size of the local sum ex: 17 394 outshape -- fixed output shape (2d slices) 395 order -- XXX 396 397 Outputs: 398 hout -- resulting 3-dimensional array 399 400 """ 401 402 order = float(order) 403 assert(order >= 1) 404 405 # -- local sum 406 if lsum_ksize is not None: 407 hin_h, hin_w, hin_d = hin.shape 408 dtype = hin.dtype 409 if conv_mode == "valid": 410 aux_shape = hin_h-lsum_ksize+1, hin_w-lsum_ksize+1, hin_d 411 aux = np.empty(aux_shape, dtype) 412 else: 413 aux = np.empty(hin.shape, dtype) 414 k1d = np.ones((lsum_ksize), 'f') 415 #k2d = np.ones((lsum_ksize, lsum_ksize), 'f') 416 krow = k1d[None,:] 417 kcol = k1d[:,None] 418 for d in xrange(aux.shape[2]): 419 if order == 1: 420 aux[:,:,d] = conv(conv(hin[:,:,d], krow, conv_mode), kcol, conv_mode) 421 else: 422 aux[:,:,d] = conv(conv(hin[:,:,d]**order, krow, conv_mode), kcol, conv_mode)**(1./order) 423 424 else: 425 aux = hin 426 427 # -- resample output 428 if outshape is None or outshape == aux.shape: 429 hout = aux 430 else: 431 hout = sresample(aux, outshape) 432 433 return hout
434 435 436 437 fft_cache = {} 438
439 -def v1like_filter(hin, conv_mode, filterbank, use_cache=False):
440 """ V1LIKE linear filtering 441 Perform separable convolutions on an image with a set of filters 442 443 Inputs: 444 hin -- input image (a 2-dimensional array) 445 filterbank -- TODO list of tuples with 1d filters (row, col) 446 used to perform separable convolution 447 use_cache -- Boolean, use internal fft_cache (works _well_ if the input 448 shapes don't vary much, otherwise you'll blow away the memory) 449 450 Outputs: 451 hout -- a 3-dimensional array with outputs of the filters 452 (width X height X n_filters) 453 454 """ 455 456 nfilters = len(filterbank) 457 458 filt0 = filterbank[0] 459 fft_shape = np.array(hin.shape) + np.array(filt0.shape) - 1 460 hin_fft = np.fft.fftn(hin, fft_shape) 461 462 if conv_mode == "valid": 463 hout_shape = list( np.array(hin.shape[:2]) - np.array(filt0.shape[:2]) + 1 ) + [nfilters] 464 hout_new = np.empty(hout_shape, 'f') 465 begy = filt0.shape[0] 466 endy = begy + hout_shape[0] 467 begx = filt0.shape[1] 468 endx = begx + hout_shape[1] 469 elif conv_mode == "same": 470 hout_shape = hin.shape[:2] + (nfilters,) 471 hout_new = np.empty(hout_shape, 'f') 472 begy = filt0.shape[0] / 2 473 endy = begy + hout_shape[0] 474 begx = filt0.shape[1] / 2 475 endx = begx + hout_shape[1] 476 else: 477 raise NotImplementedError 478 479 for i in xrange(nfilters): 480 filt = filterbank[i] 481 482 if use_cache: 483 key = (filt.tostring(), tuple(fft_shape)) 484 if key in fft_cache: 485 filt_fft = fft_cache[key] 486 else: 487 filt_fft = scipy.fft.fftn(filt, fft_shape) 488 fft_cache[key] = filt_fft 489 else: 490 filt_fft = np.fft.fftn(filt, fft_shape) 491 492 res_fft = np.fft.ifftn(hin_fft*filt_fft) 493 res_fft = res_fft[begy:endy, begx:endx] 494 hout_new[:,:,i] = np.real(res_fft) 495 496 hout = hout_new 497 498 return hout
499 500 # TODO: make this not a global variable 501 filt_l = None 502
503 -def get_gabor_filters(params):
504 """ Return a Gabor filterbank (generate it if needed) 505 506 Inputs: 507 params -- filters parameters (dict) 508 509 Outputs: 510 filt_l -- filterbank (list) 511 512 """ 513 514 global filt_l 515 516 if filt_l is not None: 517 return filt_l 518 519 # -- get parameters 520 fh, fw = params['kshape'] 521 orients = params['orients'] 522 freqs = params['freqs'] 523 phases = params['phases'] 524 #nf = len(orients) * len(freqs) * len(phases) 525 #fbshape = nf, fh, fw 526 xc = fw/2 527 yc = fh/2 528 filt_l = [] 529 530 # -- build the filterbank 531 for freq in freqs: 532 for orient in orients: 533 for phase in phases: 534 # create 2d gabor 535 filt = gabor2d(xc,yc,xc,yc, 536 freq,orient,phase, 537 (fw,fh)) 538 filt_l += [filt] 539 540 return filt_l
541 542 543
544 -def v1like_norm(hin, conv_mode, kshape, threshold):
545 """ V1LIKE local normalization 546 547 Each pixel in the input image is divisively normalized by the L2 norm 548 of the pixels in a local neighborhood around it, and the result of this 549 division is placed in the output image. 550 551 Inputs: 552 hin -- a 3-dimensional array (width X height X rgb) 553 kshape -- kernel shape (tuple) ex: (3,3) for a 3x3 normalization 554 neighborhood 555 threshold -- magnitude threshold, if the vector's length is below 556 it doesn't get resized ex: 1. 557 558 Outputs: 559 hout -- a normalized 3-dimensional array (width X height X rgb) 560 561 """ 562 eps = 1e-5 563 kh, kw = kshape 564 dtype = hin.dtype 565 hsrc = hin[:].copy() 566 567 # -- prepare hout 568 hin_h, hin_w, hin_d = hin.shape 569 hout_h = hin_h# - kh + 1 570 hout_w = hin_w# - kw + 1 571 572 if conv_mode != "same": 573 hout_h = hout_h - kh + 1 574 hout_w = hout_w - kw + 1 575 576 hout_d = hin_d 577 hout = np.empty((hout_h, hout_w, hout_d), 'float32') 578 579 # -- compute numerator (hnum) and divisor (hdiv) 580 # sum kernel 581 hin_d = hin.shape[-1] 582 kshape3d = list(kshape) + [hin_d] 583 ker = np.ones(kshape3d, dtype=dtype) 584 size = ker.size 585 586 # compute sum-of-square 587 hsq = hsrc ** 2. 588 #hssq = conv(hsq, ker, conv_mode).astype(dtype) 589 kerH = ker[:,0,0][:, None]#, None] 590 kerW = ker[0,:,0][None, :]#, None] 591 kerD = ker[0,0,:][None, None, :] 592 593 hssq = conv( 594 conv( 595 conv(hsq, kerD, 'valid')[:,:,0].astype(dtype), 596 kerW, 597 conv_mode), 598 kerH, 599 conv_mode).astype(dtype) 600 hssq = hssq[:,:,None] 601 602 # compute hnum and hdiv 603 ys = kh / 2 604 xs = kw / 2 605 hout_h, hout_w, hout_d = hout.shape[-3:] 606 hs = hout_h 607 ws = hout_w 608 609 hsum = conv( 610 conv( 611 conv(hsrc, 612 kerD, 'valid')[:,:,0].astype(dtype), 613 kerW, 614 conv_mode), 615 kerH, 616 conv_mode).astype(dtype) 617 618 hsum = hsum[:,:,None] 619 if conv_mode == 'same': 620 hnum = hsrc - (hsum/size) 621 else: 622 hnum = hsrc[ys:ys+hs, xs:xs+ws] - (hsum/size) 623 val = (hssq - (hsum**2.)/size) 624 val[val<0] = 0 625 hdiv = val ** (1./2) + eps 626 627 # -- apply normalization 628 # 'volume' threshold 629 np.putmask(hdiv, hdiv < (threshold+eps), 1.) 630 result = (hnum / hdiv) 631 632 #print result.shape 633 hout[:] = result 634 #print hout.shape, hout.dtype 635 return hout
636 637 638 639
640 -def v1like_fromarray(arr, params):
641 """ Applies a simple V1-like model and generates a feature vector from 642 its outputs. 643 644 Inputs: 645 arr -- image's array 646 params -- representation parameters (dict) 647 featsel -- features to include to the vector (dict) 648 649 Outputs: 650 fvector -- corresponding feature vector 651 652 """ 653 654 if 'conv_mode' not in params: 655 params['conv_mode'] = 'same' 656 if 'color_space' not in params: 657 params['color_space'] = 'gray' 658 659 arr = np.atleast_3d(arr) 660 661 smallest_edge = min(arr.shape[:2]) 662 663 rep = params 664 665 preproc_lsum = rep['preproc']['lsum_ksize'] 666 if preproc_lsum is None: 667 preproc_lsum = 1 668 smallest_edge -= (preproc_lsum-1) 669 670 normin_kshape = rep['normin']['kshape'] 671 smallest_edge -= (normin_kshape[0]-1) 672 673 filter_kshape = rep['filter']['kshape'] 674 smallest_edge -= (filter_kshape[0]-1) 675 676 normout_kshape = rep['normout']['kshape'] 677 smallest_edge -= (normout_kshape[0]-1) 678 679 pool_lsum = rep['pool']['lsum_ksize'] 680 smallest_edge -= (pool_lsum-1) 681 682 arrh, arrw, _ = arr.shape 683 684 if smallest_edge <= 0 and rep['conv_mode'] == 'valid': 685 if arrh > arrw: 686 new_w = arrw - smallest_edge + 1 687 new_h = int(np.round(1.*new_w * arrh/arrw)) 688 print new_w, new_h 689 raise 690 elif arrh < arrw: 691 new_h = arrh - smallest_edge + 1 692 new_w = int(np.round(1.*new_h * arrw/arrh)) 693 print new_w, new_h 694 raise 695 else: 696 pass 697 698 # TODO: finish image size adjustment 699 assert min(arr.shape[:2]) > 0 700 701 # use the first 3 channels only 702 orig_imga = arr.astype("float32")[:,:,:3] 703 704 # make sure that we don't have a 3-channel (pseudo) gray image 705 if orig_imga.shape[2] == 3 \ 706 and (orig_imga[:,:,0]-orig_imga.mean(2) < 0.1*orig_imga.max()).all() \ 707 and (orig_imga[:,:,1]-orig_imga.mean(2) < 0.1*orig_imga.max()).all() \ 708 and (orig_imga[:,:,2]-orig_imga.mean(2) < 0.1*orig_imga.max()).all(): 709 orig_imga = np.atleast_3d(orig_imga[:,:,0]) 710 711 # rescale to [0,1] 712 #print orig_imga.min(), orig_imga.max() 713 if orig_imga.min() == orig_imga.max(): 714 raise MinMaxError("[ERROR] orig_imga.min() == orig_imga.max() " 715 "orig_imga.min() = %f, orig_imga.max() = %f" 716 % (orig_imga.min(), orig_imga.max()) 717 ) 718 719 orig_imga -= orig_imga.min() 720 orig_imga /= orig_imga.max() 721 722 # -- color conversion 723 # insure 3 dims 724 #print orig_imga.shape 725 if orig_imga.ndim == 2 or orig_imga.shape[2] == 1: 726 orig_imga_new = np.empty(orig_imga.shape[:2] + (3,), dtype="float32") 727 orig_imga.shape = orig_imga_new[:,:,0].shape 728 orig_imga_new[:,:,0] = 0.2989*orig_imga 729 orig_imga_new[:,:,1] = 0.5870*orig_imga 730 orig_imga_new[:,:,2] = 0.1141*orig_imga 731 orig_imga = orig_imga_new 732 733 # - 734 if params['color_space'] == 'rgb': 735 orig_imga_conv = orig_imga 736 # elif params['color_space'] == 'rg': 737 # orig_imga_conv = colorconv.rg_convert(orig_imga) 738 elif params['color_space'] == 'rg2': 739 orig_imga_conv = rg2_convert(orig_imga) 740 elif params['color_space'] == 'gray': 741 orig_imga_conv = gray_convert(orig_imga) 742 orig_imga_conv.shape = orig_imga_conv.shape + (1,) 743 elif params['color_space'] == 'opp': 744 orig_imga_conv = opp_convert(orig_imga) 745 elif params['color_space'] == 'oppnorm': 746 orig_imga_conv = oppnorm_convert(orig_imga) 747 elif params['color_space'] == 'chrom': 748 orig_imga_conv = chrom_convert(orig_imga) 749 # elif params['color_space'] == 'opponent': 750 # orig_imga_conv = colorconv.opponent_convert(orig_imga) 751 # elif params['color_space'] == 'W': 752 # orig_imga_conv = colorconv.W_convert(orig_imga) 753 elif params['color_space'] == 'hsv': 754 orig_imga_conv = hsv_convert(orig_imga) 755 else: 756 raise ValueError, "params['color_space'] not understood" 757 758 # -- process each map 759 760 for cidx in xrange(orig_imga_conv.shape[2]): 761 imga0 = orig_imga_conv[:,:,cidx] 762 763 assert(imga0.min() != imga0.max()) 764 765 # -- 0. preprocessing 766 #imga0 = imga0 / 255.0 767 768 # flip image ? 769 if 'flip_lr' in params['preproc'] and params['preproc']['flip_lr']: 770 imga0 = imga0[:,::-1] 771 772 if 'flip_ud' in params['preproc'] and params['preproc']['flip_ud']: 773 imga0 = imga0[::-1,:] 774 775 # smoothing 776 lsum_ksize = params['preproc']['lsum_ksize'] 777 conv_mode = params['conv_mode'] 778 if lsum_ksize is not None: 779 k = np.ones((lsum_ksize), 'f') / lsum_ksize 780 imga0 = conv(conv(imga0, k[np.newaxis,:], conv_mode), 781 k[:,np.newaxis], conv_mode) 782 783 # whiten full image (assume True) 784 if 'whiten' not in params['preproc'] or params['preproc']['whiten']: 785 imga0 -= imga0.mean() 786 if imga0.std() != 0: 787 imga0 /= imga0.std() 788 789 # -- 1. input normalization 790 imga1 = v1like_norm(imga0[:,:,np.newaxis], conv_mode, **params['normin']) 791 792 # -- 2. linear filtering 793 filt_l = get_gabor_filters(params['filter']) 794 imga2 = v1like_filter(imga1[:,:,0], conv_mode, filt_l) 795 796 # -- 3. simple non-linear activation (clamping) 797 minout = params['activ']['minout'] # sustain activity 798 maxout = params['activ']['maxout'] # saturation 799 imga3 = imga2.clip(minout, maxout) 800 801 # -- 4. output normalization 802 imga4 = v1like_norm(imga3, conv_mode, **params['normout']) 803 804 # -- 5. sparsify ? 805 if "sparsify" in params and params["sparsify"]: 806 imga4 = (imga4.max(2)[:,:,None] == imga4) 807 #raise 808 809 # -- 6. volume dimension reduction 810 imga5 = v1like_pool(imga4, conv_mode, **params['pool']) 811 output = imga5 812 813 return output
814