1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
'''
This module contains the AffineTransform class and a set of factory
functions used to create AffineTransform instances given different
sets of parameters. Most factory functions require information that
specifies the transformation and a size for the output image.
'''
39
40
41
42 import unittest
43 import os.path
44 import math
45 import copy
46 import weakref
47
48 try:
49 from PIL.Image import AFFINE,NEAREST,BILINEAR,BICUBIC,ANTIALIAS
50 except:
51 from Image import AFFINE,NEAREST,BILINEAR,BICUBIC,ANTIALIAS
52
53 from numpy import array,dot,sqrt
54 from numpy.linalg import inv,solve,lstsq
55 from scipy.ndimage import affine_transform
56 import random
57
58 import pyvision
59 import pyvision as pv
60 import numpy as np
61 import cv
62 import cv2
63
64 from pyvision.types.img import Image, TYPE_PIL, TYPE_MATRIX_2D, TYPE_MATRIX_RGB, TYPE_OPENCV, TYPE_OPENCV2, TYPE_OPENCV2BW
65 from pyvision.types.Point import Point
66 from pyvision.types.Rect import Rect
67 from pyvision.vector.RANSAC import RANSAC,LMeDs
68
69
def AffineNormalizePoints(points_b):
    '''
    Create a transform that centers a set of points such that their mean is
    (0,0), and then scales them such that their average distance from (0,0)
    is 1.0.

    @param points_b: list of link.Point to normalize
    @returns: an AffineTransform object
    '''
    # NOTE(review): the original 'def' line was lost when this source was
    # mangled; the signature is reconstructed from the docstring and body --
    # confirm against callers.
    if not points_b:
        raise ValueError("AffineNormalizePoints requires at least one point")

    # Compute the centroid of the point set.
    mean = Point(0,0)
    count = 0
    for point in points_b:
        mean += point
        count += 1
    mean = (1.0/count)*mean

    # Translate the points so the centroid sits at the origin.
    center = AffineTranslate(-mean.X(),-mean.Y(),(0,0))
    points_b = center.transformPoints(points_b)

    # Compute the mean distance of the centered points from the origin.
    mean_dist = 0.0
    count = 0
    for point in points_b:
        x,y = point.X(),point.Y()
        dist = sqrt(x*x+y*y)
        mean_dist += dist
        count += 1
    mean_dist = (1.0/count)*mean_dist

    # Scale so the mean distance becomes 1.0.  NOTE(review): if every point
    # coincides with the centroid, mean_dist is 0.0 and this still raises
    # ZeroDivisionError, exactly as the original code did.
    scale = AffineScale(1.0/mean_dist,(0,0))
    points_b = scale.transformPoints(points_b)

    # Compose: first center, then scale.
    norm = scale*center

    return norm
108
109
110
def AffineTranslate(dx,dy,new_size,interpolate=BILINEAR):
    '''
    Create a simple translation transform.

    @param dx: translation in the x direction
    @param dy: translation in the y direction
    @param new_size: new size for the image
    @param interpolate: PIL interpolate to use
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the
    # signature is reconstructed from the docstring and body -- confirm.
    matrix = array([[1,0,dx],[0,1,dy],[0,0,1]],'d')

    return AffineTransform(matrix,new_size,interpolate)
123
124
def AffineScale(scale,new_size,center=None,interpolate=BILINEAR):
    '''
    Create a simple scale transform.

    @param scale: the amount to scale the image.
    @param new_size: new size for the image.
    @param center: optional link.Point to scale about; if None, scaling is
        performed about the origin.
    @param interpolate: PIL interpolate to use.
    '''
    matrix = array([[scale,0,0],[0,scale,0],[0,0,1]],'d')

    # Use a distinct local name so the 'scale' parameter is not shadowed.
    scale_transform = AffineTransform(matrix,new_size,interpolate)
    if center is None:
        return scale_transform
    # Conjugate by translations so the scaling is centered on 'center'.
    return AffineTranslate(center.X(),center.Y(),new_size)*scale_transform*AffineTranslate(-center.X(),-center.Y(),new_size)
140
141
154
155
def AffineRotate(theta,new_size,center=None,interpolate=BILINEAR):
    '''
    Create a rotation about the origin (or about an optional center point).

    @param theta: the angle to rotate the image in radians.
    @param new_size: new size for the image.
    @param center: optional link.Point to rotate about; if None, rotation is
        performed about the origin.
    @param interpolate: PIL interpolate to use.
    '''
    matrix = array([[math.cos(theta),-math.sin(theta),0],[math.sin(theta),math.cos(theta),0],[0,0,1]],'d')

    rotate = AffineTransform(matrix,new_size,interpolate)
    if center is None:
        return rotate
    # Conjugate by translations so the rotation is centered on 'center'.
    return AffineTranslate(center.X(),center.Y(),new_size)*rotate*AffineTranslate(-center.X(),-center.Y(),new_size)
171
def AffineFromRect(rect,new_size,interpolate=BILINEAR):
    '''
    Create a transform from a source rectangle to a new image. This basically
    crops a rectangle out of the image and rescales it to the new size.

    @param rect: the source link.Rect.
    @param new_size: new size for the image.
    @param interpolate: PIL interpolate to use.
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the
    # signature is reconstructed from the docstring and body -- confirm.
    w,h = new_size

    # Scale the rect to the output size, then translate its corner to (0,0).
    x_scale = float(w)/rect.w
    y_scale = float(h)/rect.h
    x_trans = -rect.x*x_scale
    y_trans = -rect.y*y_scale
    matrix = array([[x_scale,0,x_trans],[0,y_scale,y_trans],[0,0,1]],'d')

    return AffineTransform(matrix,new_size,interpolate)
190
191
def AffineFromTile(center,new_size,interpolate=BILINEAR):
    '''
    Extract an image tile centered on a point.

    @param center: the center link.Point of the tile.
    @param new_size: new size for the image.
    @param interpolate: PIL interpolate to use.
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the name
    # and signature are reconstructed from the docstring and body -- confirm.
    w,h = new_size
    # Build a rect of the output size centered on 'center', then crop it.
    rect = Rect(center.X()-w/2,center.Y()-h/2,w,h)

    x_scale = float(w)/rect.w
    y_scale = float(h)/rect.h
    x_trans = -rect.x*x_scale
    y_trans = -rect.y*y_scale
    matrix = array([[x_scale,0,x_trans],[0,y_scale,y_trans],[0,0,1]],'d')

    return AffineTransform(matrix,new_size,interpolate)
210
211
def AffineFromPoints(src1,src2,dst1,dst2,new_size,interpolate=BILINEAR):
    '''
    An affine transform that will rotate, translate, and scale to map one
    set of points to the other. For example, to align eye coordinates in face images.

    Find a transform (a,b,tx,ty) such that it maps the source points to the
    destination points::

        a*x1-b*y1+tx = x2
        b*x1+a*y1+ty = y2

    The mapping between the two point pairs creates a set of four linear
    equations with four unknowns. This set of equations is solved to find
    the transform.

    @param src1: the first link.Point in the source image.
    @param src2: the second link.Point in the source image.
    @param dst1: the first link.Point in the destination image.
    @param dst2: the second link.Point in the destination image.
    @param new_size: new size for the image.
    @param interpolate: PIL interpolate to use.
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the
    # signature is reconstructed from the docstring and body -- confirm.

    # Two equations per point correspondence -> 4 equations, 4 unknowns.
    A = [[src1.X(),-src1.Y(),1,0],
         [src1.Y(),src1.X(),0,1],
         [src2.X(),-src2.Y(),1,0],
         [src2.Y(),src2.X(),0,1]]
    b = [dst1.X(),dst1.Y(),dst2.X(),dst2.Y()]
    A = array(A)
    b = array(b)
    result = solve(A,b)

    a,b,tx,ty = result

    # Similarity transform: rotation+scale in (a,b), translation in (tx,ty).
    matrix = array([[a,-b,tx],[b,a,ty],[0,0,1]],'d')

    return AffineTransform(matrix,new_size,interpolate)
249
250
251
252
def AffineFromPointsLS(src,dst,new_size,interpolate=BILINEAR,normalize=True):
    '''
    An affine transform that will rotate, translate, and scale to map one
    set of points to the other. For example, to align eye coordinates in face images.

    Find a transform (a,b,tx,ty) such that it maps the source points to the
    destination points::

        a*x1-b*y1+tx = x2
        b*x1+a*y1+ty = y2

    This method minimizes the squared error to find an optimal fit between the
    points.

    @param src: a list of link.Points in the source image.
    @param dst: a list of link.Points in the destination image.
    @param new_size: new size for the image.
    @param interpolate: PIL interpolate to use.
    @param normalize: if True, normalize both point sets before solving for
        numerical stability, then fold the normalizations back into the result.
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the
    # signature (parameter order and defaults) is reconstructed from the
    # docstring and body -- confirm against callers.
    if normalize:
        # Normalize both point sets so the least-squares problem is well
        # conditioned.
        src_norm = AffineNormalizePoints(src)
        src = src_norm.transformPoints(src)
        dst_norm = AffineNormalizePoints(dst)
        dst = dst_norm.transformPoints(dst)

    # Build the overdetermined system: two equations per point pair.
    A = []
    b = []
    for i in range(len(src)):
        A.append([src[i].X(),-src[i].Y(),1,0])
        A.append([src[i].Y(), src[i].X(),0,1])
        b.append(dst[i].X())
        b.append(dst[i].Y())

    A = array(A,dtype=np.float64)
    b = array(b,dtype=np.float64)

    result,_,_,_ = lstsq(A,b)

    a,b,tx,ty = result

    matrix = array([[a,-b,tx],[b,a,ty],[0,0,1]],'d')

    if normalize:
        # Undo the normalization: raw src -> norm src -> norm dst -> raw dst.
        matrix = dot(dst_norm.inverse,dot(matrix,src_norm.matrix))

    return AffineTransform(matrix,new_size,interpolate)
301
302
def AffineFromPointsRANSAC(src,dst,new_size,interpolate=BILINEAR,normalize=True,tol=0.15):
    '''
    An affine transform that will rotate, translate, and scale to map one
    set of points to the other. For example, to align eye coordinates in face images.

    Find a transform (a,b,tx,ty) such that it maps the source points to the
    destination points::

        a*x1-b*y1+tx = x2
        b*x1+a*y1+ty = y2

    This method minimizes the squared error to find an optimal fit between the
    points. Instead of a LS solver the RANSAC solver is used to
    produce a transformation that is robust to outliers.

    @param src: a list of link.Points in the source image.
    @param dst: a list of link.Points in the destination image.
    @param new_size: new size for the image.
    @param interpolate: PIL interpolate to use.
    @param normalize: if True, normalize both point sets before solving.
    @param tol: inlier tolerance passed through to the RANSAC solver.
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the
    # signature (including the tol default) is reconstructed from the body
    # and docstring -- confirm against callers.
    if normalize:
        # Normalize both point sets for numerical stability.
        src_norm = AffineNormalizePoints(src)
        src = src_norm.transformPoints(src)
        dst_norm = AffineNormalizePoints(dst)
        dst = dst_norm.transformPoints(dst)

    # Build the overdetermined system: two equations per point pair.
    A = []
    b = []
    for i in range(len(src)):
        A.append([src[i].X(),-src[i].Y(),1,0])
        A.append([src[i].Y(), src[i].X(),0,1])
        b.append(dst[i].X())
        b.append(dst[i].Y())

    A = array(A)
    b = array(b)

    # group=2 keeps the two rows of each point pair together when sampling.
    result = RANSAC(A,b,tol=tol,group=2)

    a,b,tx,ty = result

    matrix = array([[a,-b,tx],[b,a,ty],[0,0,1]],'d')

    if normalize:
        # Undo the normalization: raw src -> norm src -> norm dst -> raw dst.
        matrix = dot(dst_norm.inverse,dot(matrix,src_norm.matrix))

    return AffineTransform(matrix,new_size,interpolate)
354
355
def AffineFromPointsLMeDs(src,dst,new_size,interpolate=BILINEAR,normalize=True):
    '''
    An affine transform that will rotate, translate, and scale to map one
    set of points to the other. For example, to align eye coordinates in face images.

    Find a transform (a,b,tx,ty) such that it maps the source points to the
    destination points::

        a*x1-b*y1+tx = x2
        b*x1+a*y1+ty = y2

    This method minimizes the squared error to find an optimal fit between the
    points. Instead of a LS solver the LMeDs (least median of squares) solver
    is used to produce a transformation that is robust to outliers.

    @param src: a list of link.Points in the source image.
    @param dst: a list of link.Points in the destination image.
    @param new_size: new size for the image.
    @param interpolate: PIL interpolate to use.
    @param normalize: if True, normalize both point sets before solving.
    '''
    # NOTE(review): the 'def' line was lost in this mangled source; the
    # signature is reconstructed from the docstring and body -- confirm.
    if normalize:
        # Normalize both point sets for numerical stability.
        src_norm = AffineNormalizePoints(src)
        src = src_norm.transformPoints(src)
        dst_norm = AffineNormalizePoints(dst)
        dst = dst_norm.transformPoints(dst)

    # Build the overdetermined system: two equations per point pair.
    A = []
    b = []
    for i in range(len(src)):
        A.append([src[i].X(),-src[i].Y(),1,0])
        A.append([src[i].Y(), src[i].X(),0,1])
        b.append(dst[i].X())
        b.append(dst[i].Y())

    A = array(A)
    b = array(b)

    result = LMeDs(A,b)

    a,b,tx,ty = result

    matrix = array([[a,-b,tx],[b,a,ty],[0,0,1]],'d')

    if normalize:
        # Undo the normalization: raw src -> norm src -> norm dst -> raw dst.
        matrix = dot(dst_norm.inverse,dot(matrix,src_norm.matrix))

    return AffineTransform(matrix,new_size,interpolate)
407
408
def AffinePerturb(Dscale, Drotate, Dtranslate, new_size, mirror=False, flip=False, rng=None):
    '''
    Generates an link.AffineTransform that slightly perturbs the image. Primarily
    used to generate more training images.

    The perturbations include small scale, rotation, and translations. The
    transform can also mirror the image in the left/right direction or flip the
    top and bottom as other ways to generate synthetic training images.

    @param Dscale: the difference in scale [1.0+Dscale, 1.0-Dscale].
    @param Drotate: the range of difference in rotation [-Drotate,+Drotate].
    @param Dtranslate: the range of difference in translation [-Dtranslate,+Dtranslate].
    @param new_size: new size for the image.
    @param mirror: Include mirror perturbations.
    @param flip: Include flipped perturbations.
    @param rng: optional random number generator exposing uniform()/choice()
        (same interface as the random module); defaults to the random module.
    '''
    tile_size = new_size
    w,h = tile_size
    if rng is None:
        rng = random

    # Draw the perturbation parameters.  The order of these rng calls is part
    # of the reproducibility contract when a seeded rng is passed in; do not
    # reorder them.
    tx = rng.uniform(-Dtranslate,Dtranslate)
    ty = rng.uniform(-Dtranslate,Dtranslate)
    if mirror:
        sx = rng.choice([-1.,1.])
    else:
        sx = 1.0
    if flip:
        sy = rng.choice([-1.,1.])
    else:
        sy = 1.0
    s = rng.uniform(1-Dscale,1+Dscale)
    r = rng.uniform(-Drotate,Drotate)

    # Compose: move center to origin, mirror/flip, scale, rotate, translate,
    # then move the center back.
    there = AffineTranslate(-w/2,-h/2,tile_size)
    flipflop = AffineNonUniformScale(sx,sy,tile_size)
    scale = AffineScale(s,tile_size)
    rotate = AffineRotate(r,tile_size)
    translate = AffineTranslate(tx,ty,tile_size)
    back = AffineTranslate(w/2,h/2,tile_size)
    affine = back*translate*rotate*scale*flipflop*there

    return affine
452
453
681
682
683
685
687 fname = os.path.join(pyvision.__path__[0],'data','nonface','NONFACE_13.jpg')
688 self.test_image = Image(fname)
689
690
692 transform = AffineRotate(3.14/8,(640,480))
693 _ = transform.transformImage(self.test_image)
694
695
696 pt = transform.transformPoint(Point(320,240))
697 self.assertAlmostEqual(pt.X(),203.86594448424472)
698 self.assertAlmostEqual(pt.Y(),344.14920700118842)
699
700 pt = transform.invertPoint(Point(320,240))
701 self.assertAlmostEqual(pt.X(),387.46570317672939)
702 self.assertAlmostEqual(pt.Y(),99.349528744542198)
703
705 transform = AffineScale(1.5,(640,480))
706 _ = transform.transformImage(self.test_image)
707
708
709 pt = transform.transformPoint(Point(320,240))
710 self.assertAlmostEqual(pt.X(),480.)
711 self.assertAlmostEqual(pt.Y(),360.)
712
713 pt = transform.invertPoint(Point(320,240))
714 self.assertAlmostEqual(pt.X(),213.33333333333331)
715 self.assertAlmostEqual(pt.Y(),160.)
716
718 transform = AffineTranslate(10.,15.,(640,480))
719 _ = transform.transformImage(self.test_image)
720
721
722 pt = transform.transformPoint(Point(320,240))
723 self.assertAlmostEqual(pt.X(),330.)
724 self.assertAlmostEqual(pt.Y(),255.)
725
726 pt = transform.invertPoint(Point(320,240))
727 self.assertAlmostEqual(pt.X(),310.)
728 self.assertAlmostEqual(pt.Y(),225.)
729
731
732 transform = AffineFromRect(Rect(100,100,300,300),(100,100))
733 _ = transform.transformImage(self.test_image)
734
735
736 pt = transform.transformPoint(Point(320,240))
737 self.assertAlmostEqual(pt.X(),73.333333333333329)
738 self.assertAlmostEqual(pt.Y(),46.666666666666671)
739
740 pt = transform.invertPoint(Point(50.,50.))
741 self.assertAlmostEqual(pt.X(),250.0)
742 self.assertAlmostEqual(pt.Y(),250.0)
743
747
751
755
759
775
776
792
793
809
810
826
827
831
833 fname = os.path.join(pyvision.__path__[0],'data','nonface','NONFACE_13.jpg')
834 im_a = Image(fname)
835 ref = weakref.ref(im_a)
836
837 self.assertEquals(ref(), im_a)
838
839 tmp = im_a
840 del im_a
841
842 self.assertEquals(ref(), tmp)
843
844 del tmp
845
846 self.assertEquals(ref(), None)
847
848
850 fname = os.path.join(pyvision.__path__[0],'data','nonface','NONFACE_13.jpg')
851 im_a = Image(fname)
852
853 w,h = im_a.size
854
855
856 tmp1 = AffineScale(0.1,(w/10,h/10)).transformImage(im_a)
857
858
859 tmp2 = AffineScale(10.0,(w,h)).transformImage(tmp1,use_orig=False)
860 tmp2.annotateLabel(pv.Point(10,10), "This image should be blurry.")
861
862
863 tmp3 = AffineScale(10.0,(w,h)).transformImage(tmp1,use_orig=True)
864 tmp3.annotateLabel(pv.Point(10,10), "This image should be sharp.")
865
866
867 del im_a
868
869 tmp4 = AffineScale(10.0,(w,h)).transformImage(tmp1,use_orig=True)
870 tmp4.annotateLabel(pv.Point(10,10), "This image should be blurry.")
871
872
874 fname = os.path.join(pv.__path__[0],'data','nonface','NONFACE_13.jpg')
875 torig = tprev = im_a = Image(fname)
876
877 w,h = im_a.size
878
879
880 aff = AffineScale(0.5,(w/2,h/2))
881 accu = aff
882 torig = aff.transformImage(torig)
883 tprev = aff.transformImage(tprev,use_orig=False)
884 taccu = accu.transformImage(im_a)
885
886 torig.annotateLabel(pv.Point(10,10), "use_orig = True")
887 tprev.annotateLabel(pv.Point(10,10), "use_orig = False")
888 taccu.annotateLabel(pv.Point(10,10), "accumulated")
889
890
891
892
893
894
895 aff = AffineTranslate(20,20,(w/2,h/2))
896 accu = aff*accu
897 torig = aff.transformImage(torig)
898 tprev = aff.transformImage(tprev,use_orig=False)
899 taccu = accu.transformImage(im_a)
900
901 torig.annotateLabel(pv.Point(10,10), "use_orig = True")
902 tprev.annotateLabel(pv.Point(10,10), "use_orig = False")
903 taccu.annotateLabel(pv.Point(10,10), "accumulated")
904
905
906
907
908
909
910
911 aff = AffineRotate(np.pi/4,(w/2,h/2))
912 accu = aff*accu
913 torig = aff.transformImage(torig)
914 tprev = aff.transformImage(tprev,use_orig=False)
915 taccu = accu.transformImage(im_a)
916
917 torig.annotateLabel(pv.Point(10,10), "use_orig = True")
918 tprev.annotateLabel(pv.Point(10,10), "use_orig = False")
919 taccu.annotateLabel(pv.Point(10,10), "accumulated")
920
921
922
923
924
925
926
927
928 aff = AffineTranslate(100,-10,(w/2,h/2))
929 accu = aff*accu
930 torig = aff.transformImage(torig)
931 tprev = aff.transformImage(tprev,use_orig=False)
932 taccu = accu.transformImage(im_a)
933
934 torig.annotateLabel(pv.Point(10,10), "use_orig = True")
935 tprev.annotateLabel(pv.Point(10,10), "use_orig = False")
936 taccu.annotateLabel(pv.Point(10,10), "accumulated")
937
938
939
940
941
942
943
944 aff = AffineScale(2.0,(w,h))
945 accu = aff*accu
946 torig = aff.transformImage(torig)
947 tprev = aff.transformImage(tprev,use_orig=False)
948 taccu = accu.transformImage(im_a)
949
950 torig.annotateLabel(pv.Point(10,10), "use_orig = True")
951 tprev.annotateLabel(pv.Point(10,10), "use_orig = False")
952 taccu.annotateLabel(pv.Point(10,10), "accumulated")
953
954
955
956
957