1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
'''
Created on Jan 14, 2011

@author: bolme

Filter a simple sigset, keeping only the entries whose images can be located
in an image directory. Does not currently support complex sigsets.
'''
import optparse
import os
import os.path

import pyvision as pv
47
48
# File extensions (upper case, including the leading dot) that are treated as
# images. Callers must upper-case an extension before testing membership.
IMAGE_EXTENSIONS = set(['.TIF', '.TIFF', '.JPG', '.JPEG', '.PNG', '.GIF', '.PPM', '.PGM', '.BMP'])
50
51
52
def parseBasename(path):
    '''
    Parse the basename, without any file extension, from a path.

    Everything from the first '.' of the final path component onward is
    dropped, so "a.b.jpg" yields "a" (not "a.b").

    @param path: a file path, or None.
    @returns: basename from /path/to/file/basename.eee, or None if path is None.
    '''
    if path is None:
        return None

    # Keep only the final path component, then cut at the first dot.
    filename = os.path.basename(path)
    return filename.partition('.')[0]
67
68
69
70
def locateFiles(sigset, imdir, verbose=True):
    '''
    Scan an image directory for the files listed in a sigset.

    If using the reduced image format, the images should be named using the
    recording ID with a standard image extension such as "jpg". Otherwise,
    the directories are scanned for images matching the basename of the
    filename. This means that the paths in the sigset do not have to be
    specified accurately, but problems do arise if multiple copies of an
    image are in the image directory.

    @param sigset: sequence of entries; for each entry, entry[1][0] is read
        as a mapping with the keys 'name' (recording ID) and 'file-name'.
        The sequence is iterated twice.
    @param imdir: root directory, searched recursively with os.walk.
    @param verbose: when true (the default), print every image file visited
        and a final summary line.
    @returns: a tuple (found, missing) of lists of sigset entries, split by
        whether an image was located for the entry's recording ID.
    @raises ValueError: if more than one image matches a recording ID.
    '''
    image_map = {}  # recording ID -> path of the located image (None until found)
    file_map = {}   # basename of the sigset file name -> recording ID

    for each in sigset:
        rec_id = each[1][0]['name']
        image_map[rec_id] = None
        basename = parseBasename(each[1][0]['file-name'])
        if basename is not None:
            file_map[basename] = rec_id

    n_images = 0

    for rootdir, _, files in os.walk(imdir):
        for filename in files:
            basename, ext = os.path.splitext(filename)
            if ext.upper() not in IMAGE_EXTENSIONS:
                continue
            if verbose:
                print(filename)

            # An image can match an entry in two ways: its basename equals the
            # basename of the sigset file name, or (reduced image format) its
            # basename is the recording ID itself. When both ways point at the
            # same recording, the file is counted once rather than being
            # reported as a duplicate of itself.
            matches = []
            if basename in file_map:
                matches.append(file_map[basename])
            if basename in image_map and basename not in matches:
                matches.append(basename)

            path = os.path.join(rootdir, filename)
            for rec_id in matches:
                if image_map[rec_id] is not None:
                    raise ValueError("Multiple images found matching recording id %s:\n First instance: %s\n Second instance: %s" % (rec_id, image_map[rec_id], path))
                image_map[rec_id] = path
                n_images += 1

    if verbose:
        print("Found %d of %d images." % (n_images, len(image_map)))

    # Split the entries, preserving sigset order within each list.
    found = []
    missing = []
    for item in sigset:
        if image_map[item[1][0]['name']] is None:
            missing.append(item)
        else:
            found.append(item)

    return found, missing
128
129
130
131
def parseOptions():
    '''
    Parse the command line arguments of this script.

    No options are defined beyond the optparse defaults. Exactly three or
    four positional arguments are accepted: the input sigset, the image
    directory, the output sigset for located entries, and optionally an
    output sigset for the entries that could not be located.

    @returns: the (options, args) pair produced by optparse.
    @raises SystemExit: via parser.error() when the argument count is wrong.
    '''
    usage = "usage: %prog [options] <input.xml> <image_directory> <found.xml> [<missing.xml>]\nReads a sigset and removes any entries that cannot be associated with an image."

    parser = optparse.OptionParser(usage)

    options, args = parser.parse_args()

    if len(args) not in (3, 4):
        parser.error("This program requires three or four arguments: an input sigset, an image directory, an output sigset, and optionally an output sigset for the missing entries.")

    return options, args
145
146
147
if __name__ == '__main__':
    # args: <input.xml> <image_directory> <found.xml> [<missing.xml>]
    options, args = parseOptions()

    sigset = pv.parseSigSet(args[0])
    imdir = args[1]

    # Split the sigset by whether an image could be located under imdir.
    found, missing = locateFiles(sigset, imdir)

    pv.saveSigset(found, args[2])

    # The sigset of entries without images is only written when a fourth
    # argument was supplied.
    if len(args) >= 4:
        pv.saveSigset(missing, args[3])
157