Package pyvision :: Package ml :: Module libsvm
[hide private]
[frames] | no frames]

Source Code for Module pyvision.ml.libsvm

  1  ''' 
  2  Created on Nov 23, 2010 
  3   
  4  @author: bolme 
  5  ''' 
  6   
  7  import svm 
  8  import numpy as np 
  9  import tempfile 
 10  import os 
 11   
class UntrainedClassifierError(Exception):
    '''Raised when a prediction is requested from a model that has not been trained.'''
14 15
16 -class _LabelMap:
17 '''Converts class labels back and forth to integer codes.''' 18
19 - def __init__(self):
20 '''Create and initialze the mapping.''' 21 self._forward_map = {} 22 self._backward_map = {} 23 self._current_code = 0
24
25 - def toCode(self,label):
26 '''Converts a label to an integer code. Codes are defined as needed.''' 27 if not self._forward_map.has_key(label): 28 self._forward_map[label] = self._current_code 29 self._backward_map[self._current_code] = label 30 self._current_code += 1 31 32 return self._forward_map[label]
33
34 - def toLabel(self,code):
35 '''Converts a code back into a label''' 36 return self._backward_map[code]
37
38 -class _LabelScale:
39 '''Converts class labels back and forth to integer codes.''' 40
41 - def __init__(self):
42 '''Create and initialze the mapping.''' 43 self.mean = 0.0 44 self.std = 0.0
45
46 - def train(self,labels,data):
47 labels = np.array(labels) 48 49 self.mean = labels.mean() 50 self.std = labels.std() 51 52 labels = (labels - self.mean)/self.std 53 54 return labels,data
55
56 - def toScaled(self,label):
57 '''''' 58 return (label - self.mean)/self.std
59
60 - def toOrig(self,code):
61 '''''' 62 return code * self.std + self.mean
63 64
class FeaturePreprocessor:
    '''
    Interface for feature preprocessing steps.

    Subclasses override train(), which receives the training labels and data,
    and __call__(), which receives a single feature vector.
    '''

    def __init__(self):
        '''The base class keeps no state.'''

    def train(self, labels, data):
        '''
        Fit the preprocessor. Must be overridden by subclasses.

        @raises NotImplementedError: Always, in the base class.
        '''
        raise NotImplementedError

    def __call__(self, vector):
        '''
        Transform one feature vector. Must be overridden by subclasses.

        @raises NotImplementedError: Always, in the base class.
        '''
        raise NotImplementedError
74
class NoNorm(FeaturePreprocessor):
    '''A preprocessor that leaves labels, data and feature vectors unchanged.'''

    def __init__(self):
        '''There is nothing to configure.'''

    def train(self, labels, data):
        '''
        Return the training set exactly as it was given.

        @param labels: The training labels.
        @param data: The training feature vectors.
        @returns: The tuple (labels, data), unmodified.
        '''
        unchanged = (labels, data)
        return unchanged

    def __call__(self, vector):
        '''
        Return the feature vector exactly as it was given.

        @param vector: A feature vector.
        @returns: The same object that was passed in.
        '''
        return vector
85 86
class ZNormValues(FeaturePreprocessor):
    '''
    Normalizes each feature (column) by subtracting the training mean and
    dividing by the training standard deviation.
    '''

    def __init__(self):
        '''Create the preprocessor. Statistics are computed by train().'''
        pass

    def train(self, labels, data):
        '''
        Compute the per-feature statistics and normalize the training data.

        @param labels: The training labels. They are passed through untouched.
        @param data: A 2D array (or nested list) with one feature vector per row.
        @returns: A tuple (labels, normalized_data).
        '''
        data = np.asarray(data)

        # One mean/std per column. These 1D arrays broadcast across the rows
        # of the 2D data, so no temporary reshape is needed.
        self.means = data.mean(axis=0)
        self.stds = data.std(axis=0)

        # A feature that is constant over the training set has zero spread.
        # Dividing by it would produce NaN/inf, so such features are only
        # mean-centered (unit scale).
        self.stds[np.abs(self.stds) < 1e-6] = 1.0

        data = (data - self.means) / self.stds

        return labels, data

    def __call__(self, vector):
        '''
        Normalize a feature vector using the statistics learned by train().

        @param vector: A feature vector (array, list or tuple).
        @returns: The normalized vector as a numpy array.
        '''
        vector = np.array(vector)
        vector = (vector - self.means) / self.stds
        return vector
115
class Classifier:
    '''Marker base class for classification models.'''
118
class Regression:
    '''Marker base class for regression models.'''
121
class SVC(Classifier):
    '''A support vector machine classifier (C-SVC with an RBF kernel).'''

    def __init__(self, C=1.0, gamma=1.0, preprocessor=None):
        '''
        Create a support vector machine classifier.

        @param C: Passed to the svm parameters as C.
        @param gamma: Passed to the svm parameters as gamma.
        @param preprocessor: A FeaturePreprocessor applied to the data before
                             training and prediction. Defaults to a new
                             ZNormValues instance.
        '''
        self._model = None

        # The default is built here rather than in the signature: a default
        # evaluated in the signature is a single object shared by every
        # instance, so training one classifier would overwrite the
        # normalization statistics used by all the others.
        if preprocessor is None:
            preprocessor = ZNormValues()

        assert isinstance(preprocessor, FeaturePreprocessor)
        self._preprocessor = preprocessor

        self._C = C
        self._gamma = gamma
        self._label_map = _LabelMap()

    def __getstate__(self):
        '''
        This function is necessary for pickling.

        @returns: A copy of the instance dictionary in which the svm model,
                  if trained, is replaced by the text of its saved model file.
        '''
        # Translate everything but the svm because that cannot be simply pickled.
        state = dict(self.__dict__)

        model = state.get('_model')
        if model is not None:
            # mkstemp creates the file atomically, which avoids the race that
            # mktemp has between choosing a name and creating the file.
            handle, filename = tempfile.mkstemp()
            os.close(handle)
            try:
                model.save(filename)
                with open(filename) as model_file:
                    state['_model'] = model_file.read()
            finally:
                os.remove(filename)

        return state

    def __setstate__(self, state):
        '''
        This function is necessary for pickling.

        @param state: A dictionary produced by __getstate__().
        '''
        # Translate everything but the svm because that cannot be simply pickled.
        for key, value in state.items():
            if key == '_model' and value is not None:
                handle, filename = tempfile.mkstemp()
                os.close(handle)
                try:
                    # The file must be closed (flushed) before svm reads it.
                    with open(filename, 'w') as model_file:
                        model_file.write(value)
                    self._model = svm.svm_model(filename)
                finally:
                    os.remove(filename)
                continue

            # Everything else, including an untrained (None) model, is
            # restored as is.
            self.__dict__[key] = value

    def train(self, labels, data):
        '''
        Train the classifier.

        @param labels: A list of class labels.
        @param data: A 2D array or list of feature vectors. One feature vector per row.
        '''
        # Check the types and convert to np arrays
        if isinstance(data, (list, tuple)):
            data = np.array(data, dtype=np.double)

        # The svm works on integer codes, not on arbitrary label objects.
        labels = np.array([self._label_map.toCode(each) for each in labels])

        # Preprocess the data
        labels, data = self._preprocessor.train(labels, data)

        # Create the svm parameter data and problem description
        param = svm.svm_parameter(svm_type=svm.C_SVC, kernel_type=svm.RBF,
                                  C=self._C, gamma=self._gamma)
        prob = svm.svm_problem(labels.tolist(), data.tolist())

        # train the svm
        self._model = svm.svm_model(prob, param)

    def __call__(self, vector):
        '''
        Classify a feature vector.

        @param vector: A feature vector (array, list or tuple).
        @returns: The predicted class label.
        @raises UntrainedClassifierError: If train() has not been called.
        '''
        if self._model is None:
            raise UntrainedClassifierError()

        # convert to an array
        if isinstance(vector, (list, tuple)):
            vector = np.array(vector, dtype=np.double)

        # preprocess the data
        vector = self._preprocessor(vector)

        # return the prediction
        code = self._model.predict(vector.tolist())
        return self._label_map.toLabel(code)
211
class SVR(Regression):
    '''A support vector machine regressor (epsilon-SVR with an RBF kernel).'''

    def __init__(self, epsilon=0.01, gamma=1.0, preprocessor=None):
        '''
        Create a support vector machine regressor.

        @param epsilon: Passed to the svm parameters as eps.
        @param gamma: Passed to the svm parameters as gamma.
        @param preprocessor: A FeaturePreprocessor applied to the data before
                             training and prediction. Defaults to a new
                             ZNormValues instance.
        '''
        self._model = None

        # The default is built here rather than in the signature: a default
        # evaluated in the signature is a single object shared by every
        # instance, so training one regressor would overwrite the
        # normalization statistics used by all the others.
        if preprocessor is None:
            preprocessor = ZNormValues()

        assert isinstance(preprocessor, FeaturePreprocessor)
        self._preprocessor = preprocessor
        self._label_scale = _LabelScale()

        self._epsilon = epsilon
        self._gamma = gamma

    def __getstate__(self):
        '''
        This function is necessary for pickling.

        @returns: A copy of the instance dictionary in which the svm model,
                  if trained, is replaced by the text of its saved model file.
        '''
        # Translate everything but the svm because that cannot be simply pickled.
        state = dict(self.__dict__)

        model = state.get('_model')
        if model is not None:
            # mkstemp creates the file atomically, which avoids the race that
            # mktemp has between choosing a name and creating the file.
            handle, filename = tempfile.mkstemp()
            os.close(handle)
            try:
                model.save(filename)
                with open(filename) as model_file:
                    state['_model'] = model_file.read()
            finally:
                os.remove(filename)

        return state

    def __setstate__(self, state):
        '''
        This function is necessary for pickling.

        @param state: A dictionary produced by __getstate__().
        '''
        # Translate everything but the svm because that cannot be simply pickled.
        for key, value in state.items():
            if key == '_model' and value is not None:
                handle, filename = tempfile.mkstemp()
                os.close(handle)
                try:
                    # The file must be closed (flushed) before svm reads it.
                    with open(filename, 'w') as model_file:
                        model_file.write(value)
                    self._model = svm.svm_model(filename)
                finally:
                    os.remove(filename)
                continue

            # Everything else, including an untrained (None) model, is
            # restored as is.
            self.__dict__[key] = value

    def train(self, labels, data):
        '''
        Train the regressor.

        @param labels: A list of numeric target values.
        @param data: A 2D array or list of feature vectors. One feature vector per row.
        '''
        # Check the types and convert to np arrays
        if isinstance(data, (list, tuple)):
            data = np.array(data, dtype=np.double)

        labels = np.array(labels, dtype=np.double)

        # Preprocess the data, then standardize the targets
        labels, data = self._preprocessor.train(labels, data)
        labels, data = self._label_scale.train(labels, data)

        # Create the svm parameter data and problem description
        # NOTE(review): epsilon is passed as "eps". In libsvm that name is
        # presumably the solver stopping tolerance, while the epsilon-SVR
        # loss width is "p" -- confirm which one is intended.
        param = svm.svm_parameter(svm_type=svm.EPSILON_SVR, kernel_type=svm.RBF,
                                  eps=self._epsilon, gamma=self._gamma)
        prob = svm.svm_problem(labels.tolist(), data.tolist())

        # train the svm
        self._model = svm.svm_model(prob, param)

    def __call__(self, vector):
        '''
        Predict the target value for a feature vector.

        @param vector: A feature vector (array, list or tuple).
        @returns: The prediction converted back to the original label range.
        @raises UntrainedClassifierError: If train() has not been called.
        '''
        if self._model is None:
            raise UntrainedClassifierError()

        # convert to an array
        if isinstance(vector, (list, tuple)):
            vector = np.array(vector, dtype=np.double)

        # preprocess the data
        vector = self._preprocessor(vector)

        # return the prediction
        value = self._model.predict(vector.tolist())
        return self._label_scale.toOrig(value)
303