pyvision.ml.regression

1 ''' 2 Created on Apr 27, 2012 3 4 @author: bolme 5 ''' 6 7 import numpy as np 8 import copy 9 import scipy as sp 10

11 -def logit_approx(p):

12 p = copy.deepcopy(p) 13 14 sel = p < 0.01 15 p[sel] = 0.01 16 sel = p > 0.99 17 p[sel] = 0.99 18 19 return np.log(p/(1-p))

20 21

22 -class pdf2nll(object):

23 ''' This is a wraper class that converts a pdf to a negitive log likelihood function. ''' 24

25 - def __init__(self,pdf):

26 self.pdf = pdf

27

28 - def __call__(self,params,obs):

29 result = 0 30 for x in obs: 31 result += np.log(self.pdf(params,x)) 32 return -result

33 34 35

36 -def maxLikelihoodEstimate(obs,params,pdf=None,nll=None):

37 ''' 38 Produces a maximum likelihood estimate of the parameters at least one pdf or nll needs to be specified. 39 40 @param obs: a list of numerical values that are the observations. 41 @type obs: list of numbers 42 @param params: a list or numpy array containing estimates of the parameters. 43 @type params: list of numbers 44 @param pdf: a function returning the probability density for a distribution. 45 @type pdf: pdf(params,x) -> probablity density 46 @param nll: a function returning the negitive log likelihood given the parameters and observations as arguments. 47 @type nll: nll(params,obs) -> negitive log likelihood. 48 ''' 49 assert nll == None or pdf == None 50 assert nll != None or pdf != None 51 52 if nll == None and pdf != None: 53 nll = pdf2nll(pdf) 54 return sp.optimize.fmin(lambda params: nll(params,obs),params,disp=0,xtol=1.0e-7) 55 elif nll != None and pdf == None: 56 return sp.optimize.fmin(lambda params: nll(params,obs),params,disp=0,xtol=1.0e-7) 57 else: 58 raise ValueError("Must specify one of pdf or nll.")

59 60 61

62 -class LogisticRegression(object):

63 ''' 64 This object implements a logistic regression model. 65 66 For the house-votes-84.data.txt data set this learned a model that was consistant with R. 67 ''' 68

69 - def __init__(self):

70 ''' 71 Create and initialize the model. 72 ''' 73 self.params = None

74

75 - def train(self,obs,data,method='ml'):

76 ''' 77 Train a logistic regression model. 78 @param obs: these are the observations. 79 @type obs: float in [0.0:1.0] 80 @param data: floating point matrix of data 81 @type data: list of data points in rows 82 @param method: determines the optimization criteria. 83 @type method: can be 'ml' (maximum likelihood) or 'fast' (least sqaures estimate) 84 ''' 85 # Convert data to arrays 86 data = np.array(data,dtype=np.float64) 87 obs = np.array(obs,dtype=np.float64) 88 89 # Set up approximate fitting 90 N,D = data.shape 91 r = logit_approx(obs) 92 93 # add column for intercept 94 data = np.concatenate((data,np.ones((N,1),dtype=np.float64)),axis=1) 95 96 # get an estimate least squares fit 97 x,_,_,_ = np.linalg.lstsq(data, r) 98 x.shape = (D+1,1) 99 100 if method == 'fast': 101 self.params = x 102 103 elif method == 'ml': 104 # The negative log likelihood cost function 105 def logitNll(params,obs): 106 # project the data 107 vals = np.dot(data,params) 108 # compute the probablities 109 vals = 1.0/(1.0 + np.exp(-vals)) 110 # Compute the negitive log likelihood 111 return -(obs*np.log(vals) + (1-obs)*np.log(1-vals)).sum()

112 113 # Find the optimal max likelihood estimate 114 result = maxLikelihoodEstimate(obs, x.flatten(), nll=logitNll) 115 116 # Save the parameters 117 self.params = np.array(result) 118 self.params.shape = (D+1,1) 119 120 else: 121 raise ValueError("Unknown method type '%s'"%(method,))

122

123 - def predict(self,data):

124 ''' 125 Predict the results for a dataset. 126 127 @param data: floating point matrix of data 128 @type data: list of data points in rows 129 130 @returns: a probibility for each data point. 131 ''' 132 # Convert data to arrays 133 D = self.params.shape[0]-1 134 data = np.array(data,dtype=np.float64) 135 if len(data.shape) == 1: 136 data.shape = (1,D) 137 138 assert data.shape[1] == self.params.shape[0]-1 139 N = data.shape[0] 140 141 # add column for intercept 142 data = np.concatenate((data,np.ones((N,1),dtype=np.float64)),axis=1) 143 144 # project the data 145 vals = np.dot(data,self.params) 146 147 # compute the probablities 148 vals = 1.0/(1.0 + np.exp(-vals)) 149 150 return vals.flatten()

151

Source Code for Module pyvision.ml.regression