1 '''
2 Created on Apr 27, 2012
3
4 @author: bolme
5 '''
6
7 import numpy as np
8 import copy
9 import scipy as sp
10
12 p = copy.deepcopy(p)
13
14 sel = p < 0.01
15 p[sel] = 0.01
16 sel = p > 0.99
17 p[sel] = 0.99
18
19 return np.log(p/(1-p))
20
21
23 ''' This is a wraper class that converts a pdf to a negitive log likelihood function. '''
24
27
29 result = 0
30 for x in obs:
31 result += np.log(self.pdf(params,x))
32 return -result
33
34
35
37 '''
38 Produces a maximum likelihood estimate of the parameters at least one pdf or nll needs to be specified.
39
40 @param obs: a list of numerical values that are the observations.
41 @type obs: list of numbers
42 @param params: a list or numpy array containing estimates of the parameters.
43 @type params: list of numbers
44 @param pdf: a function returning the probability density for a distribution.
45 @type pdf: pdf(params,x) -> probablity density
46 @param nll: a function returning the negitive log likelihood given the parameters and observations as arguments.
47 @type nll: nll(params,obs) -> negitive log likelihood.
48 '''
49 assert nll == None or pdf == None
50 assert nll != None or pdf != None
51
52 if nll == None and pdf != None:
53 nll = pdf2nll(pdf)
54 return sp.optimize.fmin(lambda params: nll(params,obs),params,disp=0,xtol=1.0e-7)
55 elif nll != None and pdf == None:
56 return sp.optimize.fmin(lambda params: nll(params,obs),params,disp=0,xtol=1.0e-7)
57 else:
58 raise ValueError("Must specify one of pdf or nll.")
59
60
61
63 '''
64 This object implements a logistic regression model.
65
66 For the house-votes-84.data.txt data set this learned a model that was consistant with R.
67 '''
68
70 '''
71 Create and initialize the model.
72 '''
73 self.params = None
74
75 - def train(self,obs,data,method='ml'):
76 '''
77 Train a logistic regression model.
78 @param obs: these are the observations.
79 @type obs: float in [0.0:1.0]
80 @param data: floating point matrix of data
81 @type data: list of data points in rows
82 @param method: determines the optimization criteria.
83 @type method: can be 'ml' (maximum likelihood) or 'fast' (least sqaures estimate)
84 '''
85
86 data = np.array(data,dtype=np.float64)
87 obs = np.array(obs,dtype=np.float64)
88
89
90 N,D = data.shape
91 r = logit_approx(obs)
92
93
94 data = np.concatenate((data,np.ones((N,1),dtype=np.float64)),axis=1)
95
96
97 x,_,_,_ = np.linalg.lstsq(data, r)
98 x.shape = (D+1,1)
99
100 if method == 'fast':
101 self.params = x
102
103 elif method == 'ml':
104
105 def logitNll(params,obs):
106
107 vals = np.dot(data,params)
108
109 vals = 1.0/(1.0 + np.exp(-vals))
110
111 return -(obs*np.log(vals) + (1-obs)*np.log(1-vals)).sum()
112
113
114 result = maxLikelihoodEstimate(obs, x.flatten(), nll=logitNll)
115
116
117 self.params = np.array(result)
118 self.params.shape = (D+1,1)
119
120 else:
121 raise ValueError("Unknown method type '%s'"%(method,))
122
124 '''
125 Predict the results for a dataset.
126
127 @param data: floating point matrix of data
128 @type data: list of data points in rows
129
130 @returns: a probibility for each data point.
131 '''
132
133 D = self.params.shape[0]-1
134 data = np.array(data,dtype=np.float64)
135 if len(data.shape) == 1:
136 data.shape = (1,D)
137
138 assert data.shape[1] == self.params.shape[0]-1
139 N = data.shape[0]
140
141
142 data = np.concatenate((data,np.ones((N,1),dtype=np.float64)),axis=1)
143
144
145 vals = np.dot(data,self.params)
146
147
148 vals = 1.0/(1.0 + np.exp(-vals))
149
150 return vals.flatten()
151