contrib/mul/clsfy/clsfy_logit_loss_function.cxx Source File

Go to the documentation of this file.
00001 // This is mul/clsfy/clsfy_logit_loss_function.cxx
00002 //:
00003 // \file
00004 // \brief Loss function for logit of linear classifier
00005 // \author TFC
00006 
00007 #include "clsfy_logit_loss_function.h"
00008 #include <vcl_cassert.h>
00009 
00010 clsfy_logit_loss_function::clsfy_logit_loss_function(
00011             mbl_data_wrapper<vnl_vector<double> >& x,
00012             const vnl_vector<double> & c,
00013             double min_p, vnl_cost_function* reg_fn)
00014   : x_(x),c_(c),min_p_(min_p),reg_fn_(reg_fn)
00015 {
00016   x.reset();
00017   set_number_of_unknowns(x.current().size());
00018 }
00019 
00020 //: Assumes w.size()=x.size()+1. Computes (1 x')w
00021 inline double dot1(const vnl_vector<double>& w,
00022                    const vnl_vector<double>& x)
00023 {
00024   return w[0] + vnl_c_vector<double>::dot_product(w.begin()+1,
00025                                       x.begin(),
00026                                       x.size());
00027 }
00028 
00029 //:  The main function: Compute f(w)
00030 double clsfy_logit_loss_function::f(vnl_vector<double> const& v)
00031 {
00032   double sum=0;
00033   if (reg_fn_) sum=reg_fn_->f(v);
00034   x_.reset();
00035   double a=1.0/x_.size();
00036   for (unsigned i=0;i<x_.size();++i,x_.next())
00037   {
00038     double z = c_[i]*dot1(v,x_.current());
00039     sum -= a*vcl_log(min_p_+(1-min_p_)/(1+vcl_exp(-z)) );
00040   }
00041   return sum;
00042 }
00043 
00044   //:  Calculate the gradient of f at parameter vector v.
00045 void clsfy_logit_loss_function::gradf(vnl_vector<double> const& v, 
00046                      vnl_vector<double>& gradient)
00047 {
00048   x_.reset();
00049   unsigned n=x_.current().size();
00050 
00051   // Term from regulariser
00052   if (reg_fn_)
00053     reg_fn_->gradf(v,gradient);
00054   else
00055   {
00056     gradient.set_size(n+1);
00057     gradient.fill(0.0);
00058   }
00059 
00060   double a=1.0/x_.size();
00061   double *g1 = gradient.data_block()+1; // g[1] associated with x[0]
00062   assert(v.size()==n+1);
00063   for (unsigned i=0;i<x_.size();++i,x_.next())
00064   {
00065     double z = c_[i]*dot1(v,x_.current());
00066     double ez = vcl_exp(-1*z);
00067     double sz=1/(1+ez);
00068     double fi = min_p_+(1-min_p_)*sz;
00069     double k = a*(1-min_p_)*ez*sz*sz*c_[i]/fi;
00070     gradient[0]-=k;  // First element notionally 1.0
00071     const double* x = x_.current().data_block();
00072     for (unsigned j=0;j<n;++j) g1[j]-=k*x[j];
00073   }
00074 }
00075 
00076 void clsfy_logit_loss_function::compute(vnl_vector<double> const& v,
00077                double *f, vnl_vector<double>* gradient)
00078 {
00079   if (!gradient)
00080   {
00081     if (f) *f = this->f(v);
00082     return;
00083   }
00084 
00085   x_.reset();
00086   unsigned n=x_.current().size();
00087 
00088   // Term from regulariser
00089   if (reg_fn_)
00090     reg_fn_->gradf(v,*gradient);
00091   else
00092   {
00093     gradient->set_size(n+1);
00094     gradient->fill(0.0);
00095   }
00096 
00097 
00098   double a=1.0/x_.size();
00099   double *g1 = gradient->data_block()+1; // g[1] associated with x[0]
00100 
00101   double sum=0;
00102   if (reg_fn_) sum=reg_fn_->f(v);
00103 
00104   assert(v.size()==n+1);
00105   for (unsigned i=0;i<x_.size();++i,x_.next())
00106   {
00107     double z = c_[i]*dot1(v,x_.current());
00108     double ez = vcl_exp(-1*z);
00109     double sz=1/(1+ez);
00110     double fi = min_p_+(1-min_p_)*sz;
00111     sum -= a*vcl_log(fi);
00112     double k = a*(1-min_p_)*ez*sz*sz*c_[i]/fi;
00113 
00114     // Update gradient with k*x (extending x by 1 in first element)
00115     (*gradient)[0]-=k;  // First element notionally 1.0
00116     const double* x = x_.current().data_block();
00117     for (unsigned j=0;j<n;++j) g1[j]-=k*x[j];
00118   }
00119   if (f) *f = sum;
00120 }
00121 
00122 clsfy_quad_regulariser::clsfy_quad_regulariser(double alpha)
00123   : alpha_(alpha)
00124 {
00125 }
00126 
00127 //:  The main function: Compute f(v)
00128 double clsfy_quad_regulariser::f(vnl_vector<double> const& v)
00129 {
00130   double sum=0.0;
00131   for (unsigned i=1;i<v.size();++i) sum+=v[i]*v[i];
00132   return alpha_*sum;
00133 }
00134 
00135 //:  Calculate the gradient of f at parameter vector v.
00136 void clsfy_quad_regulariser::gradf(vnl_vector<double> const& v, 
00137                      vnl_vector<double>& gradient)
00138 {
00139   gradient = (2*alpha_)*v;
00140   gradient[0]=0.0;  // Function independent of first element
00141 }
00142 
00143