contrib/rpl/rrel/rrel_linear_regression.cxx
Go to the documentation of this file.
00001 #include "rrel_linear_regression.h"
00002 
00003 #include <vnl/vnl_matrix.h>
00004 #include <vnl/vnl_vector.h>
00005 #include <vnl/algo/vnl_svd.h>
00006 
00007 #include <vcl_iostream.h>
00008 #include <vcl_vector.h>
00009 #include <vcl_cassert.h>
00010 
00011 rrel_linear_regression::rrel_linear_regression( const vcl_vector< vnl_vector<double> >& pts,
00012                                                 bool use_intercept )
00013   : rand_vars_( pts.size() ),
00014     ind_vars_( pts.size() )
00015 {
00016   unsigned int num_pts = pts.size();
00017   unsigned int vect_size = pts[0].size();
00018   for ( unsigned int i=0; i<num_pts; ++i ) {
00019     rand_vars_[i] = pts[i][ vect_size-1 ];
00020   }
00021 
00022   //
00023   //  If there is to be an intercept parameter, the first value in each
00024   //  independent variable vector will be a 1.0 and the remaining values
00025   //  are taken from locations 0 .. vect_size-2 of each point.
00026   //
00027   if ( use_intercept ) {
00028     set_param_dof( vect_size );
00029     for ( unsigned int i=0; i<num_pts; ++i ) {
00030       ind_vars_[i] = vnl_vector<double>(param_dof());
00031       ind_vars_[i][0] = 1.0;
00032       for ( unsigned int j=1; j<vect_size; ++j )
00033         ind_vars_[i][j] = pts[i][j-1];
00034     }
00035   }
00036 
00037   //
00038   //  Otherwise, with no intercept parameter, the independent variable
00039   //  vectors are formed from locations 0 .. vect_size-2 of each point.
00040   //
00041   else {
00042     set_param_dof( vect_size-1 );
00043     for ( unsigned int i=0; i<num_pts; ++i ) {
00044       ind_vars_[i] = vnl_vector<double>(param_dof());
00045       for ( unsigned int j=0; j<vect_size-1; ++j )
00046         ind_vars_[i][j] = pts[i][j];
00047     }
00048   }
00049   if ( param_dof() > num_pts ) {
00050     vcl_cerr << "\nrrel_linear_regression::rrel_linear_regression  WARNING:  DoF is greater than\n"
00051              << "the number of data points.  An infinite set of equally valid\n"
00052              << "solutions exists.\n";
00053   }
00054   set_num_samples_for_fit( param_dof() );
00055 }
00056 
00057 // ctor that just copies the independent and dependent variables vectors.
00058 rrel_linear_regression::rrel_linear_regression( const vcl_vector< vnl_vector<double> >& ind_vars,
00059                                                 const vcl_vector< double >& dep_vars )
00060   : rand_vars_(dep_vars), ind_vars_(ind_vars)
00061 {
00062   set_param_dof( ind_vars_[0].size() );
00063   if ( param_dof() > ind_vars.size() ) {
00064     vcl_cerr << "rrel_linear_regression::rrel_linear_regression  WARNING:  DoF is greater than\n"
00065              << "the number of data points.  An infinite set of solutions exists.\n";
00066   }
00067   set_num_samples_for_fit( param_dof() );
00068 }
00069 
00070 
00071 rrel_linear_regression::~rrel_linear_regression()
00072 {
00073 }
00074 
00075 unsigned int
00076 rrel_linear_regression::num_samples( ) const
00077 {
00078   return rand_vars_.size();
00079 }
00080 
00081 
00082 //  The equation to be solved is A p = b, where A is a dof_ x dof_
00083 //  array formed from the independent variables and b is dof_ x 1 and
00084 //  formed from the dependent variables.  If A is not full-rank, false
00085 //  is returned.  Otherwise, params = A^{-1} b and true is returned.
00086 //
00087 bool
00088 rrel_linear_regression::fit_from_minimal_set( const vcl_vector<int>& point_indices,
00089                                               vnl_vector<double>& params ) const
00090 {
00091   if ( point_indices.size() != param_dof() ) {
00092     vcl_cerr << "rrel_linear_regression::fit_from_minimal_sample  The number of point "
00093              << "indices must agree with the fit degrees of freedom.\n";
00094     return false;
00095   }
00096   vnl_matrix<double> A(param_dof(), param_dof());
00097   vnl_vector<double> b(param_dof());
00098   for ( unsigned int i=0; i<param_dof(); ++i ) {
00099     int index = point_indices[i];
00100     for ( unsigned int j=0; j<param_dof(); ++j ) {
00101       A(j,i) = ind_vars_[index][j];
00102     }
00103     b[i] = rand_vars_[index];
00104   }
00105 
00106   vnl_svd<double> svd( A, 1.0e-8 );
00107   if ( (unsigned int)svd.rank() < param_dof() ) {
00108     return false;    // singular fit --- no error message needed
00109   }
00110   else {
00111     params = vnl_vector<double>( b * svd.inverse() );
00112     return true;
00113   }
00114 }
00115 
00116 
00117 void
00118 rrel_linear_regression::compute_residuals( const vnl_vector<double>& params,
00119                                            vcl_vector<double>& residuals ) const
00120 {
00121   assert( residuals.size() == rand_vars_.size() );
00122 
00123   for ( unsigned int i=0; i<rand_vars_.size(); ++i ) {
00124     residuals[i] = rand_vars_[i] - dot_product( params, ind_vars_[i] );
00125   }
00126 }
00127 
00128 
00129 bool
00130 rrel_linear_regression::weighted_least_squares_fit( vnl_vector<double>& params,
00131                                                     vnl_matrix<double>& norm_covar,
00132                                                     const vcl_vector<double>* weights ) const
00133 {
00134   // If params and cofact are NULL pointers and the fit is successful,
00135   // this function will allocate a new vector and a new
00136   // matrix. Otherwise, it assumes that *params is a 1 x param_dof() vector
00137   // and that cofact is param_dof() x param_dof() matrix.
00138 
00139   assert( !weights || weights->size() == rand_vars_.size() );
00140 
00141   vnl_matrix<double> sumProds(param_dof(), param_dof(), 0.0);
00142   vnl_matrix<double> sumDists(param_dof(), 1, 0.0);
00143 
00144   //  Aside:  this probably would be faster if I used iterators...
00145 
00146   if (weights) {
00147     for ( unsigned int i=0; i<rand_vars_.size(); ++i ) {
00148       for ( unsigned int j=0; j<param_dof(); ++j ) {
00149         for ( unsigned int k=j; k<param_dof(); k++ )
00150           sumProds(j,k) += ind_vars_[i][j] * ind_vars_[i][k] * (*weights)[i];
00151         sumDists(j,0) += ind_vars_[i][j] * rand_vars_[i] * (*weights)[i];
00152       }
00153     }
00154   }
00155   else {
00156     for ( unsigned int i=0; i<rand_vars_.size(); ++i ) {
00157       for ( unsigned int j=0; j<param_dof(); ++j ) {
00158         for ( unsigned int k=j; k<param_dof(); k++ )
00159           sumProds(j,k) += ind_vars_[i][j] * ind_vars_[i][k];
00160         sumDists(j,0) += ind_vars_[i][j] * rand_vars_[i];
00161       }
00162     }
00163   }
00164 
00165   for ( unsigned int j=1; j<param_dof(); j++ )
00166     for ( unsigned int k=0; k<j; k++ )
00167       sumProds(j,k) = sumProds(k,j);
00168 
00169   vnl_svd<double> svd( sumProds, 1.0e-8 );
00170   if ( (unsigned int)svd.rank() < param_dof() ) {
00171     vcl_cerr << "rrel_linear_regression::WeightedLeastSquaresFit --- singularity!\n";
00172     return false;
00173   }
00174   else {
00175     vnl_matrix<double> sumP_inv( svd.inverse() );
00176     vnl_matrix<double> int_result = sumP_inv * sumDists;
00177     params = int_result.get_column(0);
00178     norm_covar = sumP_inv;
00179     return true;
00180   }
00181 }
00182 
00183 
00184 void
00185 rrel_linear_regression::print_points() const
00186 {
00187   vcl_cout << "\nrrel_linear_regression::print_points:\n"
00188            << "  param_dof() = " << param_dof() << '\n'
00189            << "  num_pts = " << rand_vars_.size() << "\n\n"
00190            << " i   rand_vars_   ind_vars_\n"
00191            << " =   ==========   =========\n";
00192   for ( unsigned int i=0; i<rand_vars_.size(); ++i ) {
00193     vcl_cout << ' ' << i << "   " << rand_vars_[i] << "    " << ind_vars_[i] << '\n';
00194   }
00195 }