contrib/mul/vpdfl/vpdfl_pc_gaussian_builder.cxx
Go to the documentation of this file.
00001 // This is mul/vpdfl/vpdfl_pc_gaussian_builder.cxx
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005 //:
00006 // \file
00007 // \brief Interface for Multi-variate Principle Component gaussian PDF Builder.
00008 // \author Ian Scott
00009 // \date 21-Jul-2000
00010 //
00011 // Modifications
00012 // \verbatim
00013 // 23 April 2001 IMS - Ported to VXL
00014 // \endverbatim
00015 
00016 #include "vpdfl_pc_gaussian_builder.h"
00017 //
00018 #include <vcl_string.h>
00019 #include <vcl_sstream.h>
00020 #include <vcl_cassert.h>
00021 #include <vcl_cstdlib.h> // for vcl_abort()
00022 #include <mbl/mbl_data_wrapper.h>
00023 #include <vnl/vnl_math.h>
00024 #include <vnl/vnl_c_vector.h>
00025 #include <vnl/algo/vnl_symmetric_eigensystem.h>
00026 #include <vpdfl/vpdfl_gaussian_builder.h>
00027 #include <vpdfl/vpdfl_pdf_base.h>
00028 #include <vpdfl/vpdfl_pc_gaussian.h>
00029 #include <mbl/mbl_parse_block.h>
00030 #include <mbl/mbl_read_props.h>
00031 #include <mbl/mbl_exception.h>
00032 #include <vul/vul_string.h>
00033 
00034 //=======================================================================
00035 
00036 vpdfl_pc_gaussian_builder::vpdfl_pc_gaussian_builder() :
00037   partitionMethod_(vpdfl_pc_gaussian_builder::fixed),
00038   proportionOfVariance_(0),
00039   fixed_partition_(1)
00040 {
00041 }
00042 
00043 //=======================================================================
00044 
00045 vpdfl_pc_gaussian_builder::~vpdfl_pc_gaussian_builder()
00046 {
00047 }
00048 
00049 //=======================================================================
00050 
00051 //: Use proportion of variance to decide on the number of principle components.
00052 // Specify the proportion (between 0 and 1).
00053 // The default setting uses a fixed number of principle components.
00054 void vpdfl_pc_gaussian_builder::set_proportion_partition( double proportion)
00055 {
00056   assert(proportion >= 0.0);
00057   assert(proportion <= 1.0);
00058 
00059   proportionOfVariance_ = proportion;
00060   partitionMethod_ = proportionate;
00061 }
00062 
00063 //=======================================================================
00064 
00065 //: Set the number of principle components when using fixed partition.
00066 void vpdfl_pc_gaussian_builder::set_fixed_partition(int n_principle_components)
00067 {
00068   assert(n_principle_components >=0);
00069   fixed_partition_ = n_principle_components;
00070   partitionMethod_ = vpdfl_pc_gaussian_builder::fixed;
00071 }
00072 
00073 //=======================================================================
00074 
00075 vpdfl_pc_gaussian& vpdfl_pc_gaussian_builder::gaussian(vpdfl_pdf_base& model) const
00076 {
00077     // need a vpdfl_gaussian
00078   assert(model.is_class("vpdfl_pc_gaussian"));
00079   return static_cast<vpdfl_pc_gaussian&>( model);
00080 }
00081 
00082 //=======================================================================
00083 
00084 vpdfl_pdf_base* vpdfl_pc_gaussian_builder::new_model() const
00085 {
00086   return new vpdfl_pc_gaussian();
00087 }
00088 
00089 //=======================================================================
00090 
00091 void vpdfl_pc_gaussian_builder::build(vpdfl_pdf_base& model,
00092                                       const vnl_vector<double>& mean) const
00093 {
00094   vpdfl_pc_gaussian& g = gaussian(model);
00095   int n = mean.size();
00096 
00097   // Generate an identity matrix for eigenvectors
00098   vnl_matrix<double> P(n,n);
00099   P.fill(0);
00100   P.fill_diagonal(1.0);
00101 
00102   g.set(mean,P,vnl_vector<double>(0), min_var());
00103 }
00104 
00105 #if 0 // this doesn't work
00106     //: Build model from mean and covariance
00107 void vpdfl_pc_gaussian_builder::buildFromCovar(vpdfl_pc_gaussian& g,
00108                                                const vnl_vector<double>& mean,
00109                                                const vnl_matrix<double>& S,
00110                                                unsigned nPrinComps) const
00111 {
00112   int n = mean.size();
00113   vnl_matrix<double> evecs;
00114   vnl_vector<double> evals;
00115 
00116   NR_CalcSymEigens(S,evecs,evals,0);
00117   vnl_vector<double> principleEVals(nPrinComps);
00118 
00119   // Apply threshold to variance
00120   for (int i=1;i<=nPrinComps;++i)
00121     if (evals(i)<min_var())
00122       principleEVals(i)=min_var();
00123     else
00124       principleEVals(i)=evals(i);
00125 
00126   double sum = 0.0; // The sum of the complementary space eigenvalues.
00127   for (int i=nPrinComps+1; i <= n; i++)
00128     sum += evals(i);
00129 
00130     // The Eigenvalue of the complementary space basis vectors
00131   double complementaryEVals = sum / (n - nPrinComps);
00132 
00133   if (complementaryEVals < min_var()) complementaryEVals = min_var();
00134 
00135   g.set(mean, evecs, principleEVals, complementaryEVals);
00136 }
00137 #endif
00138 
00139 
00140 //: replace any eigenvalues that are less than zero, with zero.
00141 // Small negative eigenvalues can be generated due to rounding errors.
00142 // This function assumes that the eigenvalues are stored in descending order.
00143 static void eValsFloorZero(vnl_vector<double> &v)
00144 {
00145   int n = v.size();
00146   double *v_data = v.data_block();
00147   int i=n-1;
00148   while (i && v_data[i] < 0.0)
00149   {
00150     v_data[i]=0.0;
00151     i--;
00152   }
00153 }
00154 
00155 
00156 void vpdfl_pc_gaussian_builder::build(vpdfl_pdf_base& model,
00157                                       mbl_data_wrapper<vnl_vector<double> >& data) const
00158 {
00159   vpdfl_pc_gaussian& g = gaussian(model);
00160 
00161   unsigned long n_samples = data.size();
00162   assert (n_samples>=2L);
00163 
00164   int n = data.current().size();
00165 
00166   vnl_vector<double> mean;
00167 //vnl_matrix<double> evecs;
00168 //vnl_vector<double> evals;
00169   vnl_matrix<double> evecs(n,n);
00170   vnl_vector<double> evals(n);
00171   vnl_matrix<double> S;
00172 
00173   meanCovar(mean,S,data);
00174 
00175   vnl_symmetric_eigensystem_compute(S, evecs, evals);
00176   // eigenvalues are lowest first here
00177   evals.flip();
00178   evecs.fliplr();
00179   // eigenvalues are highest first now
00180 
00181   int n_principle_components = decide_partition(evals, n_samples, 0);
00182 
00183   vnl_vector<double> principleEVals(n_principle_components);
00184 
00185   // Apply threshold to variance
00186   for (int i=0;i<n_principle_components;++i)
00187     if (evals(i)<min_var())
00188       principleEVals(i)=min_var();
00189     else
00190       principleEVals(i)=evals(i);
00191 
00192   double eVsum = 0.0; // The sum of the complementary space eigenvalues.
00193   for (int i=n_principle_components; i < n; i++)
00194     eVsum += evals(i);
00195 
00196     // The Eigenvalue of the complementary space basis vectors
00197   double complementaryEVals = eVsum / (n - n_principle_components);
00198 
00199   if (complementaryEVals < min_var()) complementaryEVals = min_var();
00200 
00201   g.set(mean, evecs, principleEVals, complementaryEVals);
00202 }
00203 
00204 //: Computes mean and covariance of given data
00205 void vpdfl_pc_gaussian_builder::mean_covar(vnl_vector<double>& mean, vnl_matrix<double>& S,
00206                                            mbl_data_wrapper<vnl_vector<double> >& data) const
00207 {
00208   unsigned long n_samples = data.size();
00209 
00210   assert (n_samples!=0L);
00211 
00212   int n_dims = data.current().size();
00213   vnl_vector<double> sum(n_dims);
00214   sum.fill(0);
00215 
00216   S.set_size(0,0);
00217 
00218   data.reset();
00219   for (unsigned long i=0;i<n_samples;i++)
00220   {
00221     sum += data.current();
00222     updateCovar(S,data.current(),1.0);
00223 
00224     data.next();
00225   }
00226 
00227   mean = sum;
00228   mean/=n_samples;
00229   S/=n_samples;
00230   updateCovar(S,mean,-1.0);
00231 }
00232 
00233 
00234 void vpdfl_pc_gaussian_builder::weighted_build(vpdfl_pdf_base& model,
00235                                                mbl_data_wrapper<vnl_vector<double> >& data,
00236                                                const vcl_vector<double>& wts) const
00237 {
00238   vpdfl_pc_gaussian& g = gaussian(model);
00239 
00240   unsigned long n_samples = data.size();
00241 
00242   if (n_samples<2L)
00243   {
00244     vcl_cerr<<"vpdfl_gaussian_builder::weighted_build() Too few examples available.\n";
00245     vcl_abort();
00246   }
00247 
00248   data.reset();
00249   const int n = data.current().size();
00250   vnl_vector<double> sum(n);
00251   sum.fill(0.0);
00252   vnl_matrix<double> evecs(n,n);
00253   vnl_vector<double> evals(n);
00254   vnl_matrix<double> S;
00255   double w_sum = 0.0;
00256   double w;
00257   unsigned actual_samples = 0;
00258 
00259   for (unsigned long i=0;i<n_samples;i++)
00260   {
00261     w = wts[i];
00262     if (w != 0.0) // Common case - save time.
00263     {
00264       actual_samples ++;
00265       w_sum += w;
00266       data.current().assert_finite();
00267       sum += w*data.current();
00268       updateCovar(S,data.current(),w);
00269     }
00270     data.next();
00271   }
00272 
00273   updateCovar(S,sum,-1.0/w_sum);
00274   S*=actual_samples/((actual_samples - 1) *w_sum);
00275   sum/=w_sum;
00276   // now sum = weighted mean
00277   // and S = weighted covariance corrected for unbiased rather than ML result.
00278 
00279 
00280   vnl_symmetric_eigensystem_compute(S, evecs, evals);
00281   // eigenvalues are lowest first here
00282   evals.flip();
00283   evecs.fliplr();
00284   // eigenvalues are highest first now
00285 
00286 #if 0
00287   vcl_cerr << 'S' << S <<vcl_endl
00288            << "evals" << evals <<vcl_endl
00289            << "evecs" << evecs <<vcl_endl;
00290 #endif
00291 
00292   eValsFloorZero(evals);
00293 
00294   int n_principle_components = decide_partition(evals, n);
00295 
00296   vnl_vector<double> principleEVals(n_principle_components);
00297 
00298   // Apply threshold to variance
00299   for (int i=0;i<n_principle_components;++i)
00300     if (evals(i)<min_var())
00301       principleEVals(i)=min_var();
00302     else
00303       principleEVals(i)=evals(i);
00304   double eVsum = 0.0; // The sum of the complementary space eigenvalues.
00305   for (int i=n_principle_components; i < n; i++)
00306     eVsum += evals(i);
00307 
00308     // The Eigenvalue of the complementary space basis vectors
00309   double complementaryEVals;
00310   if (n_principle_components != n) // avoid divide by 0
00311     complementaryEVals = eVsum / (n - n_principle_components);
00312   else
00313     complementaryEVals = 0.0; // actual could be any value.
00314 
00315   if (complementaryEVals < min_var()) complementaryEVals = min_var();
00316 
00317   g.set(sum, evecs, principleEVals, complementaryEVals);
00318 }
00319 
00320 
00321 //: Decide where to partition an Eigenvector space
00322 // Returns the number of principle components to be used.
00323 // Pass in the Eigenvalues (eVals), the number of samples
00324 // that went to make up this Gaussian (nSamples), and the noise floor
00325 // for the dataset. The method may use simplified algorithms if
00326 // you indicate that the number of samples or noise floor is unknown
00327 // (by setting the latter parameters to 0.)
00328 unsigned vpdfl_pc_gaussian_builder::decide_partition(const vnl_vector<double>& eVals,
00329                                                      unsigned /*nSamples =0*/,
00330                                                      double   /*noise =0.0*/) const
00331 {
00332   assert (eVals.size() > 0);
00333   if (partitionMethod_ == vpdfl_pc_gaussian_builder::fixed)
00334   {
00335     return vnl_math_min(eVals.size(), (unsigned)fixed_partition()+1);;
00336   }
00337   else if (partitionMethod_ == proportionate)
00338   {
00339     double sum = vnl_c_vector<double>::sum(eVals.data_block(), eVals.size());
00340     assert (proportionOfVariance_ < 1.0 && proportionOfVariance_ > 0.0);
00341     double stopWhen = sum * proportionOfVariance_;
00342     sum = eVals(0);
00343     unsigned i=0;
00344     while (sum <= stopWhen)
00345     {
00346       i++;
00347       sum += eVals(i);
00348     }
00349     return i;
00350   }
00351   else
00352   {
00353     vcl_cerr << "vpdfl_pc_gaussian_builder::decide_partition(): Unexpected partition method: "
00354              << (short)partitionMethod_ << '\n';
00355     vcl_abort();
00356     return 0;
00357   }
00358 }
00359 
00360 //: Read initialisation settings from a stream.
00361 // Parameters:
00362 // \verbatim
00363 // {
00364 //   mode_choice: fixed  // Alternative: proportionate
00365 //   var_prop: 0.95
00366 //   n_modes: 3
00367 //   min_var: 1.0e-6
00368 // }
00369 // \endverbatim
00370 // \throw mbl_exception_parse_error if the parse fails.
00371 void vpdfl_pc_gaussian_builder::config_from_stream(vcl_istream & is)
00372 {
00373   vcl_string s = mbl_parse_block(is);
00374 
00375   vcl_istringstream ss(s);
00376   mbl_read_props_type props = mbl_read_props_ws(ss);
00377 
00378   if (props.find("mode_choice")!=props.end())
00379   {
00380     if (props["mode_choice"]=="fixed")
00381       partitionMethod_=fixed;
00382     else
00383     if (props["mode_choice"]=="proportionate")
00384       partitionMethod_=proportionate;
00385     else
00386     {
00387       vcl_string err_msg = "Unknown mode_choice: "+props["mode_choice"];
00388       throw mbl_exception_parse_error(err_msg);
00389     }
00390 
00391     props.erase("mode_choice");
00392   }
00393 
00394   if (props.find("var_prop")!=props.end())
00395   {
00396     proportionOfVariance_=vul_string_atof(props["var_prop"]);
00397     props.erase("var_prop");
00398   }
00399 
00400   if (props.find("n_modes")!=props.end())
00401   {
00402     fixed_partition_=vul_string_atoi(props["n_modes"]);
00403     props.erase("n_modes");
00404   }
00405 
00406   double mv=1.0e-6;
00407   if (props.find("min_var")!=props.end())
00408   {
00409     mv=vul_string_atof(props["min_var"]);
00410     props.erase("min_var");
00411   }
00412   set_min_var(mv);
00413 
00414   try
00415   {
00416     mbl_read_props_look_for_unused_props(
00417         "vpdfl_axis_gaussian_builder::config_from_stream", props);
00418   }
00419   catch(mbl_exception_unused_props &e)
00420   {
00421     throw mbl_exception_parse_error(e.what());
00422   }
00423 }
00424 
00425 
00426 //=======================================================================
00427 
00428 vcl_string vpdfl_pc_gaussian_builder::is_a() const
00429 {
00430   static vcl_string class_name_ = "vpdfl_pc_gaussian_builder";
00431   return class_name_;
00432 }
00433 
00434 //=======================================================================
00435 // Method: is_class
00436 //=======================================================================
00437 
00438 bool vpdfl_pc_gaussian_builder::is_class(vcl_string const& s) const
00439 {
00440   return vpdfl_gaussian_builder::is_class(s) || s==vpdfl_pc_gaussian_builder::is_a();
00441 }
00442 
00443 //=======================================================================
00444 // Method: version_no
00445 //=======================================================================
00446 
00447 short vpdfl_pc_gaussian_builder::version_no() const
00448 {
00449   return 2;
00450 }
00451 
00452 //=======================================================================
00453 // Method: clone
00454 //=======================================================================
00455 
00456 vpdfl_builder_base* vpdfl_pc_gaussian_builder::clone() const
00457 {
00458   return new vpdfl_pc_gaussian_builder(*this);
00459 }
00460 
00461 //=======================================================================
00462 // Method: print
00463 //=======================================================================
00464 
00465 void vpdfl_pc_gaussian_builder::print_summary(vcl_ostream& os) const
00466 {
00467   vpdfl_gaussian_builder::print_summary(os);
00468   if (partitionMethod_==fixed) os<<" mode_choice: fixed ";
00469   if (partitionMethod_==proportionate)
00470     os<<" mode_choice: proportionate ";
00471   os<<" var_prop: "<<proportionOfVariance_
00472     <<" n_fixed: "<<fixed_partition_<<' ';
00473 }
00474 
00475 //=======================================================================
00476 // Method: save
00477 //=======================================================================
00478 
00479 void vpdfl_pc_gaussian_builder::b_write(vsl_b_ostream& bfs) const
00480 {
00481   vsl_b_write(bfs, is_a());
00482   vsl_b_write(bfs, version_no());
00483   vpdfl_gaussian_builder::b_write(bfs);
00484   vsl_b_write(bfs,(short)partitionMethod_);
00485   vsl_b_write(bfs, proportionOfVariance_);
00486   vsl_b_write(bfs, fixed_partition_);
00487 }
00488 
00489 //=======================================================================
00490 // Method: load
00491 //=======================================================================
00492 
00493 void vpdfl_pc_gaussian_builder::b_read(vsl_b_istream& bfs)
00494 {
00495   if (!bfs) return;
00496 
00497   vcl_string name;
00498   vsl_b_read(bfs,name);
00499   if (name != is_a())
00500   {
00501     vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, vpdfl_pc_gaussian_builder &)\n"
00502              << "           Attempted to load object of type "
00503              << name <<" into object of type " << is_a() << '\n';
00504     bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00505     return;
00506   }
00507 
00508   short temp;
00509   short version;
00510   vsl_b_read(bfs,version);
00511   switch (version)
00512   {
00513     case 1:
00514       vpdfl_gaussian_builder::b_read(bfs);
00515       vsl_b_read(bfs, temp);
00516       partitionMethod_ = partitionMethods(temp);
00517       vsl_b_read(bfs, proportionOfVariance_);
00518       fixed_partition_ = 75;
00519       break;
00520     case 2:
00521       vpdfl_gaussian_builder::b_read(bfs);
00522       vsl_b_read(bfs, temp);
00523       partitionMethod_ = partitionMethods(temp);
00524       vsl_b_read(bfs, proportionOfVariance_);
00525       vsl_b_read(bfs, fixed_partition_);
00526       break;
00527     default:
00528       vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, vpdfl_pc_gaussian_builder &)\n"
00529                << "           Unknown version number "<< version << '\n';
00530       bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00531       return;
00532   }
00533 }