contrib/rpl/rrel/rrel_ran_sam_search.cxx
Go to the documentation of this file.
00001 // This is rpl/rrel/rrel_ran_sam_search.cxx
00002 #include "rrel_ran_sam_search.h"
00003 #include <rrel/rrel_objective.h>
00004 #include <rrel/rrel_estimation_problem.h>
00005 #include <rrel/rrel_util.h>
00006 
00007 #include <vnl/vnl_vector.h>
00008 #include <vnl/vnl_random.h>
00009 
00010 #include <vcl_iostream.h>
00011 #include <vcl_cmath.h>
00012 #include <vcl_vector.h>
00013 #include <vcl_cstdlib.h>
00014 #include <vcl_cassert.h>
00015 
00016 // Random number generator. This will be shared by all ran_sam instances.
00017 static vnl_random global_generator_;
00018 
00019 
00020 rrel_ran_sam_search::rrel_ran_sam_search( )
00021   : generate_all_(false),
00022     generator_( &global_generator_ ),
00023     own_generator_( false ),
00024     params_(0), scale_(0),
00025     samples_to_take_(0),
00026     trace_level_(0)
00027 {
00028   set_sampling_params();
00029 }
00030 
00031 rrel_ran_sam_search::rrel_ran_sam_search( int seed )
00032   : generate_all_(false),
00033     generator_( new vnl_random(seed) ),
00034     own_generator_( true ),
00035     params_(0), scale_(0),
00036     samples_to_take_(0),
00037     trace_level_(0)
00038 {
00039   set_sampling_params();
00040 }
00041 
00042 
00043 rrel_ran_sam_search::~rrel_ran_sam_search( )
00044 {
00045   if ( own_generator_ )
00046     delete generator_;
00047 }
00048 
00049 
00050 // ------------------------------------------------------------
00051 void
00052 rrel_ran_sam_search::set_gen_all_samples()
00053 {
00054   generate_all_ = true;
00055 }
00056 
00057 
00058 // ------------------------------------------------------------
00059 void
00060 rrel_ran_sam_search::set_sampling_params( double max_outlier_frac,
00061                                           double desired_prob_good,
00062                                           unsigned int max_populations_expected,
00063                                           unsigned int min_samples )
00064 {
00065   generate_all_ = false;
00066   max_outlier_frac_ = max_outlier_frac;
00067   desired_prob_good_ = desired_prob_good;
00068   max_populations_expected_ = max_populations_expected;
00069   min_samples_ = min_samples;
00070 }
00071 
00072 
00073 // ------------------------------------------------------------
00074 bool
00075 rrel_ran_sam_search::estimate( const rrel_estimation_problem * problem,
00076                                const rrel_objective * obj_fcn )
00077 {
00078   //
00079   //  Initialize the random sampling.
00080   //
00081   this->calc_num_samples( problem );
00082   if ( trace_level_ >= 1 )
00083     vcl_cout << "\nSamples = " << samples_to_take_ << vcl_endl;
00084 
00085   if ( obj_fcn->requires_prior_scale() &&
00086        problem->scale_type() == rrel_estimation_problem::NONE )
00087   {
00088     vcl_cerr << "ran_sam::estimate: Objective function requires a prior scale,"
00089              << " and the problem does not provide one.\n"
00090              << "                   Aborting estimation.\n";
00091     return false;
00092   }
00093 
00094   unsigned int points_per = problem->num_samples_to_instantiate();
00095   unsigned int num_points = problem->num_samples();
00096   vcl_vector<int> point_indices( points_per );
00097   vnl_vector<double> new_params;
00098   vcl_vector<double> residuals( num_points );
00099   min_obj_ = 0.0;
00100   bool  obj_set=false;
00101 
00102   scale_ = -1;
00103 
00104   //
00105   //  The main loop repeatedly establishes a sample, generates fit
00106   //  parameters from the sample, calculates the objective function
00107   //  value, and tests the value against the best found thus far.  If
00108   //  a sample doesn't yield a parameter vector, it is still counted
00109   //  toward the total number to take.  This prevents errors that
00110   //  would arise when all samples are to be used, but still works
00111   //  correctly for probabilistic sampling because the possibility
00112   //  is rare.
00113   //
00114   for ( unsigned int s = 0; s<samples_to_take_; ++s ) {
00115     this->next_sample( s, num_points, point_indices, points_per );
00116     if ( trace_level_ >= 2 )
00117       this->trace_sample( point_indices );
00118     if ( problem->fit_from_minimal_set( point_indices, new_params ))
00119     {
00120       if ( trace_level_ >= 1 )
00121         vcl_cout << "Fit = " << new_params << vcl_endl;
00122       problem->compute_residuals( new_params, residuals );
00123       if ( trace_level_ >= 2)
00124         this->trace_residuals( residuals );
00125 
00126       double new_obj = 0.0;
00127       switch ( problem->scale_type() ) {
00128        case rrel_estimation_problem::NONE:
00129         new_obj = obj_fcn->fcn( residuals.begin(), residuals.end(), scale_, &new_params );
00130         break;
00131        case rrel_estimation_problem::SINGLE:
00132         new_obj = obj_fcn->fcn( residuals.begin(), residuals.end(), problem->prior_scale(), &new_params );
00133         break;
00134        case rrel_estimation_problem::MULTIPLE:
00135         new_obj = obj_fcn->fcn( residuals.begin(), residuals.end(), problem->prior_multiple_scales().begin(), &new_params );
00136         break;
00137        default:
00138         vcl_cerr << __FILE__ << ": unknown scale type\n";
00139         vcl_abort();
00140       }
00141       if ( trace_level_ >= 1)
00142         vcl_cout << "Objective = " << new_obj << vcl_endl;
00143       if ( !obj_set || new_obj<min_obj_ ) {
00144         if ( trace_level_ >= 2)
00145           vcl_cout << "New best\n";
00146         obj_set = true;
00147         min_obj_ = new_obj;
00148         params_ = new_params;
00149         indices_ = point_indices;
00150         residuals_ = residuals;
00151       }
00152     }
00153     else if (trace_level_ >= 1)
00154       vcl_cout << "No fit to sample.\n";
00155   }
00156 
00157   if ( ! obj_set ) {
00158     return false;
00159   }
00160 
00161   //
00162   // Estimation succeeded.  Now, estimate scale and then return.
00163   //
00164   problem->compute_residuals( params_, residuals );
00165   if ( trace_level_ >= 1)
00166     vcl_cout << "\nOptimum fit = " << params_ << vcl_endl;
00167   if ( trace_level_ >= 2)
00168     this->trace_residuals( residuals );
00169   if ( obj_fcn->can_estimate_scale() )
00170     scale_ = obj_fcn->scale( residuals.begin(), residuals.end() );
00171   else if ( residuals.size() > 1 )
00172     scale_ = rrel_util_median_abs_dev_scale( residuals.begin(), residuals.end() );
00173   else {
00174     scale_ = 0;
00175     vcl_cout << "Can't estimate scale from one residual!\n";
00176     return false;
00177   }
00178   if ( trace_level_ >= 1)
00179     vcl_cout << "Scale = " << scale_ << vcl_endl;
00180   return true;
00181 }
00182 
00183 
00184 // ------------------------------------------------------------
00185 void
00186 rrel_ran_sam_search::calc_num_samples( const rrel_estimation_problem* problem )
00187 {
00188   if ( generate_all_ ) {
00189     //
00190     //  Calculate C(num_points, points_per_sample)
00191     //
00192     unsigned long numer=1;
00193     unsigned long denom=1;
00194     unsigned int n=problem->num_samples();
00195     unsigned int k=problem->num_samples_to_instantiate();
00196     for ( ; k>0; --k, --n ) {
00197       numer *= n;
00198       denom *= k;
00199     }
00200     samples_to_take_ = (unsigned int)( numer / denom );
00201   }
00202   else {
00203     //
00204     //  Calculate the probability that a sample is good.  Then, use this
00205     //  to determine the minimum number of samples required.
00206     //
00207     double prob_pt_inlier = (1 - max_outlier_frac_) * problem->num_unique_samples() / double(problem->num_samples());
00208     double prob_pt_good
00209       = max_populations_expected_
00210         * vcl_pow( prob_pt_inlier / max_populations_expected_, (int)problem->num_samples_to_instantiate());
00211     samples_to_take_ = int(vcl_ceil( vcl_log(1.0 - desired_prob_good_) /
00212                                      vcl_log(1.0 - prob_pt_good) ));
00213     if ( samples_to_take_ < min_samples_ )
00214       samples_to_take_ = min_samples_;
00215   }
00216 }
00217 
00218 // ------------------------------------------------------------
00219 void
00220 rrel_ran_sam_search::next_sample( unsigned int taken,
00221                                   unsigned int num_points,
00222                                   vcl_vector<int>& sample,
00223                                   unsigned int points_per_sample )
00224 {
00225   assert( sample.size() == points_per_sample );
00226 
00227   if ( generate_all_ ) {
00228     if ( taken == 0 ) {  //  initial sample
00229       for ( unsigned int i=0; i<points_per_sample; ++i )
00230         sample[i] = i;
00231     }
00232     else if ( taken >= samples_to_take_ )
00233       vcl_cerr << "rrel_ran_sam_search::next_sample -- ERROR: used all samples\n";
00234     else {
00235       //
00236       //  Generate the subsets in lexicographic order.
00237       //
00238       unsigned int i=points_per_sample-1;
00239       unsigned int k=num_points-1;
00240       while ( sample[i] == (int)k ) { --i; --k; }
00241       k = ++ sample[i];
00242       for ( ++k, ++i; i<points_per_sample; ++i, ++k )
00243         sample[i]=k;
00244     }
00245   }
00246 
00247   else {
00248     if ( num_points == 1 ) {
00249       sample[0] = 0;
00250     } else {
00251       unsigned int k=0, counter=0;
00252       while ( k<points_per_sample ) // This might be an infinite loop!
00253       {
00254         int id = generator_->lrand32( 0, num_points-1 );
00255         if ( id >= int(num_points) ) {   //  safety check
00256           vcl_cerr << "rrel_ran_sam_search::next_sample --- "
00257                    << "WARNING: random value out of range\n";
00258         }
00259         else
00260         {
00261           ++counter;
00262           bool different = true;
00263           for ( int i=k-1; i>=0 && different; --i )
00264             different = (id != sample[i]);
00265           if ( different )
00266             sample[k++] = id, counter = 0;
00267           else if (counter > 100)
00268           {
00269             vcl_cerr << "rrel_ran_sam_search::next_sample --- WARNING: "
00270                      << "lrand32() generated 100x the same value "<< id
00271                      << " from the range [0," << num_points-1 << "]\n";
00272             sample[k++] = id+1;
00273           }
00274         }
00275       }
00276     }
00277   }
00278 }
00279 
00280 // ------------------------------------------------------------
00281 void
00282 rrel_ran_sam_search::print_params() const
00283 {
00284   vcl_cout << "  max_outlier_frac_ = " << max_outlier_frac_ << '\n'
00285            << "  desired_prob_good_ = " << desired_prob_good_ <<  '\n'
00286            << "  max_populations_expected_ = " << max_populations_expected_ << '\n'
00287            << "  min_samples_ = " << min_samples_ << '\n'
00288            << "  generate_all_ = " << generate_all_ << vcl_endl;
00289 }
00290 
00291 
00292 // ------------------------------------------------------------
00293 void
00294 rrel_ran_sam_search::trace_sample( const vcl_vector<int>& indices ) const
00295 {
00296   vcl_cout << "\nNew sample: ";
00297   for ( unsigned int i=0; i<indices.size(); ++i)
00298     vcl_cout << ' ' << indices[i];
00299   vcl_cout << vcl_endl;
00300 }
00301 
00302 // ------------------------------------------------------------
00303 void
00304 rrel_ran_sam_search::trace_residuals( const vcl_vector<double>& residuals ) const
00305 {
00306   vcl_cout << "\nResiduals:\n";
00307   for ( unsigned int i=0; i<residuals.size(); ++i )
00308     vcl_cout << "  " << i << ":  " << residuals[i] << '\n';
00309   vcl_cout << vcl_endl;
00310 }