contrib/mul/pdf1d/pdf1d_pdf.cxx
Go to the documentation of this file.
00001 // This is mul/pdf1d/pdf1d_pdf.cxx
00002 
00003 //:
00004 // \file
00005 // \author Tim Cootes
00006 // \brief Base class for Univariate Probability Density Function classes.
00007 // \verbatim
00008 //  Modifications
00009 //  IMS 28 Feb 2002 - Added inverse CDF function.
00010 // \endverbatim
00011 
00012 #include "pdf1d_pdf.h"
00013 #include <vcl_cstdlib.h>
00014 #include <vcl_cmath.h>
00015 #include <vcl_cassert.h>
00016 #include <vsl/vsl_indent.h>
00017 #include <vsl/vsl_binary_loader.h>
00018 #include <vnl/vnl_math.h>
00019 #include <mbl/mbl_priority_bounded_queue.h>
00020 #include <pdf1d/pdf1d_sampler.h>
00021 
00022 //=======================================================================
00023 
00024 pdf1d_pdf::pdf1d_pdf()
00025   : mean_(0.0),var_(0.0)
00026 {
00027 }
00028 
00029 //=======================================================================
00030 
00031 pdf1d_pdf::~pdf1d_pdf()
00032 {
00033 }
00034 
00035 //=======================================================================
00036 
00037 double pdf1d_pdf::operator()(double x) const
00038 {
00039   return vcl_exp(log_p(x));
00040 }
00041 
00042 //: Cumulative Probability (P(x'<x) for x' drawn from the distribution)
00043 //  By default this can be calculated by drawing random samples from
00044 //  the distribution and computing the number less than x.
00045 double pdf1d_pdf::cdf(double x0) const
00046 {
00047   pdf1d_sampler *sampler = new_sampler();
00048 
00049   int n_trials = 100;
00050   int sum = 0;
00051   for (int i=0;i<n_trials;++i)
00052     if (sampler->sample()<=x0) sum++;
00053 
00054   delete sampler;
00055 
00056   return double(sum)/n_trials;
00057 }
00058 
00059 //: Return true if cdf() uses an analytic implementation
00060 //  Default is false, as the base implementation is to draw samples
00061 //  from the distribution randomly to estimate cdf(x)
00062 bool pdf1d_pdf::cdf_is_analytic() const
00063 {
00064   return false;
00065 }
00066 
00067 
00068 double pdf1d_pdf::log_prob_thresh(double pass_proportion) const
00069 {
00070   assert(pass_proportion >= 0.0);
00071   assert(pass_proportion < 1.0);
00072 
00073   // The number of samples on the less likely side of the boundary.
00074   // Increase the number for greater reliabililty
00075   const unsigned n_stat = 30;
00076 
00077   unsigned nSamples;
00078   double right;
00079   double left;
00080 
00081   pdf1d_sampler *sampler = new_sampler();
00082   if (pass_proportion > 0.5)
00083   {
00084     //We want at n_stat samples outside the cut-off.
00085     pass_proportion = 1 - pass_proportion;
00086     nSamples = (unsigned)vcl_ceil((double)n_stat / pass_proportion);
00087 
00088     // Find lowest values
00089     mbl_priority_bounded_queue<double> lowest(n_stat+1);
00090     for (unsigned i=0; i < nSamples; i++)
00091       lowest.push(operator()(sampler->sample()));
00092 
00093     // bracket result
00094     right = lowest.top();
00095     lowest.pop();
00096     left = lowest.top();
00097   }
00098   else
00099   {
00100     //We want at n_stat samples inside the cut-off.
00101     nSamples = (unsigned)vcl_ceil((double)n_stat / pass_proportion);
00102 
00103     // Find highest values
00104     mbl_priority_bounded_queue<double, vcl_vector<double>, vcl_greater<double> >
00105       highest(n_stat+1);
00106     for (unsigned i=0; i < nSamples; i++)
00107       highest.push(operator()(sampler->sample()));
00108 
00109     // bracket result
00110     right = highest.top();
00111     highest.pop();
00112     left = highest.top();
00113   }
00114   delete sampler;
00115 
00116   // Interpolate left and right to find value
00117   // check interpolation is not extrapolation.
00118   assert (0.0 <= pass_proportion*nSamples - n_stat &&
00119           pass_proportion*nSamples - n_stat <= 1.0);
00120   return (n_stat + 1.0 - pass_proportion*nSamples) * vcl_log(left)
00121          + (pass_proportion*nSamples - n_stat) * vcl_log(right);
00122 }
00123 
00124 //=======================================================================
00125 
00126 bool pdf1d_pdf::is_valid_pdf() const
00127 {
00128   return true;
00129 }
00130 
00131 //: Fill x with samples drawn from distribution
00132 void pdf1d_pdf::get_samples(vnl_vector<double>& x) const
00133 {
00134   pdf1d_sampler *sampler = new_sampler();
00135   sampler->get_samples(x);
00136   delete sampler;
00137 }
00138 
00139 //: Write values (x,p(x)) to text file suitable for plotting
00140 //  Evaluate pdf at n points in range [min_x,max_x] and write a text file,
00141 //  each line of which is {x p(x)}, suitable for plotting with many graph packages
00142 bool pdf1d_pdf::write_plot_file(const vcl_string& plot_file, double min_x, double max_x, int n) const
00143 {
00144   vcl_ofstream ofs(plot_file.c_str(),vcl_ios::out);
00145   if (!ofs) return false;
00146   assert(n>1);
00147 
00148   double dx = (max_x-min_x)/(n-1);
00149   for (int i=0;i<n;++i)
00150   {
00151     double x = min_x + i*dx;
00152     ofs<<x<<' '<<operator()(x)<<'\n';
00153   }
00154   ofs.close();
00155 
00156   return true;
00157 }
00158 
00159 
00160 //=======================================================================
00161 
00162 short pdf1d_pdf::version_no() const
00163 {
00164   return 1;
00165 }
00166 
00167 //=======================================================================
00168 
00169 void vsl_add_to_binary_loader(const pdf1d_pdf& b)
00170 {
00171   vsl_binary_loader<pdf1d_pdf>::instance().add(b);
00172 }
00173 
00174 //=======================================================================
00175 
00176 vcl_string pdf1d_pdf::is_a() const
00177 {
00178   static vcl_string class_name_ = "pdf1d_pdf";
00179   return class_name_;
00180 }
00181 
00182 //=======================================================================
00183 
00184 bool pdf1d_pdf::is_class(vcl_string const& s) const
00185 {
00186   return s==pdf1d_pdf::is_a();
00187 }
00188 
00189 //=======================================================================
00190 
00191   // required if data is present in this base class
00192 void pdf1d_pdf::print_summary(vcl_ostream& os) const
00193 {
00194   os << "  Mean: " << mean_
00195      << "  Variance: " << var_;
00196 }
00197 
00198 //=======================================================================
00199 
00200   // required if data is present in this base class
00201 void pdf1d_pdf::b_write(vsl_b_ostream& bfs) const
00202 {
00203   vsl_b_write(bfs, version_no());
00204   vsl_b_write(bfs, mean_);
00205   vsl_b_write(bfs, var_);
00206 }
00207 
00208 //=======================================================================
00209 
00210   // required if data is present in this base class
00211 void pdf1d_pdf::b_read(vsl_b_istream& bfs)
00212 {
00213   if (!bfs) return;
00214 
00215   short version;
00216   vsl_b_read(bfs,version);
00217   switch (version)
00218   {
00219     case (1):
00220       vsl_b_read(bfs,mean_);
00221       vsl_b_read(bfs,var_);
00222       break;
00223     default:
00224       vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, pdf1d_pdf &)\n"
00225                << "           Unknown version number "<< version << '\n';
00226       bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00227       return;
00228   }
00229 }
00230 
00231 
00232 //=======================================================================
00233 
00234 void vsl_b_write(vsl_b_ostream& bfs, const pdf1d_pdf& b)
00235 {
00236   b.b_write(bfs);
00237 }
00238 
00239 //=======================================================================
00240 
00241 void vsl_b_read(vsl_b_istream& bfs, pdf1d_pdf& b)
00242 {
00243   b.b_read(bfs);
00244 }
00245 
00246 //=======================================================================
00247 
00248 void vsl_print_summary(vcl_ostream& os,const pdf1d_pdf& b)
00249 {
00250   os << b.is_a() << ": ";
00251   vsl_indent_inc(os);
00252   b.print_summary(os);
00253   vsl_indent_dec(os);
00254 }
00255 
00256 //=======================================================================
00257 
00258 void vsl_print_summary(vcl_ostream& os,const pdf1d_pdf* b)
00259 {
00260   if (b)
00261     vsl_print_summary(os, *b);
00262   else
00263     os << "No pdf1d_pdf defined.";
00264 }
00265 
00266 //=======================================================================
00267 
00268 //: Stream output operator for class reference
00269 vcl_ostream& operator<<(vcl_ostream& os,const pdf1d_pdf& b)
00270 {
00271   vsl_print_summary(os,b);
00272   return os;
00273 }
00274 
00275 //=======================================================================
00276 
00277 //: Stream output operator for class pointer
00278 vcl_ostream& operator<<(vcl_ostream& os,const pdf1d_pdf* b)
00279 {
00280   vsl_print_summary(os,b);
00281   return os;
00282 }
00283 
00284 //=======================================================================
00285 
00286 //: The inverse cumulative distribution function.
00287 // The value of x: P(x'<x) = P for x' drawn from distribution pdf.
00288 double pdf1d_pdf::inverse_cdf(double P) const
00289 {
00290   if (cdf_is_analytic())
00291   {
00292   // Use Chebyshev inequality to get starting point.
00293   // P[ |x-mean| >= k] <= var/k^2.
00294     double x_init;
00295     if (P < 0.5)
00296       x_init = mean() - vcl_sqrt(variance() / (2*P));
00297     else
00298       x_init = mean() + vcl_sqrt(variance() / (2*(1-P)));
00299 
00300     double f_init = cdf(x_init);
00301 
00302     // guess initial step size assuming a rectangular distribution.
00303     // slope = 1 / sqrt(12 * variance)
00304     double step = 2.0 * vnl_math_abs(f_init - P)*vcl_sqrt(12 * variance());
00305 
00306     double x_above, x_below;
00307     if (f_init > P)
00308     {
00309       x_above = x_init;
00310       while (true)
00311       {
00312         x_below = x_above - step;
00313         double f_below = cdf(x_below);
00314         if (f_below < P) break;
00315         x_above = x_below;
00316         step *= 2.0;
00317       }
00318     }
00319     else
00320     {
00321       x_below = x_init;
00322       while (true)
00323       {
00324         x_above = x_below + step;
00325         double f_above = cdf(x_above);
00326         if (f_above > P) break;
00327         x_below = x_above;
00328         step *= 2.0;
00329       }
00330     }
00331 #if 0
00332     double x_middle = (x_above + x_below) / 2;
00333     while (x_above - x_below > vnl_math::sqrteps)
00334     {
00335       double f_middle = pdf.cdf(x_middle) - P;
00336       if (f_middle < 0) x_below=x_middle;
00337       else x_above = x_middle;
00338     }
00339     return (x_above + x_below) / 2;
00340 #endif
00341     // bracketed Newton-Raphson.
00342 
00343 
00344     double x_middle=0.5*(x_above+x_below); 
00345     double dxold= x_above-x_below; 
00346     double dx=dxold;
00347     double f_middle = cdf(x_middle)-P;
00348     double df_middle = operator() (x_middle);
00349     for (unsigned j=100;j>0;j--) 
00350     {
00351       if ( !((x_above - x_middle)*df_middle + f_middle > 0.0 &&
00352              (x_below - x_middle)*df_middle + f_middle < 0.0   ) || 
00353         (vnl_math_abs((2.0*f_middle)) > vnl_math_abs(dxold*df_middle)))
00354       { // Bisect if Newton-Raphson isn't working
00355         x_middle=0.5*(x_above+x_below);
00356         dxold=dx;
00357         dx=x_above-x_middle;
00358       } else // Newton-Raphson step
00359       {
00360         dxold=dx;
00361         dx=f_middle/df_middle;
00362         x_middle -= dx;
00363         assert (x_below <= x_middle && x_middle <= x_above);
00364       }
00365 
00366       if (vnl_math_abs(dx) < vnl_math_abs(x_middle * vnl_math::sqrteps))
00367       {
00368         return x_middle; // Converged .
00369       }
00370 
00371       f_middle = cdf(x_middle)-P;
00372       df_middle = operator()(x_middle);
00373 
00374       if (f_middle < 0) //bracket on the root.
00375         x_below=x_middle;
00376       else
00377         x_above=x_middle;
00378     }
00379     vcl_cerr << "ERROR: pdf1d_pdf::inverse_cdf() failed to converge.\n";
00380     vcl_abort();
00381     return 0.0; // dummy return
00382   }
00383   else // Use sampling.
00384   {
00385     const unsigned n_stat = 20;
00386     pdf1d_sampler * sampler = new_sampler();
00387     unsigned n;
00388     double left, right; // These bracket the result
00389     if (P < 0.5)
00390     {
00391       // we want n_stat samples below P
00392       n = vnl_math_rnd(vcl_ceil(n_stat / P));
00393 
00394       // find lowest values
00395       mbl_priority_bounded_queue<double> lowest(n_stat+1);
00396       for (unsigned i=0; i < n; i++)
00397         lowest.push(sampler->sample());
00398 
00399       // bracket result
00400       right = lowest.top();
00401       lowest.pop();
00402       left = lowest.top();
00403     }
00404     else
00405     {
00406       // we want n_stat samples above P
00407       P = 1.0 - P;
00408       n = vnl_math_rnd(vcl_ceil(n_stat / P));
00409 
00410       // find highest values
00411       mbl_priority_bounded_queue<double, vcl_vector<double>, vcl_greater<double> >
00412         highest(n_stat+1);
00413       for (unsigned i=0; i < n; i++)
00414         highest.push(sampler->sample());
00415 
00416       // bracket result
00417       right = highest.top();
00418       highest.pop();
00419       left = highest.top();
00420     }
00421     delete sampler;
00422 
00423     // Interpolate left and right to find value
00424     // check interpolation is not extrapolation.
00425     assert (0.0 <= P*n - n_stat && P*n - n_stat <= 1.0);
00426     return (n_stat + 1 - P*n) * left + (P*n - n_stat) * right;
00427   }
00428 }