contrib/mul/vpdfl/vpdfl_kernel_pdf_builder.cxx
Go to the documentation of this file.
00001 // This is mul/vpdfl/vpdfl_kernel_pdf_builder.cxx
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005 //:
00006 // \file
00007 // \author Tim Cootes
00008 // \brief Initialises kernel pdfs
00009 
00010 #include "vpdfl_kernel_pdf_builder.h"
00011 //
00012 #include <vcl_cassert.h>
00013 #include <vcl_string.h>
00014 #include <vcl_sstream.h>
00015 #include <vcl_cstdlib.h> // vcl_abort()
00016 #include <vcl_cmath.h>
00017 #include <vcl_vector.h>
00018 
00019 #include <mbl/mbl_data_wrapper.h>
00020 #include <mbl/mbl_data_array_wrapper.h>
00021 #include <vpdfl/vpdfl_kernel_pdf.h>
00022 #include <vnl/vnl_vector.h>
00023 #include <vpdfl/vpdfl_calc_mean_var.h>
00024 #if 0
00025 #include <mbl/mbl_priority_bounded_queue.h>
00026 #endif
00027 
00028 #include <mbl/mbl_parse_block.h>
00029 #include <mbl/mbl_read_props.h>
00030 #include <vul/vul_string.h>
00031 #include <mbl/mbl_exception.h>
00032 
00033 
00034 //=======================================================================
00035 // Dflt ctor
00036 //=======================================================================
00037 
00038 vpdfl_kernel_pdf_builder::vpdfl_kernel_pdf_builder()
00039     : min_var_(1.0e-6), build_type_(select_equal), fixed_width_(1.0)
00040 {
00041 }
00042 
00043 //=======================================================================
00044 // Destructor
00045 //=======================================================================
00046 
00047 vpdfl_kernel_pdf_builder::~vpdfl_kernel_pdf_builder()
00048 {
00049 }
00050 
00051 //=======================================================================
00052 
00053 vpdfl_kernel_pdf& vpdfl_kernel_pdf_builder::kernel_pdf(vpdfl_pdf_base& model) const
00054 {
00055   // require a vpdfl_kernel_pdf
00056   assert(model.is_class("vpdfl_kernel_pdf"));
00057   return static_cast<vpdfl_kernel_pdf&>(model);
00058 }
00059 
00060 //: Use fixed width kernels of given width when building.
00061 void vpdfl_kernel_pdf_builder::set_use_fixed_width(double width)
00062 {
00063   build_type_ = fixed_width;
00064   fixed_width_ = width;
00065 }
00066 
00067 //: Use equal width kernels of width depending on number of samples.
00068 void vpdfl_kernel_pdf_builder::set_use_equal_width()
00069 {
00070   build_type_ = select_equal;
00071 }
00072 
00073 //: Kernel width proportional to distance to nearby samples.
00074 void vpdfl_kernel_pdf_builder::set_use_width_from_separation()
00075 {
00076   build_type_ = width_from_sep;
00077 }
00078 
00079 //: Build adaptive kernel estimate.
00080 void vpdfl_kernel_pdf_builder::set_use_adaptive()
00081 {
00082   build_type_ = adaptive;
00083 }
00084 
00085 //=======================================================================
00086 //: Define lower threshold on variance for built models
00087 //=======================================================================
00088 void vpdfl_kernel_pdf_builder::set_min_var(double min_var)
00089 {
00090   min_var_ = min_var;
00091 }
00092 
00093 //=======================================================================
00094 //: Get lower threshold on variance for built models
00095 //=======================================================================
00096 double vpdfl_kernel_pdf_builder::min_var() const
00097 {
00098   return min_var_;
00099 }
00100 
00101 void vpdfl_kernel_pdf_builder::build(vpdfl_pdf_base& model,
00102                                      const vnl_vector<double>& mean) const
00103 {
00104   vpdfl_kernel_pdf& kpdf = kernel_pdf(model);
00105 
00106   vcl_vector<vnl_vector<double> >m(1);
00107   m[0] = mean;
00108   kpdf.set_centres(&m[0],1,vcl_sqrt(min_var_));
00109 }
00110 
00111 //: Build kernel_pdf from n elements in data[i]
00112 void vpdfl_kernel_pdf_builder::build_from_array(vpdfl_pdf_base& model,
00113                                                 const vnl_vector<double>* data, int n) const
00114 {
00115   vpdfl_kernel_pdf& kpdf = kernel_pdf(model);
00116 
00117   if (n<1)
00118   {
00119     vcl_cerr<<"vpdfl_kernel_pdf_builder::build() No examples available.\n";
00120     vcl_abort();
00121   }
00122 
00123   switch (build_type_)
00124   {
00125     case fixed_width:
00126     build_fixed_width(kpdf,data,n,fixed_width_);
00127     break;
00128     case select_equal:
00129     build_select_equal_width(kpdf,data,n);
00130     break;
00131     case width_from_sep:
00132     build_width_from_separation(kpdf,data,n);
00133     break;
00134     case adaptive:
00135     build_adaptive(kpdf,data,n);
00136     break;
00137     default:
00138     vcl_cerr<<"vpdfl_kernel_pdf_builder::build_from_array() Unknown build type.\n";
00139     vcl_abort();
00140   }
00141 }
00142 
00143 void vpdfl_kernel_pdf_builder::build(vpdfl_pdf_base& model, mbl_data_wrapper<vnl_vector<double> >& data) const
00144 {
00145   /* vpdfl_kernel_pdf& kpdf = */ kernel_pdf(model);
00146 
00147   unsigned long n = data.size();
00148 
00149   if (n<1L)
00150   {
00151     vcl_cerr<<"vpdfl_kernel_pdf_builder::build() No examples available.\n";
00152     vcl_abort();
00153   }
00154 
00155   if (data.is_class("mbl_data_array_wrapper<T>"))
00156   {
00157     mbl_data_array_wrapper<vnl_vector<double> >& data_array =
00158                    static_cast<mbl_data_array_wrapper<vnl_vector<double> >&>( data);
00159     build_from_array(model,data_array.data(),n);
00160     return;
00161   }
00162 
00163   // Fill array with data
00164   vcl_vector<vnl_vector<double> >x(n);
00165   data.reset();
00166   for (unsigned long i=0;i<n;++i)
00167   {
00168     x[i]=data.current();
00169     data.next();
00170   }
00171 
00172   build_from_array(model,&x[0],n);
00173 }
00174 
00175 void vpdfl_kernel_pdf_builder::weighted_build(vpdfl_pdf_base& model,
00176                                               mbl_data_wrapper<vnl_vector<double> >& data,
00177                                               const vcl_vector<double>& /*wts*/) const
00178 {
00179   vcl_cerr<<"vpdfl_kernel_pdf_builder::weighted_build() Ignoring weights.\n";
00180   build(model,data);
00181 }
00182 
00183 //: Build from n elements in data[i]
00184 void vpdfl_kernel_pdf_builder::build_fixed_width(vpdfl_kernel_pdf& kpdf,
00185                                                  const vnl_vector<double>* data,
00186                                                  int n, double width) const
00187 {
00188   kpdf.set_centres(data,n,width);
00189 }
00190 
00191 //: Build from n elements in data[i].  Chooses width.
00192 //  Same width selected for all points, using
00193 //  $w=(4/(2n+d.n)^{1/(d+4)}\sigma$, as suggested by Silverman
00194 //  Note: This value only suitable for gaussian kernels!
00195 void vpdfl_kernel_pdf_builder::build_select_equal_width(vpdfl_kernel_pdf& kpdf,
00196                                                         const vnl_vector<double>* data, int n) const
00197 {
00198   vnl_vector<double> m,var;
00199   vpdfl_calc_mean_var(m,var,data,n);
00200 
00201   double mean_var = var.mean();
00202   if (mean_var<min_var_) var=min_var_;
00203 
00204   double d = data[0].size();
00205 
00206   // See Silverman, p88-89 : This is suitable for Gaussian kernels
00207   double k_var = mean_var*vcl_pow(4.0/(n*(d+2)),2.0/(d+4));
00208   double w = vcl_sqrt(k_var);
00209 
00210   build_fixed_width(kpdf,data,n,w);
00211 }
00212 
00213 //: Kernel width proportional to distance to nearby samples.
00214 void vpdfl_kernel_pdf_builder::build_width_from_separation(vpdfl_kernel_pdf& kpdf,
00215                                                            const vnl_vector<double>* data, int n) const
00216 {
00217   vnl_vector<double> width(n);
00218   double* w=width.data_block();
00219   const double min_diff = 1e-6;
00220 
00221   //const unsigned k = 2;  // Second nearest neighbour
00222   for (int i=0;i<n;++i)
00223   {
00224 #if 0
00225     mbl_priority_bounded_queue<double,vcl_vector<double>,vcl_less<double> > d_sq(k);
00226 #endif
00227 
00228     // Number of repeats of the point
00229     // If resampling used, some points will be present several times
00230     int n_repeats=0;
00231     double min_d2= -1.0;
00232     for (int j=0;j<n;++j)
00233     {
00234       if (j!=i)
00235       {
00236         double d2 = vnl_vector_ssd(data[i],data[j]);
00237 
00238         if (d2<min_diff)
00239           n_repeats++;
00240         else
00241           if (d2<min_d2 || min_d2<0) min_d2=d2;
00242 #if 0
00243         d_sq.push(d2);
00244 #endif
00245       }
00246     }
00247 
00248     // Width set to distance to k-th nearest neighbour
00249 #if 0
00250     w[i] = vcl_sqrt(d_sq.top());
00251 #endif
00252 
00253     //: Width to nearest neighbour, allowing for repeats
00254     if (min_d2<min_var_) min_d2=min_var_;
00255     w[i] = vcl_sqrt(min_d2)/(n_repeats+1);
00256   }
00257 
00258   kpdf.set_centres(data,n,width);
00259 }
00260 
00261 //: Build adaptive kernel estimate.
00262 //  Use equal widths to create a pilot estimate, then use the prob at each
00263 //  data point to modify the widths
00264 void vpdfl_kernel_pdf_builder::build_adaptive(vpdfl_kernel_pdf& kpdf,
00265                                               const vnl_vector<double>* data, int n) const
00266 {
00267   // First build the pilot estimate
00268   build_select_equal_width(kpdf,data,n);
00269 
00270   // Evaluate the pdf at each point
00271   vnl_vector<double> log_p(n);
00272   for (int i=0;i<n;++i)
00273   {
00274     log_p[i]=kpdf.log_p(data[i]);
00275   }
00276 
00277   double log_mean = log_p.mean();
00278 
00279   vnl_vector<double> new_width = kpdf.width();
00280 
00281   for (int i=0;i<n;++i)
00282   {
00283     // Scale each inversely by sqrt(prob)
00284     // Check: Should there be a power of d in there?
00285     new_width[i] *= vcl_exp(-0.5*(log_p[i]-log_mean));
00286   }
00287 
00288   kpdf.set_centres(data,n,new_width);
00289 }
00290 
00291 //=======================================================================
00292 // Method: is_a
00293 //=======================================================================
00294 
00295 vcl_string vpdfl_kernel_pdf_builder::is_a() const
00296 {
00297   return vcl_string("vpdfl_kernel_pdf_builder");
00298 }
00299 
00300 //=======================================================================
00301 // Method: is_class
00302 //=======================================================================
00303 
00304 bool vpdfl_kernel_pdf_builder::is_class(vcl_string const& s) const
00305 {
00306   return vpdfl_builder_base::is_class(s) || s==vpdfl_kernel_pdf_builder::is_a();
00307 }
00308 
00309 //=======================================================================
00310 // Method: version_no
00311 //=======================================================================
00312 
00313 short vpdfl_kernel_pdf_builder::version_no() const
00314 {
00315   return 1;
00316 }
00317 
00318 //=======================================================================
00319 // Method: print
00320 //=======================================================================
00321 
00322 void vpdfl_kernel_pdf_builder::print_summary(vcl_ostream& os) const
00323 {
00324   os << "Min. var.: "<< min_var_;
00325 }
00326 
00327 //=======================================================================
00328 // Method: save
00329 //=======================================================================
00330 
00331 void vpdfl_kernel_pdf_builder::b_write(vsl_b_ostream& bfs) const
00332 {
00333   vsl_b_write(bfs,version_no());
00334   vsl_b_write(bfs,min_var_);
00335 }
00336 
00337 //=======================================================================
00338 // Method: load
00339 //=======================================================================
00340 
00341 void vpdfl_kernel_pdf_builder::b_read(vsl_b_istream& bfs)
00342 {
00343   if (!bfs) return;
00344 
00345   short version;
00346   vsl_b_read(bfs,version);
00347   switch (version)
00348   {
00349     case (1):
00350       vsl_b_read(bfs,min_var_);
00351       break;
00352     default:
00353       vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, vpdfl_kernel_pdf_builder &)\n"
00354                << "           Unknown version number "<< version << vcl_endl;
00355       bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00356       return;
00357   }
00358 }
00359 
00360 //: Read initialisation settings from a stream.
00361 // Parameters:
00362 // \verbatim
00363 // {
00364 //   min_var: 1.0e-6
00365 //   // kernel_widths can be fixed_width,select_equal,width_from_sep,adaptive
00366 //   kernel_widths: fixed_width
00367 //   // Width to be used when it is fixed_width
00368 //   fixed_width: 1.0
00369 // }
00370 // \endverbatim
00371 // \throw mbl_exception_parse_error if the parse fails.
00372 void vpdfl_kernel_pdf_builder::config_from_stream(vcl_istream & is)
00373 {
00374   vcl_string s = mbl_parse_block(is);
00375 
00376   vcl_istringstream ss(s);
00377   mbl_read_props_type props = mbl_read_props_ws(ss);
00378 
00379   double mv=1.0e-6;
00380   if (props.find("min_var")!=props.end())
00381   {
00382     mv=vul_string_atof(props["min_var"]);
00383     props.erase("min_var");
00384   }
00385   set_min_var(mv);
00386 
00387   build_type bt=select_equal;
00388   if (props.find("kernel_widths")!=props.end())
00389   {
00390     if (props["kernel_widths"]=="fixed_width") bt=fixed_width;
00391     else
00392     if (props["kernel_widths"]=="select_equal") bt=select_equal;
00393     else
00394     if (props["kernel_widths"]=="width_from_sep") bt=width_from_sep;
00395     else
00396     if (props["kernel_widths"]=="adaptive") bt=adaptive;
00397     else
00398     {
00399       vcl_string msg="Unknown kernel_width type : "+props["kernel_widths"];
00400       throw mbl_exception_parse_error(msg);
00401     }
00402     props.erase("kernel_widths");
00403   }
00404   build_type_ = bt;
00405 
00406   fixed_width_=1.0;
00407   if (props.find("fixed_width")!=props.end())
00408   {
00409     fixed_width_=vul_string_atof(props["fixed_width"]);
00410     props.erase("fixed_width");
00411   }
00412 
00413   try
00414   {
00415     mbl_read_props_look_for_unused_props(
00416         "vpdfl_kernel_pdf_builder::config_from_stream", props);
00417   }
00418 
00419   catch(mbl_exception_unused_props &e)
00420   {
00421     throw mbl_exception_parse_error(e.what());
00422   }
00423 }
00424