contrib/mul/pdf1d/pdf1d_kernel_pdf_builder.cxx
Go to the documentation of this file.
00001 // This is mul/pdf1d/pdf1d_kernel_pdf_builder.cxx
00002 
00003 //:
00004 // \file
00005 
00006 #include "pdf1d_kernel_pdf_builder.h"
00007 #include <vcl_cassert.h>
00008 #include <vcl_string.h>
00009 #include <vcl_cstdlib.h> // vcl_abort()
00010 #include <vcl_cmath.h>
00011 
00012 #include <vnl/vnl_vector_ref.h>
00013 #include <mbl/mbl_data_wrapper.h>
00014 #include <mbl/mbl_data_array_wrapper.h>
00015 #include <mbl/mbl_index_sort.h>
00016 #include <pdf1d/pdf1d_kernel_pdf.h>
00017 #include <pdf1d/pdf1d_calc_mean_var.h>
00018 
00019 //=======================================================================
00020 
00021 pdf1d_kernel_pdf_builder::pdf1d_kernel_pdf_builder()
00022     : min_var_(1.0e-6), build_type_(select_equal), fixed_width_(1.0)
00023 {
00024 }
00025 
00026 //=======================================================================
00027 
00028 pdf1d_kernel_pdf_builder::~pdf1d_kernel_pdf_builder()
00029 {
00030 }
00031 
00032 //=======================================================================
00033 
00034 pdf1d_kernel_pdf& pdf1d_kernel_pdf_builder::kernel_pdf(pdf1d_pdf& model) const
00035 {
00036   // require a pdf1d_kernel_pdf
00037   assert(model.is_class("pdf1d_kernel_pdf"));
00038   return static_cast<pdf1d_kernel_pdf&>(model);
00039 }
00040 
00041 //: Use fixed width kernels of given width when building.
00042 void pdf1d_kernel_pdf_builder::set_use_fixed_width(double width)
00043 {
00044   build_type_ = fixed_width;
00045   fixed_width_ = width;
00046 }
00047 
00048 //: Use equal width kernels of width depending on number of samples.
00049 void pdf1d_kernel_pdf_builder::set_use_equal_width()
00050 {
00051   build_type_ = select_equal;
00052 }
00053 
00054 //: Kernel width proportional to distance to nearby samples.
00055 void pdf1d_kernel_pdf_builder::set_use_width_from_separation()
00056 {
00057   build_type_ = width_from_sep;
00058 }
00059 
00060 //: Build adaptive kernel estimate.
00061 void pdf1d_kernel_pdf_builder::set_use_adaptive()
00062 {
00063   build_type_ = adaptive;
00064 }
00065 
00066 //=======================================================================
00067 //: Define lower threshold on variance for built models
00068 void pdf1d_kernel_pdf_builder::set_min_var(double min_var)
00069 {
00070   min_var_ = min_var;
00071 }
00072 
00073 //=======================================================================
00074 //: Get lower threshold on variance for built models
00075 double pdf1d_kernel_pdf_builder::min_var() const
00076 {
00077   return min_var_;
00078 }
00079 
00080 void pdf1d_kernel_pdf_builder::build(pdf1d_pdf& model, double mean) const
00081 {
00082   pdf1d_kernel_pdf& kpdf = kernel_pdf(model);
00083 
00084   vnl_vector<double> m(1);
00085   m[0] = mean;
00086   kpdf.set_centres(m,vcl_sqrt(min_var_));
00087 }
00088 
00089 //: Build kernel_pdf from n elements in data[i]
00090 void pdf1d_kernel_pdf_builder::build_from_array(pdf1d_pdf& model,
00091                                                 const double* data, int n) const
00092 {
00093   pdf1d_kernel_pdf& kpdf = kernel_pdf(model);
00094 
00095   if (n<1)
00096   {
00097     vcl_cerr<<"pdf1d_kernel_pdf_builder::build() No examples available.\n";
00098     vcl_abort();
00099   }
00100 
00101   // Initially just use Silverman's estimate
00102   // Later allow switching of alternative algorithms
00103   // e.g. width proportional to dist. to nearest point,
00104   //    adaptive kernel estimates etc.
00105 
00106   switch (build_type_)
00107   {
00108     case fixed_width:
00109     build_fixed_width(kpdf,data,n,fixed_width_);
00110     break;
00111     case select_equal:
00112     build_select_equal_width(kpdf,data,n);
00113     break;
00114     case width_from_sep:
00115     build_width_from_separation(kpdf,data,n);
00116     break;
00117     case adaptive:
00118     build_adaptive(kpdf,data,n);
00119     break;
00120     default:
00121     vcl_cerr<<"pdf1d_kernel_pdf_builder::build() Unknown build type.\n";
00122     vcl_abort();
00123   }
00124 }
00125 
00126 void pdf1d_kernel_pdf_builder::build(pdf1d_pdf& model, mbl_data_wrapper<double>& data) const
00127 {
00128   // pdf1d_kernel_pdf& kpdf = kernel_pdf(model); // unused
00129 
00130   int n = data.size();
00131 
00132   if (n<1)
00133   {
00134     vcl_cerr<<"pdf1d_kernel_pdf_builder::build() No examples available.\n";
00135     vcl_abort();
00136   }
00137 
00138   if (data.is_class("mbl_data_array_wrapper<T>"))
00139   {
00140     mbl_data_array_wrapper<double>& data_array =
00141                    static_cast<mbl_data_array_wrapper<double>&>(data);
00142     build_from_array(model,data_array.data(),n);
00143     return;
00144   }
00145 
00146   // Fill array with data
00147   vnl_vector<double> x(n);
00148   data.reset();
00149   for (int i=0;i<n;++i)
00150   {
00151     x[i]=data.current();
00152     data.next();
00153   }
00154 
00155   build_from_array(model,x.data_block(),n);
00156 }
00157 
00158 void pdf1d_kernel_pdf_builder::weighted_build(pdf1d_pdf& model,
00159                                               mbl_data_wrapper<double>& data,
00160                                               const vcl_vector<double>& /*wts*/) const
00161 {
00162   vcl_cerr<<"pdf1d_kernel_pdf_builder::weighted_build() Ignoring weights.\n";
00163   build(model,data);
00164 }
00165 
00166 //: Build from n elements in data[i]
00167 void pdf1d_kernel_pdf_builder::build_fixed_width(pdf1d_kernel_pdf& kpdf,
00168                                                  const double* data, int n, double width) const
00169 {
00170   vnl_vector_ref<double> v_data(n,const_cast<double*>(data)); // const violation
00171   kpdf.set_centres(v_data,width);
00172 }
00173 
00174 //: Build from n elements in data[i].  Chooses width.
00175 //  Same width selected for all points, using
00176 //  $w=(3n/4)^{-0.2}\sigma$, as suggested by Silverman
00177 void pdf1d_kernel_pdf_builder::build_select_equal_width(pdf1d_kernel_pdf& kpdf,
00178                                                         const double* data, int n) const
00179 {
00180   double m,var;
00181   pdf1d_calc_mean_var(m,var,data,n);
00182   if (var<min_var_) var=min_var_;
00183 
00184   double k_var = var*vcl_pow(4.0/(3*n),0.4);
00185   double w = vcl_sqrt(k_var);
00186 
00187   build_fixed_width(kpdf,data,n,w);
00188 }
00189 
00190 //: Return distance to closest neighbour to i0, not identical to i0
00191 //  Assumes index is sorted: data[index[i]] <= *data[index[i+1]]
00192 static double dist_to_neighbour(int i0, const double* data, const int *index, int n)
00193 {
00194   int k = 3;
00195   int ilo = i0-k; if (ilo<0) ilo=0;
00196   int ihi = i0+k; if (ihi>=n) ihi=n-1;
00197 
00198   return vcl_fabs(data[index[ihi]]-data[index[ilo]]);
00199 
00200 #if 0
00201   double di0 = data[index[i0]];
00202 
00203   const double min_diff = 1.0e-6;
00204   // Look below i0
00205   int i=i0-1;
00206   while (i>0 && vcl_fabs(data[i]-di0)<min_diff) --i;
00207   double d_lo = vcl_fabs(data[i]-di0);
00208 
00209   // Look above i0
00210   i=i0+1;
00211   int n1 = n-1;
00212   while (i<n1 && vcl_fabs(data[i]-di0)<min_diff) ++i;
00213   double d_hi = vcl_fabs(data[i]-di0);
00214 
00215   return d_hi+d_lo;
00216 #endif
00217 }
00218 
00219 //: Kernel width proportional to distance to nearby samples.
00220 void pdf1d_kernel_pdf_builder::build_width_from_separation(pdf1d_kernel_pdf& kpdf,
00221                                                            const double* data, int n) const
00222 {
00223   // Sort the data
00224   vcl_vector<int> index;
00225   mbl_index_sort(data, n, index);
00226 
00227   vnl_vector<double> width(n);
00228   double* w=width.data_block();
00229 
00230   double min_w = vcl_sqrt(min_var_)/n;
00231 
00232   for (int i=0;i<n;++i)
00233   {
00234     w[index[i]] = dist_to_neighbour(i, data, &index.front(), n);
00235     if (w[index[i]]<min_w) w[index[i]]=min_w;
00236   }
00237 
00238   kpdf.set_centres(vnl_vector_ref<double>(n, const_cast<double *>(data)), width);
00239 }
00240 
00241 //: Build adaptive kernel estimate.
00242 //  Use equal widths to create a pilot estimate, then use the prob at each
00243 //  data point to modify the widths
00244 void pdf1d_kernel_pdf_builder::build_adaptive(pdf1d_kernel_pdf& kpdf,
00245                                               const double* data, int n) const
00246 {
00247   // First build the pilot estimate
00248   build_select_equal_width(kpdf,data,n);
00249 
00250   // Evaluate the pdf at each point
00251   vnl_vector<double> log_p(n);
00252   for (int i=0;i<n;++i)
00253   {
00254     log_p[i]=kpdf.log_p(data[i]);
00255   }
00256 
00257   double log_mean = log_p.mean();
00258 
00259   vnl_vector<double> new_width = kpdf.width();
00260 
00261   for (int i=0;i<n;++i)
00262   {
00263     // Scale each inversely by sqrt(prob)
00264     new_width[i] *= vcl_exp(-0.5*(log_p[i]-log_mean));
00265   }
00266 
00267   kpdf.set_centres(kpdf.centre(),new_width);
00268 }
00269 
00270 //=======================================================================
00271 
00272 vcl_string pdf1d_kernel_pdf_builder::is_a() const
00273 {
00274   return vcl_string("pdf1d_kernel_pdf_builder");
00275 }
00276 
00277 //=======================================================================
00278 
00279 bool pdf1d_kernel_pdf_builder::is_class(vcl_string const& s) const
00280 {
00281   return pdf1d_builder::is_class(s) || s==pdf1d_kernel_pdf_builder::is_a();
00282 }
00283 
00284 //=======================================================================
00285 
00286 short pdf1d_kernel_pdf_builder::version_no() const
00287 {
00288   return 1;
00289 }
00290 
00291 //=======================================================================
00292 
00293 void pdf1d_kernel_pdf_builder::print_summary(vcl_ostream& os) const
00294 {
00295   os << "Min. var.: "<< min_var_;
00296 }
00297 
00298 //=======================================================================
00299 
00300 void pdf1d_kernel_pdf_builder::b_write(vsl_b_ostream& bfs) const
00301 {
00302   vsl_b_write(bfs,version_no());
00303   vsl_b_write(bfs,min_var_);
00304 }
00305 
00306 //=======================================================================
00307 
00308 void pdf1d_kernel_pdf_builder::b_read(vsl_b_istream& bfs)
00309 {
00310   if (!bfs) return;
00311 
00312   short version;
00313   vsl_b_read(bfs,version);
00314   switch (version)
00315   {
00316     case (1):
00317       vsl_b_read(bfs,min_var_);
00318       break;
00319     default:
00320       vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, pdf1d_kernel_pdf_builder &)\n"
00321                << "           Unknown version number "<< version << vcl_endl;
00322       bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00323       return;
00324   }
00325 }
00326