contrib/mul/pdf1d/pdf1d_kernel_pdf_builder.h
Go to the documentation of this file.
00001 // This is mul/pdf1d/pdf1d_kernel_pdf_builder.h
00002 #ifndef pdf1d_kernel_pdf_builder_h
00003 #define pdf1d_kernel_pdf_builder_h
00004 
00005 //:
00006 // \file
00007 // \author Tim Cootes
00008 // \brief Initialises kernel pdfs
00009 
00010 #include <pdf1d/pdf1d_builder.h>
00011 #include <vcl_iosfwd.h>
00012 
00013 //=======================================================================
00014 
00015 class pdf1d_kernel_pdf;  // Forward declaration: this header only uses the type by reference
00016 
00017 //: Build kernel pdf objects.
00018 //  Contains algorithms for selecting kernel widths.
00019 //
00020 //  Simplest is to use equal widths (set_use_equal_width()).
00021 //
00022 //  More interesting is an adaptive kernel estimate (set_use_adaptive()).
00023 //  This tends to get results comparable with the equal width method for
00024 //  simple cases, but can match to more complex distributions more easily.
00025 //  In particular, it tends to approximate the tails more accurately.
00026 //
00027 //  See "Density Estimation for Statistics and Data Analysis" by B.W.Silverman
00028 //  (Pub. Chapman and Hall, 1986) for details.
00029 class pdf1d_kernel_pdf_builder : public pdf1d_builder
00030 {
00031  public:
00032   enum build_type { fixed_width, select_equal, width_from_sep, adaptive };  // Build options; presumably one per set_use_*() method below
00033  private:
00034   //: Minimum variance of whole model (see set_min_var() and min_var())
00035   double min_var_;
00036 
00037   //: Type of building to be performed (one of the build_type values)
00038   build_type build_type_;
00039 
00040   //: Kernel width to use when the fixed_width build option is selected
00041   double fixed_width_;
00042   //: NOTE(review): presumably gives access to model as a pdf1d_kernel_pdf - confirm against the implementation
00043   pdf1d_kernel_pdf& kernel_pdf(pdf1d_pdf& model) const;
00044  public:
00045 
00046   //: Default constructor
00047   pdf1d_kernel_pdf_builder();
00048 
00049   //: Destructor
00050   virtual ~pdf1d_kernel_pdf_builder();
00051 
00052   //: Use fixed width kernels of the given width when building.
00053   void set_use_fixed_width(double width);
00054 
00055   //: Use equal width kernels, with the width depending on the number of samples.
00056   // This method appears to give a lower density near the tails
00057   void set_use_equal_width();
00058 
00059   //: Use kernel widths proportional to the distance to nearby samples.
00060   void set_use_width_from_separation();
00061 
00062   //: Build an adaptive kernel estimate (see build_adaptive()).
00063   void set_use_adaptive();
00064 
00065   //: Define lower threshold on variance for built models
00066   virtual void set_min_var(double min_var);
00067 
00068   //: Get lower threshold on variance for built models
00069   virtual double min_var() const;
00070 
00071   //: Build model from the n elements in data[i]
00072   virtual void build_from_array(pdf1d_pdf& model, const double* data, int n) const;
00073 
00074   //: Build default model with given mean
00075   virtual void build(pdf1d_pdf& model, double mean) const;
00076 
00077   //: Build model from data
00078   // The kernel centres in the pdf will have the same value and order as
00079   // the training data
00080   virtual void build(pdf1d_pdf& model,
00081                      mbl_data_wrapper<double>& data) const;
00082 
00083   //: Build model from weighted data (wts presumably holds one weight per sample - TODO confirm)
00084   virtual void weighted_build(pdf1d_pdf& model,
00085                               mbl_data_wrapper<double>& data,
00086                               const vcl_vector<double>& wts) const;
00087 
00088   //: Build from n elements in data[i], using the supplied fixed kernel width.
00089   // The kernel centres in the pdf will have the same value and order as
00090   // the training data
00091   void build_fixed_width(pdf1d_kernel_pdf& kpdf,
00092                          const double* data, int n, double width) const;
00093 
00094   //: Build from n elements in data[i], choosing the kernel width automatically.
00095   //  The same width is selected for all points, using
00096   //  $w=(3n/4)^{-0.2}\sigma$, as suggested by Silverman
00097   //
00098   // The kernel centres in the pdf will have the same value and order as
00099   // the training data
00100   void build_select_equal_width(pdf1d_kernel_pdf& kpdf,
00101                                 const double* data, int n) const;
00102 
00103   //: Build from n elements in data[i], with kernel width proportional to distance to nearby samples.
00104   // The kernel centres in the pdf will have the same value and order as
00105   // the training data
00106   void build_width_from_separation(pdf1d_kernel_pdf& kpdf,
00107                                    const double* data, int n) const;
00108 
00109   //: Build adaptive kernel estimate from n elements in data[i].
00110   //  Uses equal widths to create a pilot estimate, then uses the prob at each
00111   //  data point to modify the widths.
00112   // Uses Silverman's equation 5.8 with alpha = 0.5 as suggested, and is
00113   // based on a pilot estimate as calculated by build_select_equal_width().
00114   // The kernel centres in the pdf will have the same value and order as
00115   // the training data.
00116   //
00117   // This method gives a significantly higher density near the edges
00118   // of the distribution than suggested by the cumulative histogram of
00119   // the training data,
00120   // pushing the cdf estimate closer to 0.5 at the edges. If you want to
00121   // approximate the cumulative histogram more closely, then
00122   // build_select_equal_width() may be more effective.
00123   void build_adaptive(pdf1d_kernel_pdf& kpdf,
00124                       const double* data, int n) const;
00125 
00126   //: Version number for I/O
00127   short version_no() const;
00128 
00129   //: Name of the class
00130   virtual vcl_string is_a() const;
00131 
00132   //: Does the name of the class match the argument s?
00133   virtual bool is_class(vcl_string const& s) const;
00134 
00135   //: Print summary of class to os
00136   virtual void print_summary(vcl_ostream& os) const;
00137 
00138   //: Save class to binary file stream bfs
00139   virtual void b_write(vsl_b_ostream& bfs) const;
00140 
00141   //: Load class from binary file stream bfs
00142   virtual void b_read(vsl_b_istream& bfs);
00143 };
00144 
00145 #endif // pdf1d_kernel_pdf_builder_h