00001 // This is mul/pdf1d/pdf1d_kernel_pdf_builder.h 00002 #ifndef pdf1d_kernel_pdf_builder_h 00003 #define pdf1d_kernel_pdf_builder_h 00004 00005 //: 00006 // \file 00007 // \author Tim Cootes 00008 // \brief Initialises kernel pdfs 00009 00010 #include <pdf1d/pdf1d_builder.h> 00011 #include <vcl_iosfwd.h> 00012 00013 //======================================================================= 00014 00015 class pdf1d_kernel_pdf; 00016 00017 //: Build kernel pdf objects. 00018 // Contains algorithms for selecting kernel widths. 00019 // 00020 // Simplest is to use equal widths (set_use_equal_width()). 00021 // 00022 // More interesting is an adaptive kernel estimate (set_use_adaptive()). 00023 // This tends to get results comparable with the equal width method for 00024 // simple cases, but can match to more complex distributions more easily. 00025 // In particular, it tends to approximate the tails more accurately. 00026 // 00027 // See book on Density Estimation by B.W.Silverman (Pub. Chapman and Hall, 1986) 00028 // for details. 00029 class pdf1d_kernel_pdf_builder : public pdf1d_builder 00030 { 00031 public: 00032 enum build_type { fixed_width, select_equal, width_from_sep, adaptive }; 00033 private: 00034 //: Minimum variance of whole model 00035 double min_var_; 00036 00037 //: Type of building to be performed 00038 build_type build_type_; 00039 00040 //: Width set if fixed_width option on build used 00041 double fixed_width_; 00042 00043 pdf1d_kernel_pdf& kernel_pdf(pdf1d_pdf& model) const; 00044 public: 00045 00046 //: Dflt ctor 00047 pdf1d_kernel_pdf_builder(); 00048 00049 //: Destructor 00050 virtual ~pdf1d_kernel_pdf_builder(); 00051 00052 //: Use fixed width kernels of given width when building. 00053 void set_use_fixed_width(double width); 00054 00055 //: Use equal width kernels of width depending on number of samples. 00056 // This method appears to give a lower density near the tails 00057 void set_use_equal_width(); 00058 00059 //: Kernel width proportional to distance to nearby samples. 00060 void set_use_width_from_separation(); 00061 00062 //: Build adaptive kernel estimate. 00063 void set_use_adaptive(); 00064 00065 //: Define lower threshold on variance for built models 00066 virtual void set_min_var(double min_var); 00067 00068 //: Get lower threshold on variance for built models 00069 virtual double min_var() const; 00070 00071 //: Build from n elements in data[i] 00072 virtual void build_from_array(pdf1d_pdf& model, const double* data, int n) const; 00073 00074 //: Build default model with given mean 00075 virtual void build(pdf1d_pdf& model, double mean) const; 00076 00077 //: Build model from data 00078 // The kernel centres in the pdf will have same value and order as 00079 // the training data 00080 virtual void build(pdf1d_pdf& model, 00081 mbl_data_wrapper<double>& data) const; 00082 00083 //: Build model from weighted data 00084 virtual void weighted_build(pdf1d_pdf& model, 00085 mbl_data_wrapper<double>& data, 00086 const vcl_vector<double>& wts) const; 00087 00088 //: Build from n elements in data[i]. Fixed kernel width. 00089 // The kernel centres in the pdf will have same value and order as 00090 // the training data 00091 void build_fixed_width(pdf1d_kernel_pdf& kpdf, 00092 const double* data, int n, double width) const; 00093 00094 //: Build from n elements in data[i]. Chooses width. 00095 // Same width selected for all points, using 00096 // $w=(3n/4)^{-0.2}\sigma$, as suggested by Silverman 00097 // 00098 // The kernel centres in the pdf will have same value and order as 00099 // the training data 00100 void build_select_equal_width(pdf1d_kernel_pdf& kpdf, 00101 const double* data, int n) const; 00102 00103 //: Kernel width proportional to distance to nearby samples. 00104 // The kernel centres in the pdf will have same value and order as 00105 // the training data 00106 void build_width_from_separation(pdf1d_kernel_pdf& kpdf, 00107 const double* data, int n) const; 00108 00109 //: Build adaptive kernel estimate. 00110 // Use equal widths to create a pilot estimate, then use the prob at each 00111 // data point to modify the widths. 00112 // Uses Silverman's equation 5.8 with alpha = 0.5 as suggested, and 00113 // based on a pilot estimate as calculated by build_select_equal_width(). 00114 // The kernel centres in the pdf will have same value and order as 00115 // the training data. 00116 // 00117 // This method gives a significantly higher density near the edges 00118 // of the distribution than suggested by the cumulative histogram of 00119 // the training data, 00120 // pushing the cdf estimate closer to 0.5 at the edges. If you want to 00121 // approximate the cumulative histogram more closely, then 00122 // build_select_equal_width() may be more effective. 00123 void build_adaptive(pdf1d_kernel_pdf& kpdf, 00124 const double* data, int n) const; 00125 00126 //: Version number for I/O 00127 short version_no() const; 00128 00129 //: Name of the class 00130 virtual vcl_string is_a() const; 00131 00132 //: Does the name of the class match the argument? 00133 virtual bool is_class(vcl_string const& s) const; 00134 00135 //: Print class to os 00136 virtual void print_summary(vcl_ostream& os) const; 00137 00138 //: Save class to binary file stream 00139 virtual void b_write(vsl_b_ostream& bfs) const; 00140 00141 //: Load class from binary file stream 00142 virtual void b_read(vsl_b_istream& bfs); 00143 }; 00144 00145 #endif // pdf1d_kernel_pdf_builder_h