contrib/mul/pdf1d/tools/compare_kernel_estimates.cxx
Go to the documentation of this file.
00001 // This is mul/pdf1d/tools/compare_kernel_estimates.cxx
00002 
00003 //:
00004 // \file
00005 // \brief Program to test different kernel approximations
00006 // \author Tim Cootes
00007 //
00008 // We generate N samples from a known distribution
00009 //  e.g. unit Gaussian, flat or exponential.
00010 // We then use a variety of kernel estimation techniques
00011 // (e.g. fixed width, adaptive kernels etc)
00012 // to generate a kernel pdf from the samples.
00013 // We then estimate the Bhat. overlap with the true pdf.
00014 
00015 #include <vcl_iostream.h>
00016 #include <mbl/mbl_stats_1d.h>
00017 #include <vnl/vnl_vector.h>
00018 #include <pdf1d/pdf1d_compare_to_pdf_bhat.h>
00019 #include <pdf1d/pdf1d_compare_to_pdf_ks.h>
00020 #include <pdf1d/pdf1d_sampler.h>
00021 #include <pdf1d/pdf1d_exponential.h>
00022 #include <pdf1d/pdf1d_gaussian_kernel_pdf_builder.h>
00023 
00024 //: Compute how well data in x matches true pdf using n different comparators
00025 void test_comparison(vcl_vector<mbl_stats_1d>& B_stats,
00026                      const vnl_vector<double>& x,
00027                      const pdf1d_pdf& true_pdf,
00028                      vcl_vector<pdf1d_compare_to_pdf*> comparator)
00029 {
00030   int n = comparator.size();
00031   B_stats.resize(n);
00032 
00033   for (int i=0;i<n;++i)
00034   {
00035     double B = comparator[i]->compare(x.data_block(),x.size(),true_pdf);
00036     B_stats[i].obs(B);
00037   }
00038 }
00039 
00040 //: Compute how well data sampled from true_pdf matches true_pdf using n different comparators
00041 void test_comparison(vcl_vector<mbl_stats_1d>& B_stats,
00042                      int n_samples, int n_repeats,
00043                      const pdf1d_pdf& true_pdf,
00044                      vcl_vector<pdf1d_compare_to_pdf*> comparator)
00045 {
00046   vnl_vector<double> x(n_samples);
00047   pdf1d_sampler *sampler = true_pdf.new_sampler();
00048 
00049   for (int i=0;i<n_repeats;++i)
00050   {
00051     sampler->get_samples(x);
00052     test_comparison(B_stats,x,true_pdf,comparator);
00053   }
00054 
00055   delete sampler;
00056 }
00057 
00058 void test_comparison(int n_samples, int n_trials,
00059                      const pdf1d_pdf& true_pdf,
00060                      vcl_vector<pdf1d_compare_to_pdf*> comparator,
00061                      const vcl_vector<vcl_string>& name)
00062 {
00063   vcl_vector<mbl_stats_1d> B_stats;
00064 
00065   test_comparison(B_stats,n_samples,n_trials,true_pdf,comparator);
00066 
00067   vcl_cout<<"PDF: "<<true_pdf<<vcl_endl
00068           <<"Sampling "<<n_samples
00069           <<" values from pdf and computing overlap with kernel estimate.\n"
00070           <<"Averaging over "<<n_trials<<" trials."<<vcl_endl;
00071   for (unsigned int i=0;i<B_stats.size();++i)
00072   {
00073     vcl_cout<<name[i]<<" :\n"
00074             <<"Mean: "<<B_stats[i].mean()
00075             <<" Std.Err: "<<B_stats[i].stdError()<<vcl_endl;
00076   }
00077 }
00078 
00079 int main()
00080 {
00081   vcl_vector<pdf1d_compare_to_pdf*> comparator;
00082   vcl_vector<vcl_string> name;
00083 
00084 #if 0
00085   // Set up Gaussian estimator
00086   pdf1d_compare_to_pdf_bhat comp1;
00087   comp1.set_builder(pdf1d_gaussian_builder());
00088   comparator.push_back(&comp1);
00089   name.push_back("Single Gaussian estimate");
00090 
00091 
00092   // Set up Gaussian kernel estimator
00093   pdf1d_compare_to_pdf_bhat comp2;
00094   pdf1d_gaussian_kernel_pdf_builder k_builder2;
00095   k_builder2.set_use_fixed_width(0.1);
00096   comp2.set_builder(k_builder2);
00097   comparator.push_back(&comp2);
00098   name.push_back("Gaussian kernel estimate, width=0.1");
00099 #endif
00100 
00101   // Set up Gaussian kernel estimator
00102   pdf1d_compare_to_pdf_bhat comp3;
00103   pdf1d_gaussian_kernel_pdf_builder k_builder3;
00104   k_builder3.set_use_equal_width();
00105   comp3.set_builder(k_builder3);
00106   comparator.push_back(&comp3);
00107   name.push_back("Bhat. using Gaussian kernel estimate, width depends on n.samples");
00108 
00109   // Set up Gaussian kernel estimator
00110   pdf1d_compare_to_pdf_bhat comp4;
00111   pdf1d_gaussian_kernel_pdf_builder k_builder4;
00112   k_builder4.set_use_width_from_separation();
00113   comp4.set_builder(k_builder4);
00114   comparator.push_back(&comp4);
00115   name.push_back("Bhat. using Gaussian kernel estimate, width depends on local sample separation");
00116 
00117   // Set up adaptive Gaussian kernel estimator
00118   pdf1d_compare_to_pdf_bhat comp5;
00119   pdf1d_gaussian_kernel_pdf_builder k_builder5;
00120   k_builder5.set_use_adaptive();
00121   comp5.set_builder(k_builder5);
00122   comparator.push_back(&comp5);
00123   name.push_back("Bhat. using Adaptive Gaussian kernel estimate");
00124 
00125   // Try with KS statistic
00126   pdf1d_compare_to_pdf_ks comp_ks;
00127   comparator.push_back(&comp_ks);
00128   name.push_back("KS Statistic");
00129 
00130   int n_samples = 100;
00131   int n_trials = 1000;
00132 
00133   pdf1d_exponential true_pdf(1.0);
00134 #if 0
00135   pdf1d_gaussian true_pdf(0,1);
00136   pdf1d_flat true_pdf(0,1);
00137   // Let true pdf be n Gaussians at 0,1,2 with sd of 0.25
00138   pdf1d_gaussian_kernel_pdf true_pdf(2,1,0.25);
00139 #endif // 0
00140   test_comparison(n_samples,n_trials,true_pdf,comparator,name);
00141 
00142   return 0;
00143 }