contrib/mul/pdf1d/tools/select_n_mixtures.cxx
Go to the documentation of this file.
00001 // This is mul/pdf1d/tools/select_n_mixtures.cxx
00002 
00003 //:
00004 // \file
00005 // \brief Program to explore selecting optimal number of mixture components
00006 // \author Tim Cootes
00007 //
00008 // We generate N samples from a known mixture model.
00009 // We then try to fit mixture models with different numbers
00010 // of components to the data, and select the one that
00011 // gives the largest mean overlap.
00012 
00013 #include <vcl_iostream.h>
00014 #include <vnl/vnl_vector.h>
00015 #include <pdf1d/pdf1d_compare_to_pdf_bhat.h>
00016 #include <pdf1d/pdf1d_sampler.h>
00017 #include <pdf1d/pdf1d_mixture_builder.h>
00018 #include <pdf1d/pdf1d_gaussian_builder.h>
00019 #include <pdf1d/pdf1d_gaussian_kernel_pdf.h>
00020 #include <pdf1d/pdf1d_gaussian_kernel_pdf_builder.h>
00021 
00022 vcl_ofstream ofs;
00023 
00024 //: Compute how well different forms of pdf match to data x.
00025 //  pdf[i] is the model built by builder[i]
00026 void test_form(vcl_vector<double>& B,
00027                const vnl_vector<double>& x,
00028                vcl_vector<pdf1d_builder*>& builder,
00029                pdf1d_compare_to_pdf_bhat& comparator)
00030 {
00031   // This is inefficient
00032   unsigned int n = builder.size();
00033   if (B.size()!=n) B.resize(n);
00034   vnl_vector<double> b;
00035   for (unsigned int i=0;i<n;++i)
00036   {
00037 //    B[i] = comparator.compare_form(x.data_block(),x.size(),*builder[i]);
00038     B[i] = comparator.bootstrap_compare_form(b,x.data_block(),x.size(),*builder[i],10);
00039     vcl_cout<<i+1<<") B: "<<B[i]<<vcl_endl;
00040     ofs<<i+1<<' '<<B[i]<<vcl_endl;
00041     for (unsigned int j=0;j<b.size();++j)
00042       ofs<<' '<<j+1<<' '<<b[j]<<vcl_endl;
00043   }
00044   vcl_cout<<"------------------------\n";
00045 }
00046 
00047 void select_form(vnl_vector<int>& histo,
00048                  int n_samples, int n_trials,
00049                  const pdf1d_pdf& true_pdf,
00050                  vcl_vector<pdf1d_builder*>& builder,
00051                  pdf1d_compare_to_pdf_bhat& comparator)
00052 {
00053   vnl_vector<double> x(n_samples);
00054   pdf1d_sampler *sampler = true_pdf.new_sampler();
00055 
00056   unsigned int n = builder.size();
00057   if (histo.size()!=n)
00058   {
00059     histo.set_size(n);
00060     histo.fill(0);
00061   }
00062 
00063   vcl_vector<double> B(n);
00064 
00065   for (int i=0;i<n_trials;++i)
00066   {
00067     sampler->get_samples(x);
00068     test_form(B,x,builder,comparator);
00069 
00070     int best_j=0;
00071     double best_B = B[0];
00072     for (unsigned int j=1;j<n;++j)
00073       if (B[j]>best_B)
00074       {
00075         best_B=B[j];
00076         best_j=j;
00077       }
00078 
00079     histo[best_j]+=1;
00080   }
00081 
00082   delete sampler;
00083 }
00084 
00085 void select_form(int n_samples, int n_trials, int max_comp,
00086                  const pdf1d_pdf& true_pdf,
00087                  pdf1d_compare_to_pdf_bhat& comparator)
00088 {
00089   vnl_vector<int> histo;
00090   pdf1d_gaussian_builder gauss_builder;
00091 
00092   vcl_vector<pdf1d_builder*> builder(max_comp);
00093   for (int i=0;i<max_comp;++i)
00094   {
00095     pdf1d_mixture_builder *b = new pdf1d_mixture_builder;
00096     b->init(gauss_builder,i+1);
00097     builder[i] = b;
00098   }
00099 
00100   select_form(histo,n_samples,n_trials,true_pdf,builder,comparator);
00101 
00102   vcl_cout<<"PDF: "<<true_pdf<<vcl_endl
00103           <<"Sampling "<<n_samples
00104           <<" values from pdf and computing overlap with kernel estimate.\n"
00105           <<"Averaging over "<<n_trials<<" trials.\n"
00106           <<"Number of times each number of components preferred:\n";
00107   for (int i=0;i<max_comp;++i)
00108   {
00109     vcl_cout<<i+1<<" components: "<<histo[i]<<vcl_endl;
00110   }
00111 
00112     // Tidy up
00113   for (int i=0;i<max_comp;++i)
00114     delete builder[i];
00115 }
00116 
00117 int main()
00118 {
00119   // Set up Gaussian kernel estimator
00120   pdf1d_compare_to_pdf_bhat comparator;
00121   pdf1d_gaussian_kernel_pdf_builder k_builder;
00122 //  k_builder.set_use_equal_width();
00123   k_builder.set_use_width_from_separation();
00124   comparator.set_builder(k_builder);
00125 
00126   int n_samples = 100;
00127   int n_trials = 1;
00128   int max_comp = 15;
00129   int n_mix = 3;
00130 
00131   // Let true pdf be n Gaussians at 0,1,2 with sd of 0.25
00132   pdf1d_gaussian_kernel_pdf true_pdf(n_mix,1,0.25);
00133 
00134   ofs.open("B_vs_N_mix.txt",vcl_ios::out);
00135   select_form(n_samples,n_trials,max_comp,true_pdf,comparator);
00136   ofs.close();
00137 
00138   vcl_cout<<"See B_vs_N_mix.txt\n";
00139 
00140   return 0;
00141 }