contrib/mul/clsfy/clsfy_direct_boost_builder.h
Go to the documentation of this file.
00001 // This is mul/clsfy/clsfy_direct_boost_builder.h
00002 // Copyright: (C) 2000 British Telecommunications plc
00003 #ifndef clsfy_direct_boost_builder_h_
00004 #define clsfy_direct_boost_builder_h_
00005 //:
00006 // \file
00007 // \brief Declares clsfy_direct_boost_builder, a concrete classifier builder
00008 // \author dac
00009 // \date 2000-05-10
00010 
00011 #include <clsfy/clsfy_builder_base.h> // parent class
00012 #include <vcl_vector.h>
00013 #include <vcl_string.h>
00014 #include <vcl_iosfwd.h>
00015 #include <mbl/mbl_data_wrapper.h>
00016 #include <vnl/vnl_vector.h>
00017 class clsfy_builder_1d;
00018 class clsfy_classifier_base;
00019 class clsfy_direct_boost;
00020 
00021 //: Builder (derived from clsfy_builder_base) for boosted classifiers made of 1D weak classifiers
00022 class clsfy_direct_boost_builder : public clsfy_builder_base
00023 {
00024   // Parameters of the builder (private: no access specifier precedes them in this class)
00025 
00026   //: If true, calculate all thresholds, not just the last one
00027   bool calc_all_thresholds_;
00028 
00029   //: Proportion of the sum of squared distances from the mean used to decide whether two classifiers are too similar.
00030   // Typically about 0.1.
00031   double prop_;
00032 
00033 
00034   //: Flag indicating whether or not to save data to disk.
00035   // NB saving data to disk is useful when there is not enough RAM,
00036   // but it also makes training very slow
00037   bool save_data_to_disk_;
00038 
00039   //: Batch size,
00040   // i.e. the number of training examples held in RAM whilst sorting takes place
00041   int bs_;
00042 
00043   //: Maximum number of classifiers found by the AdaBoost algorithm
00044   int max_n_clfrs_;
00045 
00046   //: Pointer to the 1D builder used to build each weak classifier (assigned in set_weak_builder())
00047   clsfy_builder_1d* weak_builder_;
00048 
00049 //==============================private methods============================
00050 
00051   //: Calculate the threshold for the current version of the strong classifier
00052   double calc_threshold(clsfy_direct_boost& strong_classifier,
00053                         mbl_data_wrapper<vnl_vector<double> >& inputs,
00054                         const vcl_vector<unsigned>& outputs) const;
00055 
00056   //: Calculate the similarity between two 1D vectors of bools (NOTE(review): presumably the proportion of matching entries - confirm in the .cxx)
00057   double calc_prop_same(const vcl_vector<bool>& vec1,
00058                         const vcl_vector<bool>& vec2) const;
00059 
00060  public:
00061 
00062   // Default constructor
00063   clsfy_direct_boost_builder();
00064 
00065   // Destructor (virtual, so objects may be destroyed through a base-class pointer)
00066   virtual ~clsfy_direct_boost_builder();
00067 
00068   //: Create an empty model, returned as a base-class pointer
00069   virtual clsfy_classifier_base* new_classifier() const;
00070 
00071   //: Set the batch size (stored in bs_)
00072   void set_batch_size(int bs) { bs_ = bs; }
00073 
00074   //: Set the flag controlling whether data is saved to disk
00075   void set_save_data_to_disk(bool x) { save_data_to_disk_ = x; }
00076 
00077   //: Set the maximum number of classifiers (stored in max_n_clfrs_)
00078   void set_max_n_clfrs(int max_n_clfrs) { max_n_clfrs_ = max_n_clfrs; }
00079 
00080   //: Set the weak builder. Only the address of weak_builder is stored; no copy is made here.
00081   void set_weak_builder(clsfy_builder_1d& weak_builder)
00082   { weak_builder_ = &weak_builder; }
00083 
00084   //: Set the calc_all_thresholds_ flag and the similarity proportion prop_
00085   void set_calc_all_thresholds(bool x) { calc_all_thresholds_ = x; }
00086   void set_prop(double prop) { prop_ = prop; }
00087 
00088   //: Build a model from data
00089   // Returns the mean error over the training set.
00090   // For many classifiers, you may use nClasses==1 to
00091   // indicate a binary classifier
00092   virtual double build(clsfy_classifier_base& model,
00093                        mbl_data_wrapper<vnl_vector<double> >& inputs,
00094                        unsigned nClasses,
00095                        const vcl_vector<unsigned> &outputs) const;
00096 
00097   //: Name of the class
00098   virtual vcl_string is_a() const;
00099 
00100   //: Test a class name against this class (NOTE(review): presumably true when s matches is_a() - confirm in the .cxx)
00101   virtual bool is_class(vcl_string const& s) const;
00102 
00103   //: Create a copy on the heap and return it as a base-class pointer
00104   virtual clsfy_builder_base* clone() const;
00105 
00106   //: Print a summary of the class to os
00107   virtual void print_summary(vcl_ostream& os) const;
00108 
00109   //: Save the class to a binary file stream
00110   virtual void b_write(vsl_b_ostream& bfs) const;
00111 
00112   //: Load the class from a binary file stream
00113   virtual void b_read(vsl_b_istream& bfs);
00114 };
00115 
00116 //: Allows clsfy_direct_boost_builder objects to be loaded through a base-class pointer
00117 void vsl_add_to_binary_loader(const clsfy_direct_boost_builder& b); // NOTE(review): presumably registers b with the binary loader - confirm
00118 
00119 //: Binary file stream output function for a class reference (NOTE(review): presumably forwards to b.b_write(bfs) - confirm)
00120 void vsl_b_write(vsl_b_ostream& bfs, const clsfy_direct_boost_builder& b);
00121 
00122 //: Binary file stream input function for a class reference (NOTE(review): presumably forwards to b.b_read(bfs) - confirm)
00123 void vsl_b_read(vsl_b_istream& bfs, clsfy_direct_boost_builder& b);
00124 
00125 //: Stream summary output function for a class reference
00126 void vsl_print_summary(vcl_ostream& os,const clsfy_direct_boost_builder& b);
00127 
00128 //: Stream summary output function for a class pointer (NOTE(review): null-pointer handling is not visible here - confirm)
00129 void vsl_print_summary(vcl_ostream& os,const clsfy_direct_boost_builder* b);
00130 
00131 //: Stream output operator for a class reference; returns a stream reference so insertions can be chained
00132 vcl_ostream& operator<<(vcl_ostream& os,const clsfy_direct_boost_builder& b);
00133 
00134 //: Stream output operator for a class pointer (NOTE(review): null-pointer handling is not visible here - confirm)
00135 vcl_ostream& operator<<(vcl_ostream& os,const clsfy_direct_boost_builder* b);
00136 
00137 #endif // clsfy_direct_boost_builder_h_