Go to the documentation of this file.00001
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "clsfy_adaboost_sorted_builder.h"
00019 #include "clsfy_simple_adaboost.h"
00020 #include "clsfy_builder_1d.h"
00021
00022 #include <vcl_iostream.h>
00023 #include <vcl_cstdlib.h>
00024 #include <vcl_cmath.h>
00025 #include <vcl_ctime.h>
00026 #include <vcl_algorithm.h>
00027 #include <vcl_cassert.h>
00028 #include <vbl/vbl_triple.h>
00029 #include <mbl/mbl_file_data_collector.h>
00030 #include <mbl/mbl_data_collector_list.h>
00031
00032
00033
00034 clsfy_adaboost_sorted_builder::clsfy_adaboost_sorted_builder()
00035 : save_data_to_disk_(false), bs_(-1), max_n_clfrs_(-1), weak_builder_(0)
00036 {
00037 }
00038
00039
00040
00041 clsfy_adaboost_sorted_builder::~clsfy_adaboost_sorted_builder()
00042 {
00043 }
00044
00045
00046
00047
00048 bool clsfy_adaboost_sorted_builder::is_class(vcl_string const& s) const
00049 {
00050 return s == clsfy_adaboost_sorted_builder::is_a() || clsfy_builder_base::is_class(s);
00051 }
00052
00053
00054
00055 vcl_string clsfy_adaboost_sorted_builder::is_a() const
00056 {
00057 return vcl_string("clsfy_adaboost_sorted_builder");
00058 }
00059
00060
00061
00062
00063
00064 double clsfy_adaboost_sorted_builder::build(clsfy_classifier_base& model,
00065 mbl_data_wrapper<vnl_vector<double> >& inputs,
00066 unsigned ,
00067 const vcl_vector<unsigned> &outputs) const
00068 {
00069
00070
00071 assert( model.is_class("clsfy_simple_adaboost") );
00072 clsfy_simple_adaboost &strong_classifier = (clsfy_simple_adaboost&) model;
00073
00074
00075
00076 if ( max_n_clfrs_ < 0 )
00077 {
00078 vcl_cout<<"Error: clsfy_adaboost_sorted_builder::build\n"
00079 <<"max_n_clfrs_ = "<<max_n_clfrs_<<" ie < 0\n"
00080 <<"set using set_max_n_clfrs()\n";
00081 vcl_abort();
00082 }
00083 else
00084 {
00085 vcl_cout<<"Maximum number of classifiers to be found by Adaboost = "
00086 <<max_n_clfrs_<<'\n';
00087 }
00088
00089 if ( weak_builder_ == 0 )
00090 {
00091 vcl_cout<<"Error: clsfy_adaboost_sorted_builder::build\n"
00092 <<"weak_builder_ pointer has not been set\n"
00093 <<"need to provide a builder to build each weak classifier\n"
00094 <<"set using set_weak_builder()\n";
00095 vcl_abort();
00096 }
00097 else
00098 {
00099 vcl_cout<<"Weak learner used by AdaBoost = "
00100 <<weak_builder_->is_a()<<'\n';
00101 }
00102
00103 if ( bs_ < 0 )
00104 {
00105 vcl_cout<<"Error: clsfy_adaboost_sorted_builder::build\n"
00106 <<"bs_ = "<<bs_<<" ie < 0\n"
00107 <<"set using set_batch_size()\n";
00108 vcl_abort();
00109 }
00110 else
00111 {
00112 vcl_cout<<"Batch size when sorting data =" <<bs_<<'\n';
00113 }
00114
00115
00116 assert(bs_>0);
00117 assert(bs_!=1);
00118 assert (max_n_clfrs_ >= 0);
00119
00120
00121
00122
00123
00124
00125
00126 unsigned n= inputs.size();
00127
00128
00129
00130 inputs.reset();
00131 int d = inputs.current().size();
00132
00133
00134
00135
00136
00137 vcl_string temp_path= "temp.dat";
00138 mbl_file_data_collector<
00139 vcl_vector< vbl_triple<double,int,int> >
00140 >
00141 file_collector( temp_path );
00142
00143 mbl_data_collector_list<
00144 vcl_vector< vbl_triple<double,int,int> >
00145 >
00146 ram_collector;
00147
00148 mbl_data_collector<vcl_vector< vbl_triple<double,int,int> >
00149 >* collector;
00150
00151 if (save_data_to_disk_)
00152 {
00153 vcl_cout<<"saving data to disk!\n";
00154 collector= &file_collector;
00155 }
00156 else
00157 {
00158
00159 vcl_cout<<"saving data to ram!\n";
00160 collector= &ram_collector;
00161 }
00162
00163
00164
00165
00166
00167
00168
00169 vcl_vector< vcl_vector< vbl_triple<double,int,int> > >vec(bs_);
00170 vbl_triple<double,int,int> t;
00171
00172 vcl_cout<<"d= "<<d<<'\n';
00173 int b=0;
00174 while ( b+1<d )
00175 {
00176 int r= vcl_min ( bs_, (d-b) );
00177 assert(r>0);
00178
00179 vcl_cout<<"sorting weak classifiers = "<<b<<" to "
00180 <<(b+r)-1<<" of "<<d<<'\n';
00181
00182
00183
00184 for (int i=0; i< bs_; ++i)
00185 vec[i].resize(0);
00186
00187
00188 inputs.reset();
00189 for (unsigned int j=0;j<n;++j)
00190 {
00191 for (int i=0; i< r; ++i)
00192 {
00193 t.first=inputs.current()[b+i];
00194 t.second=outputs[j];
00195 t.third = j;
00196 vec[i].push_back(t);
00197 }
00198 inputs.next();
00199 }
00200
00201
00202 for (int i=0; i< r; ++i)
00203 {
00204
00205 assert (vec[i].size() == n);
00206 assert (n != 0);
00207
00208 vcl_sort(vec[i].begin(), vec[i].end() );
00209
00210
00211 collector->record(vec[i]);
00212 }
00213
00214 b+=bs_;
00215 }
00216
00217
00218 mbl_data_wrapper< vcl_vector< vbl_triple<double,int,int> > >&
00219 wrapper=collector->data_wrapper();
00220
00221
00222
00223 wrapper.reset();
00224 assert ( wrapper.current().size() == n );
00225 assert ( d == (int)wrapper.size() );
00226
00227
00228
00229 unsigned n0=0;
00230 unsigned n1=0;
00231 for (unsigned int i=0;i<n;++i)
00232 {
00233 if ( outputs[i] == 0 )
00234 n0++;
00235 else if ( outputs[i] == 1 )
00236 n1++;
00237 else
00238 {
00239 vcl_cout<<"Error : clsfy_adaboost_sorted_builder\n"
00240 <<"unrecognised output value : outputs["<<i<<"]= "
00241 <<outputs[i]<<'\n';
00242 vcl_abort();
00243 }
00244 }
00245
00246 assert (n0+n1==n );
00247
00248 vnl_vector<double> wts(n);
00249 for (unsigned int i=0; i<n; ++i)
00250 {
00251 if ( outputs[i] == 0 )
00252 wts(i)=0.5/n0;
00253 else if ( outputs[i] == 1 )
00254 wts(i)=0.5/n1;
00255 else
00256 {
00257 vcl_cout<<"Error : clsfy_adaboost_sorted_builder\n"
00258 <<"unrecognised output value : outputs["<<i<<"]= "
00259 <<outputs[i]<<'\n';
00260 vcl_abort();
00261 }
00262 }
00263
00264
00265
00266
00267
00268
00269 strong_classifier.clear();
00270 strong_classifier.set_n_dims(d);
00271
00272 clsfy_classifier_1d* c1d = weak_builder_->new_classifier();
00273 clsfy_classifier_1d* best_c1d= weak_builder_->new_classifier();
00274
00275 double beta, alpha;
00276 long old_time = vcl_clock();
00277 double tot_time=0;
00278
00279 for (unsigned int r=0;r<(unsigned)max_n_clfrs_;++r)
00280 {
00281 vcl_cout<<"adaboost training round = "<<r<<'\n';
00282
00283
00284
00285 int best_i=-1;
00286 double min_error= 100000;
00287 wrapper.reset();
00288 for (int i=0;i<d;++i)
00289 {
00290 const vcl_vector< vbl_triple<double,int,int> >& vec = wrapper.current();
00291
00292 double error = weak_builder_->build_from_sorted_data(*c1d,&vec[0],wts);
00293 if (i==0 || error<min_error)
00294 {
00295 min_error = error;
00296 delete best_c1d;
00297 best_c1d= c1d->clone();
00298 best_i = i;
00299 }
00300
00301 wrapper.next();
00302 }
00303
00304 assert(best_i != -1);
00305
00306 vcl_cout<<"best_i= "<<best_i<<'\n'
00307 <<"min_error= "<<min_error<<'\n';
00308
00309 if (min_error<1e-10)
00310 {
00311 vcl_cout<<"min_error<1e-10 !!!\n";
00312 alpha = vcl_log(2.0*n);
00313 strong_classifier.add_classifier( best_c1d, alpha, best_i);
00314
00315
00316 delete c1d;
00317 delete best_c1d;
00318 return clsfy_test_error(strong_classifier, inputs, outputs);
00319 }
00320
00321
00322 if (0.5-min_error<1e-10)
00323 {
00324 vcl_cout<<"min_error => 0.5 !!!\n";
00325 beta=1.0;
00326
00327
00328 delete c1d;
00329 delete best_c1d;
00330 return clsfy_test_error(strong_classifier, inputs, outputs);
00331 }
00332
00333
00334 beta = min_error/(1.0-min_error);
00335 alpha = -1.0*vcl_log(beta);
00336 strong_classifier.add_classifier( best_c1d, alpha, best_i);
00337
00338
00339
00340 wrapper.set_index(best_i);
00341 const vcl_vector< vbl_triple<double,int,int> >& vec = wrapper.current();
00342
00343
00344 for (unsigned int j=0;j<n;++j)
00345 if ( best_c1d-> classify( vec[j].first ) == (unsigned) vec[j].second)
00346 wts[vec[j].third]*=beta;
00347
00348 double w_sum= wts.mean()*n;
00349 wts/=w_sum;
00350
00351 long new_time = vcl_clock();
00352
00353 double dt = (1.0*(new_time-old_time))/CLOCKS_PER_SEC;
00354 vcl_cout<<"Time for AdaBoost round: "<<dt<<" secs\n";
00355 tot_time+=dt;
00356 vcl_cout<<"Total time for rounds so far: "<<tot_time<<" secs\n";
00357
00358 old_time = new_time;
00359 }
00360
00361 delete c1d;
00362 delete best_c1d;
00363
00364
00365
00366
00367 vcl_cout<<"calculating training error\n";
00368 return clsfy_test_error(strong_classifier, inputs, outputs);
00369 }
00370
00371
00372
00373
00374 clsfy_classifier_base* clsfy_adaboost_sorted_builder::new_classifier() const
00375 {
00376 return new clsfy_simple_adaboost();
00377 }
00378
00379
00380
00381
00382 #if 0
00383
00384 clsfy_adaboost_sorted_builder::clsfy_adaboost_sorted_builder(const clsfy_adaboost_sorted_builder& new_b):
00385 data_ptr_(0)
00386 {
00387 *this = new_b;
00388 }
00389
00390
00391
00392
00393
00394 clsfy_adaboost_sorted_builder& clsfy_adaboost_sorted_builder::operator=(const clsfy_adaboost_sorted_builder& new_b)
00395 {
00396 if (&new_b==this) return *this;
00397
00398
00399 delete data_ptr_; data_ptr_=0;
00400
00401 if (new_b.data_ptr_)
00402 data_ptr_ = new_b.data_ptr_->clone();
00403
00404
00405 data_ = new_b.data_;
00406
00407 return *this;
00408 }
00409 #endif // 0
00410
00411
00412
00413 clsfy_builder_base* clsfy_adaboost_sorted_builder::clone() const
00414 {
00415 return new clsfy_adaboost_sorted_builder(*this);
00416 }
00417
00418
00419
00420
00421 void clsfy_adaboost_sorted_builder::print_summary(vcl_ostream& ) const
00422 {
00423 #if 0
00424 clsfy_builder_base::print_summary(os);
00425 vsl_print_summary(os, data_);
00426 #endif
00427
00428 vcl_cerr << "clsfy_adaboost_sorted_builder::print_summary() NYI\n";
00429 }
00430
00431
00432
00433
00434 void clsfy_adaboost_sorted_builder::b_write(vsl_b_ostream& ) const
00435 {
00436 #if 0
00437 vsl_b_write(bfs, version_no());
00438 clsfy_builder_base::b_write(bfs);
00439 vsl_b_write(bfs, data_);
00440 #endif
00441 vcl_cerr << "clsfy_adaboost_sorted_builder::b_write() NYI\n";
00442 }
00443
00444
00445
00446
00447 void clsfy_adaboost_sorted_builder::b_read(vsl_b_istream& )
00448 {
00449 vcl_cerr << "clsfy_adaboost_sorted_builder::b_read() NYI\n";
00450 #if 0
00451 if (!bfs) return;
00452
00453 short version;
00454 vsl_b_read(bfs,version);
00455 switch (version)
00456 {
00457 case 1:
00458
00459 vsl_b_read(bfs,data_);
00460 break;
00461 default:
00462 vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, clsfy_adaboost_sorted_builder&)\n"
00463 << " Unknown version number "<< version << '\n';
00464 bfs.is().clear(vcl_ios::badbit);
00465 return;
00466 }
00467 #endif // 0
00468 }