00001
00002 #ifndef bsta_sample_set_h_
00003 #define bsta_sample_set_h_
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <bsta/bsta_parzen_sphere.h>
00017 #include <bsta/bsta_mixture.h>
00018 #include <bsta/bsta_attributes.h>
00019 #include <bsta/bsta_gaussian_sphere.h>
00020 #include <bsta/bsta_gaussian_full.h>
00021 #include <vnl/vnl_vector_fixed.h>
00022 #include <vnl/vnl_matrix_fixed.h>
00023 #include <vcl_fstream.h>
00024 #include <vcl_utility.h>
00025 #include <vcl_iostream.h>
00026
00027 #define MIN_VAR_ 0.0001
00028
00029
00030
00031 template <class T, unsigned n>
00032 class bsta_sample_set : public bsta_parzen_sphere<T,n>
00033 {
00034 public:
00035
00036 typedef typename bsta_parzen_sphere<T,n>::vector_type vector_;
00037
00038
00039 bsta_sample_set(T bandwidth = T(1)) : bsta_parzen_sphere<T,n>() { this->set_bandwidth(bandwidth); }
00040
00041
00042
00043 bool mean(vector_ const& pt, vector_& out);
00044
00045
00046 void insert_sample(vector_ const& sample, T weight = T(1.0));
00047
00048 T weight(unsigned i) const { return weights_[i]; }
00049 vcl_vector<T>& weights() { return weights_; }
00050
00051
00052 void normalize_weights();
00053
00054
00055 void initialize_assignments();
00056 vcl_vector<int>& assignments() { return assignments_; }
00057 int assignment(unsigned i) const { return assignments_[i]; }
00058 void set_assignment(unsigned i, int mode) { assignments_[i] = mode; }
00059
00060
00061 bool mode_mean(int mode, vector_& out) const;
00062
00063 typename vcl_vector<typename bsta_parzen_sphere<T,n>::vector_type >::const_iterator samples_begin() const { return bsta_parzen<T,n>::samples_.begin(); }
00064 typename vcl_vector<typename bsta_parzen_sphere<T,n>::vector_type >::const_iterator samples_end() const { return bsta_parzen<T,n>::samples_.end(); }
00065
00066 typename vcl_vector<T >::const_iterator weights_begin() const { return weights_.begin(); }
00067 typename vcl_vector<T >::const_iterator weights_end() const { return weights_.end(); }
00068
00069 vcl_vector<int >::const_iterator assignments_begin() const { return assignments_.begin(); }
00070 vcl_vector<int >::const_iterator assignments_end() const { return assignments_.end(); }
00071
00072 bool check_initializations() const { return bsta_parzen<T,n>::samples_.size() == weights_.size() &&
00073 bsta_parzen<T,n>::samples_.size() == assignments_.size(); }
00074
00075
00076 int mode_size(int mode) const;
00077
00078
00079 T mode_weight(int mode) const;
00080
00081
00082 unsigned mode_cnt() const;
00083
00084
00085 T total_weight() const;
00086
00087 private:
00088
00089
00090
00091 vcl_vector<T> weights_;
00092
00093 vcl_vector<int> assignments_;
00094 };
00095
00096
00097 template <class T>
00098 bool bsta_sample_set_variance(const bsta_sample_set<T,1>& set, int mode, T min_var, T& out)
00099 {
00100 typedef typename vcl_vector<T >::const_iterator sit_t;
00101 typedef typename vcl_vector<T >::const_iterator wit_t;
00102 typedef typename vcl_vector<int >::const_iterator ait_t;
00103
00104 if (!set.check_initializations()) {
00105 vcl_cout << "Error in - bsta_sample_set<T,n>::mean() : assignments not initialized!\n";
00106 return false;
00107 }
00108
00109 T mv;
00110 set.mode_mean(mode, mv);
00111
00112 T sum(T(0));
00113 sit_t sit = set.samples_begin();
00114 wit_t wit = set.weights_begin();
00115 ait_t ait = set.assignments_begin();
00116 T nsamp = 0;
00117 for (; sit != set.samples_end(); ++sit, ++wit, ++ait) {
00118 if (*ait != mode)
00119 continue;
00120
00121 T s = (*sit-mv)*(*sit-mv);
00122 sum += (*wit)*s;
00123 nsamp += (*wit);
00124 }
00125 if (nsamp > 0) {
00126 out = sum / nsamp;
00127 if (out < min_var)
00128 out = min_var;
00129 return true;
00130 }
00131
00132 return false;
00133 }
00134
00135
00136 template <class T, unsigned n>
00137 bool bsta_sample_set_variance(const bsta_sample_set<T,n>& set, int mode, vnl_matrix_fixed<T,n,n>& out)
00138 {
00139 typedef typename vcl_vector<vnl_vector_fixed<T,n> >::const_iterator sit_t;
00140 typedef typename vcl_vector<T >::const_iterator wit_t;
00141 typedef typename vcl_vector<int >::const_iterator ait_t;
00142
00143 if (!set.check_initializations()) {
00144 vcl_cout << "Error in - bsta_sample_set<T,n>::mean() : assignments not initialized!\n";
00145 return false;
00146 }
00147
00148 vnl_vector_fixed<T,n> mv;
00149 set.mode_mean(mode, mv);
00150
00151 vnl_matrix_fixed<T,n,n> sum(T(0));
00152 sit_t sit = set.samples_begin();
00153 wit_t wit = set.weights_begin();
00154 ait_t ait = set.assignments_begin();
00155 T nsamp = 0;
00156 for (; sit != set.samples_end(); ++sit, ++wit, ++ait) {
00157 if (*ait != mode)
00158 continue;
00159
00160 vnl_vector_fixed<T,n> diff = (*sit)-mv;
00161 sum += (*wit)*outer_product(diff,diff);
00162 nsamp += (*wit);
00163 }
00164 if (nsamp > 0) {
00165 out = sum / nsamp;
00166
00167 return true;
00168 }
00169
00170 return false;
00171 }
00172
00173
00174 template <class T, unsigned n>
00175 bool bsta_sample_set_marginalize(const bsta_sample_set<T,n>& set, unsigned component, bsta_sample_set<T,1>& out_set)
00176 {
00177 typedef typename vcl_vector<vnl_vector_fixed<T,n> >::const_iterator sit_t;
00178 typedef typename vcl_vector<T >::const_iterator wit_t;
00179
00180 if (n <= component)
00181 return false;
00182
00183 sit_t sit = set.samples_begin();
00184 wit_t wit = set.weights_begin();
00185
00186 for (; sit != set.samples_end(); ++sit, ++wit) {
00187 out_set.insert_sample((*sit)[component], (*wit));
00188 }
00189
00190 return true;
00191 }
00192
00193 template <class T>
00194 bool bsta_sample_set_fit_distribution(const bsta_sample_set<T,1>& set, bsta_mixture<bsta_num_obs<bsta_gaussian_sphere<T,1> > >& out)
00195 {
00196 if (!set.check_initializations()) {
00197 vcl_cout << "Error in - bsta_sample_set<T,n>::mean() : assignments not initialized!\n";
00198 return false;
00199 }
00200
00201
00202 unsigned mode_cnt = set.mode_cnt();
00203
00204 while (out.num_components() != 0) {
00205 out.remove_last();
00206 }
00207
00208 T total_weight = set.total_weight();
00209 for (unsigned mi = 0; mi < mode_cnt; mi++) {
00210 T meanv;
00211 set.mode_mean(mi, meanv);
00212 T var;
00213 if (!bsta_sample_set_variance(set, mi, T(MIN_VAR_), var))
00214 return false;
00215 T w = set.mode_weight(mi);
00216 bsta_gaussian_sphere<T,1> gauss_d(meanv,var);
00217 bsta_num_obs<bsta_gaussian_sphere<T,1> > gauss(gauss_d, w);
00218 if (!out.insert(gauss, w/total_weight))
00219 return false;
00220 }
00221
00222 return true;
00223 }
00224
00225 template <class T, unsigned n>
00226 bool bsta_sample_set_fit_distribution(const bsta_sample_set<T,n>& set, bsta_mixture<bsta_num_obs<bsta_gaussian_full<T,n> > >& out)
00227 {
00228 if (!set.check_initializations()) {
00229 vcl_cout << "Error in - bsta_sample_set<T,n>::mean() : assignments not initialized!\n";
00230 return false;
00231 }
00232
00233
00234 unsigned mode_cnt = set.mode_cnt();
00235
00236 while (out.num_components() != 0) {
00237 out.remove_last();
00238 }
00239
00240 T total_weight = set.total_weight();
00241 for (unsigned mi = 0; mi < mode_cnt; mi++) {
00242 vnl_vector_fixed<T,n> meanv;
00243 set.mode_mean(mi, meanv);
00244 vnl_matrix_fixed<T,n,n> covar;
00245 if (!bsta_sample_set_variance(set, mi, covar))
00246 return false;
00247 T w = set.mode_weight(mi);
00248 bsta_gaussian_full<T,n> gauss_d(meanv,covar);
00249 bsta_num_obs<bsta_gaussian_full<T,n> > gauss(gauss_d, w);
00250 if (!out.insert(gauss, w/total_weight))
00251 return false;
00252 }
00253
00254 return true;
00255 }
00256
00257
00258
00259
00260 template <class T>
00261 T bsta_sample_set_log_likelihood(const bsta_sample_set<T,1>& set, bsta_num_obs<bsta_gaussian_sphere<T,1> >& dist, T total_weight)
00262 {
00263 if (!set.size()) {
00264 vcl_cout << "Error in - bsta_sample_set<T,n>::bsta_sample_set_log_likelihood() : assignments not initialized!\n";
00265 return T(0);
00266 }
00267
00268 T w = dist.num_observations;
00269 T p_dist = w/total_weight;
00270 T sum = T(0);
00271 for (unsigned i = 0; i < set.size(); i++) {
00272 if (vcl_sqrt(dist.sqr_mahalanobis_dist(set.sample(i))) < 3) {
00273 T p = T(vcl_log(dist.prob_density(set.sample(i))));
00274 T pw = T(vcl_log(p_dist));
00275 sum += p + pw;
00276 }
00277 }
00278
00279 return sum;
00280 }
00281
00282
00283
00284
00285 template <class T, unsigned n>
00286 T bsta_sample_set_log_likelihood(const bsta_sample_set<T,n>& set, bsta_num_obs<bsta_gaussian_full<T,n> >& dist, T total_weight)
00287 {
00288 if (!set.size()) {
00289 vcl_cout << "Error in - bsta_sample_set<T,n>::bsta_sample_set_log_likelihood() : assignments not initialized!\n";
00290 return T(0);
00291 }
00292
00293 T w = dist.num_observations;
00294 T p_dist = w/total_weight;
00295 T sum = T(0);
00296 for (unsigned i = 0; i < set.size(); i++) {
00297 if (vcl_sqrt(dist.sqr_mahalanobis_dist(set.sample(i))) < 3) {
00298 T p = T(vcl_log(dist.prob_density(set.sample(i))));
00299 T pw = T(vcl_log(p_dist));
00300 sum += p + pw;
00301 }
00302 }
00303
00304 return sum;
00305 }
00306
00307
00308
00309
00310 template <class T>
00311 T bsta_sample_set_log_likelihood(const bsta_sample_set<T,2>& set, bsta_num_obs<bsta_gaussian_sphere<T,1> >& dist0, T w0, bsta_num_obs<bsta_gaussian_sphere<T,1> >& dist1, T w1, T& w_sum)
00312 {
00313 if (!set.size()) {
00314 vcl_cout << "Error in - bsta_sample_set<T,n>::bsta_sample_set_log_likelihood() : set is empty!\n";
00315 return T(0);
00316 }
00317
00318 T total_weight = T(0);
00319 w_sum = T(0);
00320 T sum = T(0);
00321 unsigned cnt = 0;
00322 for (unsigned i = 0; i < set.size(); i++) {
00323
00324 T d0 = dist0.sqr_mahalanobis_dist(set.sample(i)[0]);
00325 T d1 = dist1.sqr_mahalanobis_dist(set.sample(i)[1]);
00326 T d0_sqrt = vcl_sqrt(d0);
00327 T d1_sqrt = vcl_sqrt(d1);
00328
00329 if (d0_sqrt < 3) {
00330 if (d1_sqrt < 3) {
00331 w_sum += set.weight(i);
00332 cnt++;
00333 } else {
00334 d1 = 9;
00335 }
00336 } else {
00337 d0 = 9;
00338 }
00339
00340 T p = dist0.dist_prob_density(d0);
00341 p *= w0;
00342 T p0 = T(vcl_log(p));
00343 p = dist1.dist_prob_density(d1);
00344 p *= w1;
00345 T p1 = T(vcl_log(p));
00346
00347 sum += p0 + p1;
00348
00349 total_weight += set.weight(i);
00350 }
00351
00352
00353 T prior = w_sum/total_weight;
00354 T log_prior = T(vcl_log(prior));
00355 T tot_log_prior = set.size()*log_prior;
00356 sum += tot_log_prior;
00357
00358 return sum;
00359 }
00360
00361
00362
00363 template<class T>
00364 bool bsta_sample_set_print_to_m(const bsta_sample_set<T,2>& set, vcl_ofstream& of)
00365 {
00366
00367 unsigned mode_cnt = set.mode_cnt();
00368
00369 of << "cmap = colormap(lines(" << mode_cnt << "));\n";
00370
00371 for (unsigned m = 0; m < mode_cnt; m++) {
00372 vcl_vector<vcl_pair<T,T> > points;
00373 for (unsigned i = 0; i < set.size(); i++) {
00374 if (set.assignment(i) == m)
00375 points.push_back(vcl_pair<T,T>(T(set.sample(i)[0]), T(set.sample(i)[1])));
00376 }
00377 if (points.size() > 0) {
00378 of << "x = [" << points[0].first;
00379 for (unsigned i = 1; i < points.size(); i++) {
00380 of << ", " << points[i].first;
00381 }
00382 of << "];\n";
00383 of << "y = [" << points[0].second;
00384 for (unsigned i = 1; i < points.size(); i++) {
00385 of << ", " << points[i].second;
00386 }
00387 of << "];\n";
00388 of << "h = plot(x,y,'or');\nset(h, 'Color', cmap(" << m+1 << ",:));\n";
00389 of << "hold on\n";
00390 }
00391
00392 vnl_vector_fixed<T,2> mode;
00393 set.mode_mean(m, mode);
00394 of << "xx = [" << mode[0] << "];\n";
00395 of << "yy = [" << mode[1] << "];\n";
00396 of << "h = plot(xx,yy,'+r');\nset(h, 'Color', cmap(" << m+1 << ",:));\n";
00397 of << "hold on\n";
00398 }
00399
00400 return true;
00401 }
00402
00403
00404 template<class T>
00405 bool bsta_sample_set_dist_print_to_m(const bsta_sample_set<T,2>& set, vcl_ofstream& of)
00406 {
00407
00408 unsigned mode_cnt = set.mode_cnt();
00409
00410 of << "cmap = colormap(lines(" << mode_cnt << "));\n";
00411
00412
00413 T min = T(10000), max = T(0);
00414 for (unsigned i = 0; i < set.size(); i++) {
00415 if (set.sample(i)[0] < min)
00416 min = set.sample(i)[0];
00417 if (set.sample(i)[0] > max)
00418 max = set.sample(i)[0];
00419 if (set.sample(i)[1] < min)
00420 min = set.sample(i)[1];
00421 if (set.sample(i)[1] > max)
00422 max = set.sample(i)[1];
00423 }
00424
00425 of << "[x,y] = meshgrid(" << min << ":.2:" << max << ", " << min << ":.2:" << max << ");\n";
00426
00427 T total_weight = set.total_weight();
00428 for (unsigned m = 0; m < mode_cnt; m++) {
00429 vnl_vector_fixed<T,2> mode;
00430 set.mode_mean(m, mode);
00431 of << "mu = [" << mode[0] << ' ' << mode[1] << "];\n";
00432 vnl_matrix_fixed<T,2,2> covar;
00433 if (!bsta_sample_set_variance(set, m, covar))
00434 return false;
00435 of << "sigma = [";
00436 for (unsigned r = 0; r < 2; r++) {
00437 for (unsigned c = 0; c < 2; c++)
00438 of << covar[r][c] << ' ';
00439 if (r == 0)
00440 of << ';';
00441 }
00442 of << "];\n";
00443 of << "X = [x(:) y(:)];\n";
00444 of << "p = " << set.mode_weight(m)/total_weight << "*mvnpdf(X, mu, sigma);\n";
00445 of << "c = " << (m+1) << "*ones(size(x));\n";
00446 of << "surf(x,y,reshape(p,size(x,1),size(x,2)),c);\n";
00447 of << "hold on\n";
00448 }
00449
00450 return true;
00451 }
00452
00453
00454 template<class T>
00455 bool bsta_sample_set_dist_print_to_m(const bsta_sample_set<T,1>& set, vcl_ofstream& of)
00456 {
00457
00458 unsigned mode_cnt = set.mode_cnt();
00459
00460 of << "cmap = colormap(lines(" << mode_cnt << "));\n";
00461
00462
00463 T min = T(10000), max = T(0);
00464 for (unsigned i = 0; i < set.size(); i++) {
00465 if (set.sample(i) < min)
00466 min = set.sample(i);
00467 if (set.sample(i) > max)
00468 max = set.sample(i);
00469 }
00470
00471 of << "X = [" << min << ":.2:" << max << "]';\n";
00472
00473 T total_weight = set.total_weight();
00474 for (unsigned m = 0; m < mode_cnt; m++) {
00475 T mode;
00476 set.mode_mean(m, mode);
00477 of << "mu = [" << mode << "];\n";
00478 T var;
00479 if (!bsta_sample_set_variance(set, m, MIN_VAR_, var))
00480 return false;
00481 of << "sigma = [" << var << "];\n";
00482 of << "p = " << set.mode_weight(m)/total_weight << "*mvnpdf(X, mu, sigma);\n";
00483 of << "hh = plot(X,p,'+r');\nset(hh, 'Color', cmap(" << m+1 << ",:));\n";
00484 of << "hold on\n";
00485 }
00486
00487 return true;
00488 }
00489
00490 #endif // bsta_sample_set_h_