contrib/brl/bbas/bsta/bsta_k_medoid.cxx
Go to the documentation of this file.
00001 #include "bsta_k_medoid.h"
00002 //:
00003 // \file
00004 #include <vcl_cmath.h> //for HUGE_VAL
00005 #include <vcl_algorithm.h> //for find
00006 #include <vcl_iostream.h>
00007 #include <vcl_cassert.h>
00008 
00009 bsta_k_medoid::bsta_k_medoid(const unsigned n_elements, bool verbose)
00010 {
00011   n_elements_ = n_elements;
00012   distance_array_.resize(n_elements, n_elements);
00013   distance_array_.fill(0.0);
00014   verbose_ = verbose;
00015 }
00016 
00017 //------------------------------------------------
00018 // Is an element a medoid?
00019 bool bsta_k_medoid::is_medoid(const unsigned i) const
00020 {
00021   vcl_vector<unsigned>::const_iterator result;
00022   result = vcl_find(medoids_.begin(), medoids_.end(), i);
00023   return result != medoids_.end();
00024 }
00025 
00026 //------------------------------------------------
00027 // is an element in cluster k ?
00028 bool bsta_k_medoid::in_cluster(const unsigned i, const unsigned k) const
00029 {
00030   //Easy checks first
00031   if (k>=this->k())
00032     return false;
00033   if (is_medoid(i))
00034     return i==k;
00035 
00036   vcl_vector<unsigned>::const_iterator result;
00037   result = vcl_find(clusters_[k].begin(), clusters_[k].end(), i);
00038   return result != clusters_[k].end();
00039 }
00040 
00041 //------------------------------------------------
00042 //the distance between an element and its medoid
00043 double bsta_k_medoid::medoid_distance(const unsigned i) const
00044 {
00045   double d = 0;
00046   for (unsigned k = 0; k<this->k(); ++k)
00047     if (this->in_cluster(i, k))
00048       d = distance(i, this->medoid(k));
00049   return d;
00050 }
00051 
00052 //---------------------------------------------------
00053 // The total distance between elements in the cluster and the cluster medoid
00054 double bsta_k_medoid::total_distance(const unsigned k) const
00055 {
00056   assert(k<this->k());
00057   double d = 0;
00058   unsigned m = medoid(k);//the medoid corresponding to index k
00059   for (unsigned i = 0; i<this->size(k); ++i)
00060     d += distance(clusters_[k][i], m);
00061   return d;
00062 }
00063 
00064 //--------------------------------------------------------
00065 // Compute the change in distance of swapping medoid j with
00066 // current medoid k with respect to element i
00067 double bsta_k_medoid::dc(const unsigned i, const unsigned j, const unsigned k)
00068 {
00069   //current distance
00070   double d_ik = distance(i, k);
00071 
00072   //distance to j
00073   double d_ij = distance(i, j);
00074 
00075   // distance change if swap were done
00076   return d_ij - d_ik;
00077 }
00078 
00079 //--------------------------------------------------------
00080 // Compute the change in distance of swapping medoid j with
00081 // current medoid k with respect to total distance between medoids
00082 double bsta_k_medoid::dcm(const unsigned j, const unsigned k)
00083 {
00084   //iterate over medoids
00085   double dk = 0, dj = 0;
00086   unsigned kmax = this->k();
00087   if (!kmax)
00088     return 0.0;
00089   unsigned count = 0;
00090   for (unsigned nk=0; nk<kmax; ++nk, ++count)
00091   {
00092     unsigned m = this->medoid(nk);
00093     if (m==k)
00094       continue;
00095     dk += this->distance(k, m);
00096     dj += this->distance(j, m);
00097   }
00098   if (!count)
00099     return 0;
00100   //negative change if inter-medoid distance increases
00101   double result = (dk-dj)/count;
00102   return result;
00103 }
00104 
00105 
00106 //: Clear the cluster vectors
00107 void bsta_k_medoid::clear_clusters()
00108 {
00109   for (unsigned j=0; j<this->k(); ++j)
00110     clusters_[j].clear();
00111 }
00112 
00113 
00114 //:assign non-medoids to clusters
00115 void bsta_k_medoid::form_clusters()
00116 {
00117   this->clear_clusters();
00118   for (unsigned i = 0; i<n_elements_;++i)
00119     if (is_medoid(i))
00120       continue;
00121     else
00122     {
00123       //find closest medoid
00124       double dmin = HUGE_VAL;
00125       unsigned jmin=0;
00126       for (unsigned j=0; j<this->k(); ++j)
00127         if (distance(i,this->medoid(j))<dmin)
00128         {
00129           jmin = j;
00130           dmin = distance(i,this->medoid(j));
00131         }
00132       //assign i to the closest (jmin)
00133       clusters_[jmin].push_back(i);
00134     }
00135   //put medoids into their own clusters
00136   for (unsigned j=0; j<this->k(); ++j)
00137     clusters_[j].push_back(medoids_[j]);
00138 }
00139 
00140 
00141 //:replace medoid k with medoid j
00142 bool bsta_k_medoid::replace_medoid(const unsigned j, const unsigned k)
00143 {
00144   vcl_vector<unsigned>::iterator result;
00145   result = vcl_find(medoids_.begin(), medoids_.end(), k);
00146   if (result == medoids_.end())
00147     return false;
00148   (*result) = j;
00149   return true;
00150 }
00151 
00152 
00153 //: Returns false if swap is not warranted
00154 bool bsta_k_medoid::test_medoid_swap(unsigned& mj, unsigned& mk)
00155 {
00156   //Impossible values
00157   mj = n_elements_, mk = n_elements_;
00158 
00159   // for each j not a medoid
00160   double Sdc_min = HUGE_VAL;
00161   unsigned jmin=0, kmin=0;
00162   for ( unsigned j = 0; j<n_elements_; ++j)
00163     if (is_medoid(j))
00164       continue;
00165     else
00166       for (unsigned k = 0; k<this->k(); ++k)
00167       {
00168         if (verbose_)
00169         {
00170           vcl_cout << "\n===== Current Medoids(";
00171           for (unsigned m = 0; m<this->k(); ++m)
00172             vcl_cout << medoid(m) << ' ';
00173           vcl_cout << ")\n"
00174                    << "Checking Swap " << j << "->" << medoid(k) << '\n';
00175         }
00176         double Sdc = 0;
00177         unsigned count = 0;
00178         //Sum up the effect of swapping j for k on each non-medoid element
00179         for (unsigned i = 0; i<n_elements_;++i)
00180           if (is_medoid(i)||i==j)
00181             continue;
00182           else{
00183             Sdc += dc(i,j,medoid(k));
00184             ++count;}
00185 
00186         if (count>0)
00187           Sdc /= count;
00188         double med_dist=this->dcm(j, medoid(k));
00189         double total = Sdc + med_dist;
00190 
00191         if (verbose_)
00192         {
00193           vcl_cout << "Inter-element distance change " << Sdc << '\n'
00194                    << "Inter-medoid distance change " << med_dist << '\n'
00195                    << "Total change " << total << '\n';
00196         }
00197 
00198         if (total < Sdc_min)
00199         {
00200           jmin = j;
00201           kmin = medoid(k);
00202           Sdc_min = total;
00203         }
00204       }
00205 
00206   if (Sdc_min<0)
00207   {
00208     mj = jmin;
00209     mk = kmin;
00210     return true;
00211   }
00212   return false;
00213 }
00214 
00215 //-------------------------------------------------------
00216 // Find the best set of k medoids
00217 //
00218 void bsta_k_medoid::do_clustering(const unsigned nk)
00219 {
00220   assert(nk<=n_elements_);
00221 
00222   // arbitrarily select k medoids
00223   medoids_.clear();
00224   // choose first k elements as medoids
00225   for (unsigned i = 0; i<nk; ++i)
00226     medoids_.push_back(i);
00227   // reset the clusters
00228   clusters_.clear();
00229   clusters_.resize(nk);
00230   //We are done if there are no non-medoids
00231   if (nk==n_elements_)
00232   {
00233     for (unsigned i = 0; i<nk; ++i)
00234       clusters_[i].push_back(i);
00235     return;
00236   }
00237   //otherwise
00238   this->form_clusters();
00239   unsigned mj, mk;//potential swap for medoids( swap j for k)
00240   while (test_medoid_swap(mj, mk))
00241   {
00242     replace_medoid(mj, mk);
00243     form_clusters();
00244     if (verbose_)
00245     {
00246       vcl_cout << "***Swapping " << mj << "->" << mk << "***\n";
00247       for (unsigned k = 0; k<this->k(); ++k)
00248       {
00249         vcl_cout << "Medoid[" << k << "] = " << medoid(k) << '\n'
00250                  << "with cluster\n";
00251         for (unsigned j = 0; j<size(k); ++j)
00252           vcl_cout << clusters_[k][j] << ' ' ;
00253         vcl_cout << '\n'
00254                  << "Total Cluster Distance = "
00255                  << total_distance(k)<< '\n';
00256       }
00257     }
00258   }
00259   return;
00260 }