Go to the documentation of this file.00001 #include "bsta_k_medoid.h"
00002
00003
00004 #include <vcl_cmath.h>
00005 #include <vcl_algorithm.h>
00006 #include <vcl_iostream.h>
00007 #include <vcl_cassert.h>
00008
00009 bsta_k_medoid::bsta_k_medoid(const unsigned n_elements, bool verbose)
00010 {
00011 n_elements_ = n_elements;
00012 distance_array_.resize(n_elements, n_elements);
00013 distance_array_.fill(0.0);
00014 verbose_ = verbose;
00015 }
00016
00017
00018
00019 bool bsta_k_medoid::is_medoid(const unsigned i) const
00020 {
00021 vcl_vector<unsigned>::const_iterator result;
00022 result = vcl_find(medoids_.begin(), medoids_.end(), i);
00023 return result != medoids_.end();
00024 }
00025
00026
00027
00028 bool bsta_k_medoid::in_cluster(const unsigned i, const unsigned k) const
00029 {
00030
00031 if (k>=this->k())
00032 return false;
00033 if (is_medoid(i))
00034 return i==k;
00035
00036 vcl_vector<unsigned>::const_iterator result;
00037 result = vcl_find(clusters_[k].begin(), clusters_[k].end(), i);
00038 return result != clusters_[k].end();
00039 }
00040
00041
00042
00043 double bsta_k_medoid::medoid_distance(const unsigned i) const
00044 {
00045 double d = 0;
00046 for (unsigned k = 0; k<this->k(); ++k)
00047 if (this->in_cluster(i, k))
00048 d = distance(i, this->medoid(k));
00049 return d;
00050 }
00051
00052
00053
00054 double bsta_k_medoid::total_distance(const unsigned k) const
00055 {
00056 assert(k<this->k());
00057 double d = 0;
00058 unsigned m = medoid(k);
00059 for (unsigned i = 0; i<this->size(k); ++i)
00060 d += distance(clusters_[k][i], m);
00061 return d;
00062 }
00063
00064
00065
00066
00067 double bsta_k_medoid::dc(const unsigned i, const unsigned j, const unsigned k)
00068 {
00069
00070 double d_ik = distance(i, k);
00071
00072
00073 double d_ij = distance(i, j);
00074
00075
00076 return d_ij - d_ik;
00077 }
00078
00079
00080
00081
00082 double bsta_k_medoid::dcm(const unsigned j, const unsigned k)
00083 {
00084
00085 double dk = 0, dj = 0;
00086 unsigned kmax = this->k();
00087 if (!kmax)
00088 return 0.0;
00089 unsigned count = 0;
00090 for (unsigned nk=0; nk<kmax; ++nk, ++count)
00091 {
00092 unsigned m = this->medoid(nk);
00093 if (m==k)
00094 continue;
00095 dk += this->distance(k, m);
00096 dj += this->distance(j, m);
00097 }
00098 if (!count)
00099 return 0;
00100
00101 double result = (dk-dj)/count;
00102 return result;
00103 }
00104
00105
00106
00107 void bsta_k_medoid::clear_clusters()
00108 {
00109 for (unsigned j=0; j<this->k(); ++j)
00110 clusters_[j].clear();
00111 }
00112
00113
00114
00115 void bsta_k_medoid::form_clusters()
00116 {
00117 this->clear_clusters();
00118 for (unsigned i = 0; i<n_elements_;++i)
00119 if (is_medoid(i))
00120 continue;
00121 else
00122 {
00123
00124 double dmin = HUGE_VAL;
00125 unsigned jmin=0;
00126 for (unsigned j=0; j<this->k(); ++j)
00127 if (distance(i,this->medoid(j))<dmin)
00128 {
00129 jmin = j;
00130 dmin = distance(i,this->medoid(j));
00131 }
00132
00133 clusters_[jmin].push_back(i);
00134 }
00135
00136 for (unsigned j=0; j<this->k(); ++j)
00137 clusters_[j].push_back(medoids_[j]);
00138 }
00139
00140
00141
00142 bool bsta_k_medoid::replace_medoid(const unsigned j, const unsigned k)
00143 {
00144 vcl_vector<unsigned>::iterator result;
00145 result = vcl_find(medoids_.begin(), medoids_.end(), k);
00146 if (result == medoids_.end())
00147 return false;
00148 (*result) = j;
00149 return true;
00150 }
00151
00152
00153
00154 bool bsta_k_medoid::test_medoid_swap(unsigned& mj, unsigned& mk)
00155 {
00156
00157 mj = n_elements_, mk = n_elements_;
00158
00159
00160 double Sdc_min = HUGE_VAL;
00161 unsigned jmin=0, kmin=0;
00162 for ( unsigned j = 0; j<n_elements_; ++j)
00163 if (is_medoid(j))
00164 continue;
00165 else
00166 for (unsigned k = 0; k<this->k(); ++k)
00167 {
00168 if (verbose_)
00169 {
00170 vcl_cout << "\n===== Current Medoids(";
00171 for (unsigned m = 0; m<this->k(); ++m)
00172 vcl_cout << medoid(m) << ' ';
00173 vcl_cout << ")\n"
00174 << "Checking Swap " << j << "->" << medoid(k) << '\n';
00175 }
00176 double Sdc = 0;
00177 unsigned count = 0;
00178
00179 for (unsigned i = 0; i<n_elements_;++i)
00180 if (is_medoid(i)||i==j)
00181 continue;
00182 else{
00183 Sdc += dc(i,j,medoid(k));
00184 ++count;}
00185
00186 if (count>0)
00187 Sdc /= count;
00188 double med_dist=this->dcm(j, medoid(k));
00189 double total = Sdc + med_dist;
00190
00191 if (verbose_)
00192 {
00193 vcl_cout << "Inter-element distance change " << Sdc << '\n'
00194 << "Inter-medoid distance change " << med_dist << '\n'
00195 << "Total change " << total << '\n';
00196 }
00197
00198 if (total < Sdc_min)
00199 {
00200 jmin = j;
00201 kmin = medoid(k);
00202 Sdc_min = total;
00203 }
00204 }
00205
00206 if (Sdc_min<0)
00207 {
00208 mj = jmin;
00209 mk = kmin;
00210 return true;
00211 }
00212 return false;
00213 }
00214
00215
00216
00217
00218 void bsta_k_medoid::do_clustering(const unsigned nk)
00219 {
00220 assert(nk<=n_elements_);
00221
00222
00223 medoids_.clear();
00224
00225 for (unsigned i = 0; i<nk; ++i)
00226 medoids_.push_back(i);
00227
00228 clusters_.clear();
00229 clusters_.resize(nk);
00230
00231 if (nk==n_elements_)
00232 {
00233 for (unsigned i = 0; i<nk; ++i)
00234 clusters_[i].push_back(i);
00235 return;
00236 }
00237
00238 this->form_clusters();
00239 unsigned mj, mk;
00240 while (test_medoid_swap(mj, mk))
00241 {
00242 replace_medoid(mj, mk);
00243 form_clusters();
00244 if (verbose_)
00245 {
00246 vcl_cout << "***Swapping " << mj << "->" << mk << "***\n";
00247 for (unsigned k = 0; k<this->k(); ++k)
00248 {
00249 vcl_cout << "Medoid[" << k << "] = " << medoid(k) << '\n'
00250 << "with cluster\n";
00251 for (unsigned j = 0; j<size(k); ++j)
00252 vcl_cout << clusters_[k][j] << ' ' ;
00253 vcl_cout << '\n'
00254 << "Total Cluster Distance = "
00255 << total_distance(k)<< '\n';
00256 }
00257 }
00258 }
00259 return;
00260 }