00001
00002 #include "sdet_k_means.h"
00003
00004
00005
00006 #include <vcl_algorithm.h>
00007 #include <vcl_iostream.h>
00008 #include <vcl_cstdlib.h>
00009 #include <vcl_cassert.h>
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 unsigned sdet_k_means(vcl_vector<vnl_vector<double> > &data, unsigned& k,
00031 vcl_vector<vnl_vector<double> >* cluster_centres,
00032 vcl_vector<unsigned> * partition
00033 )
00034 {
00035 unsigned n_data = data.size();
00036 if(n_data==0){
00037 vcl_cout << "no data to process in sdet_k_means\n";
00038 return 0;
00039 }
00040 vcl_vector<vnl_vector<double> > & centres = *cluster_centres;
00041
00042 vcl_vector<unsigned> * p_partition;
00043
00044 unsigned didx = 0;
00045
00046 unsigned dims = data[didx].size();
00047
00048 vcl_vector<vnl_vector<double> > sums(k, vnl_vector<double>(dims, 0.0));
00049 vcl_vector<unsigned> nNearest(k,0);
00050 unsigned i;
00051 unsigned iterations =0;
00052
00053 assert(data.size() >= k);
00054
00055 bool initialise_from_clusters = false;
00056
00057
00058 if (partition)
00059 {
00060 p_partition = partition;
00061 if (p_partition->size() != data.size())
00062 {
00063 p_partition->resize(data.size());
00064 vcl_fill(p_partition->begin(), p_partition->end(), 0);
00065 }
00066 else initialise_from_clusters = true;
00067 }
00068 else
00069 p_partition = new vcl_vector<unsigned>(int(data.size()), 0u);
00070
00071
00072
00073
00074 if (centres.size() != k)
00075 {
00076 centres.resize(k);
00077 for (i=0; i<k; ++i)
00078 {
00079 centres[i] = data[didx];
00080 sums[i] += data[didx];
00081 nNearest[i]++;
00082 didx++;
00083 }
00084 }
00085 else if (initialise_from_clusters)
00086 {
00087 do
00088 {
00089 sums[(*p_partition)[didx] ] += data[didx];
00090 nNearest[(*p_partition)[didx] ]++;
00091 } while (didx++<n_data);
00092
00093 for (i=0; i<k; ++i)
00094 centres[i] = sums[i]/nNearest[i];
00095 didx = 0;
00096 vcl_fill(sums.begin(), sums.end(), vnl_vector<double>(dims, 0.0));
00097 vcl_fill(nNearest.begin(), nNearest.end(), 0);
00098 }
00099
00100 bool changed = true;
00101 while (changed)
00102 {
00103 changed = false;
00104 do
00105 {
00106 unsigned bestCentre = 0;
00107 double bestDist = vnl_vector_ssd(centres[0], data[didx]);
00108 for (i=1; i<k; ++i)
00109 {
00110 double dist = vnl_vector_ssd(centres[i], data[didx]);
00111 if (dist < bestDist)
00112 {
00113 bestDist = dist;
00114 bestCentre = i;
00115 }
00116 }
00117 sums[bestCentre] += data[didx];
00118 nNearest[bestCentre] ++;
00119 if (bestCentre != (*p_partition)[didx])
00120 {
00121 changed = true;
00122 (*p_partition)[didx] = bestCentre;
00123 }
00124 } while (++didx<n_data);
00125
00126
00127
00128 for (i=0; i<k; ++i)
00129 {
00130 if ( nNearest[i] == 0)
00131 {
00132 k--;
00133 centres.erase(centres.begin()+i);
00134 sums.erase(sums.begin()+i);
00135 nNearest.erase(nNearest.begin()+i);
00136
00137 for (unsigned j=0; j<p_partition->size(); ++j)
00138 {
00139 assert ((*p_partition)[j] = i);
00140 if ((*p_partition)[j] > i) (*p_partition)[j]--;
00141 }
00142
00143 changed= true;
00144 }
00145 }
00146
00147
00148 for (i=0; i<k; ++i)
00149 centres[i] = sums[i]/nNearest[i];
00150
00151
00152
00153 didx = 0;
00154 vcl_fill(sums.begin(), sums.end(), vnl_vector<double>(dims, 0.0));
00155 vcl_fill(nNearest.begin(), nNearest.end(), 0);
00156 iterations ++;
00157 }
00158
00159 if (!partition)
00160 delete p_partition;
00161
00162 return iterations;
00163 }
00164
00165 static inline void incXbyYv(vnl_vector<double> *X, const vnl_vector<double> &Y, double v)
00166 {
00167 assert(X->size() == Y.size());
00168 assert(X->size() > 0);
00169 int i = ((int)X->size()) - 1;
00170 double * const pX=X->data_block();
00171 while (i >= 0)
00172 {
00173 pX[i] += Y[i] * v;
00174 i--;
00175 }
00176 }
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200 unsigned sdet_k_means_weighted(vcl_vector<vnl_vector<double> > &data,
00201 unsigned& k,
00202 const vcl_vector<double>& wts,
00203 vcl_vector<vnl_vector<double> >* cluster_centres,
00204 vcl_vector<unsigned> * partition
00205 )
00206 {
00207 unsigned n_data = data.size();
00208 if(n_data==0){
00209 vcl_cout << "no data to process in sdet_k_means\n";
00210 return 0;
00211 }
00212 vcl_vector<vnl_vector<double> > & centres = *cluster_centres;
00213
00214 vcl_vector<unsigned> * p_partition;
00215 unsigned didx = 0;
00216 unsigned dims = data[didx].size();
00217 vcl_vector<vnl_vector<double> > sums(k, vnl_vector<double>(dims, 0.0));
00218 vcl_vector<double> nNearest(k,0.0);
00219 unsigned i;
00220 unsigned iterations =0;
00221
00222 bool initialise_from_clusters = false;
00223
00224 assert(data.size() >= k);
00225 assert(data.size() == wts.size());
00226
00227
00228 if (partition)
00229 {
00230 p_partition = partition;
00231 if (p_partition->size() != data.size())
00232 {
00233 p_partition->resize(data.size());
00234 vcl_fill(p_partition->begin(), p_partition->end(), 0);
00235 }
00236 else initialise_from_clusters = true;
00237 }
00238 else
00239 p_partition = new vcl_vector<unsigned>(int(data.size()), 0u);
00240
00241 const vnl_vector<double> vcl_vector_double_dims_0(dims, 0.0);
00242
00243
00244
00245
00246 if (centres.size() != k)
00247 {
00248 centres.resize(k);
00249 for (i=0; i<k; ++i)
00250 {
00251 while (wts[didx] == 0.0)
00252 {
00253 #ifdef NDEBUG
00254 didx++;
00255 #else
00256 if (++didx<n_data)
00257 {
00258 vcl_cerr << "ERROR: sdet_k_means_weighted, while initialising centres from data\n"
00259 << "Not enough non-zero-weighted data\n";
00260 vcl_abort();
00261 }
00262 #endif //NDEBUG
00263 }
00264 centres[i] = data[didx];
00265 incXbyYv(&sums[i], data[didx], wts[didx]);
00266
00267 nNearest[i]+= wts[didx];
00268 didx++;
00269 }
00270 }
00271 else if (initialise_from_clusters)
00272 {
00273
00274 do
00275 {
00276 incXbyYv(&sums[(*p_partition)[didx] ], data[didx], wts[didx]);
00277 nNearest[(*p_partition)[didx] ]+=wts[didx];
00278 } while (++didx<n_data);
00279
00280 for (i=0; i<k; ++i)
00281 centres[i] = sums[i]/nNearest[i];
00282 didx = 0;
00283 vcl_fill(sums.begin(), sums.end(), vcl_vector_double_dims_0);
00284 vcl_fill(nNearest.begin(), nNearest.end(), 0.0);
00285 }
00286
00287 bool changed = true;
00288 while (changed)
00289 {
00290 changed = false;
00291 do
00292 {
00293 const double w = wts[didx];
00294 if (w != 0.0)
00295 {
00296 unsigned bestCentre = 0;
00297 double bestDist = vnl_vector_ssd(centres[0], data[didx]);
00298 for (i=1; i<k; ++i)
00299 {
00300 double dist = vnl_vector_ssd(centres[i], data[didx]);
00301 if (dist < bestDist)
00302 {
00303 bestDist = dist;
00304 bestCentre = i;
00305 }
00306 }
00307 incXbyYv(&sums[bestCentre], data[didx], w);
00308 nNearest[bestCentre] += w;
00309 if (bestCentre != (*p_partition)[didx])
00310 {
00311 changed = true;
00312 (*p_partition)[didx] = bestCentre;
00313 }
00314 }
00315 } while (++didx<n_data);
00316
00317
00318
00319 for (i=0; i<k; ++i)
00320 {
00321 if ( nNearest[i] == 0.0)
00322 {
00323 k--;
00324 centres.erase(centres.begin()+i);
00325 sums.erase(sums.begin()+i);
00326 nNearest.erase(nNearest.begin()+i);
00327 for (unsigned j=0; j<p_partition->size(); ++j)
00328 {
00329 if (wts[j] != 0.0)
00330 {
00331 assert ((*p_partition)[j] != i);
00332 if ((*p_partition)[j] > i) (*p_partition)[j]--;
00333 }
00334 }
00335 }
00336 }
00337
00338
00339 for (i=0; i<k; ++i)
00340 centres[i] = sums[i]/nNearest[i];
00341
00342
00343 didx = 0;
00344 vcl_fill(sums.begin(), sums.end(), vcl_vector_double_dims_0);
00345 vcl_fill(nNearest.begin(), nNearest.end(), 0.0);
00346 iterations ++;
00347 }
00348
00349 if (!partition)
00350 delete p_partition;
00351 else
00352 {
00353 didx = 0;
00354 do
00355 {
00356 if (wts[didx] == 0.0)
00357 {
00358 unsigned bestCentre = 0;
00359 double bestDist = vnl_vector_ssd(centres[0], data[didx]);
00360 for (i=1; i<k; ++i)
00361 {
00362 double dist = vnl_vector_ssd(centres[i], data[didx]);
00363 if (dist < bestDist)
00364 {
00365 bestDist = dist;
00366 bestCentre = i;
00367 }
00368 }
00369 (*p_partition)[didx] = bestCentre;
00370 }
00371 } while (++didx<n_data);
00372 }
00373
00374 return iterations;
00375 }
00376