contrib/brl/bbas/bsta/bsta_k_medoid.h
Go to the documentation of this file.
00001 // This is brl/bbas/bsta/bsta_k_medoid.h
00002 #ifndef bsta_k_medoid_h_
00003 #define bsta_k_medoid_h_
00004 //:
00005 // \file
00006 // \brief Form k clusters using distance to representative objects (medoids)
00007 // \author Joseph Mundy
00008 // \date June 11, 2005
00009 //
00010 // A clustering algorithm based on the distance within the cluster to
00011 // a representative element and the total distance between representatives.
00012 // The input is a n x n matrix of pairwise distances
00013 // The output is a set of k representatives (medoids) that minimize the
00014 // sum of the average distance from each medoid to other elements
00015 // closest to the medoid and the average distance between medoids.
00016 //
00017 // For k = 1 the medoid would be the element that minimizes the average
00018 // distance to all other elements. The elements are indexed from 0 to n-1.
00019 //
00020 // Fairly computationally expensive:  The space requirement is n x n
00021 // The time is k x (n - k) x (n - k) x number of swaps to minimize
00022 // total distance.  There might be on the order of k swaps (or worse).
00023 
00024 #include <vcl_vector.h>
00025 #include <vcl_cassert.h>
00026 #include <vcl_iostream.h>
00027 #include <vbl/vbl_array_2d.h>
00028 
00029 class bsta_k_medoid
00030 {
00031  public:
00032   bsta_k_medoid(unsigned n_elements, bool verbose = false);
00033   ~bsta_k_medoid(){}
00034 
00035   //: insert a distance into the array, the entry j, i is automatically added
00036   inline void insert_distance(const unsigned i, const unsigned j, double d)
00037   {assert((i<n_elements_)&&(j<n_elements_));
00038   distance_array_[i][j] = d; distance_array_[j][i] = d;}
00039 
00040   //: The distance between two elements
00041   inline double distance(const unsigned i, const unsigned j) const
00042     {assert((i<n_elements_)&&(j<n_elements_)); return distance_array_[i][j];}
00043 
00044   //: cluster the elements using k medoids
00045   void do_clustering(const unsigned k);
00046 
00047   //:get number of medoids
00048   inline unsigned k() const
00049     {return medoids_.size();}
00050 
00051   //: get a medoid
00052   unsigned medoid(const unsigned i) const
00053     {assert(i<medoids_.size()); return medoids_[i];}
00054 
00055   //: is an element a medoid?
00056   bool is_medoid(const unsigned i) const;
00057 
00058   //: number of elements in cluster k
00059   inline unsigned size(const unsigned k) const
00060     {assert(k<this->k());return clusters_[k].size();}
00061 
00062   //: the elements in cluster k
00063   inline vcl_vector<unsigned> elements(const unsigned k)
00064     {assert(k<this->k());return clusters_[k];}
00065 
00066   //: is an element in cluster k ?
00067   bool in_cluster(const unsigned i, const unsigned k) const;
00068 
00069   //:the distance between an element and its medoid
00070   double medoid_distance(const unsigned i) const;
00071 
00072   //: the total distance between elements and the medoid in cluster k
00073   double total_distance(const unsigned k) const;
00074 
00075   //: print distance array (for debugging)
00076   inline void print_distance_array(vcl_ostream & str = vcl_cout)
00077     {str << '\n' << distance_array_ << '\n';}
00078 
00079  protected:
00080 
00081   //: avg distance change for element i resulting from swapping medoids, j->k.
00082   double dc(const unsigned i, const unsigned j, const unsigned k);
00083 
00084   //: avg inter-medoid distance change resulting from swapping medoids, j->k.
00085   double dcm(const unsigned j, const unsigned k);
00086 
00087   //: replace medoid k with medoid k in the set of medoids
00088   bool replace_medoid(const unsigned j, const unsigned k);
00089 
00090   //: determine if a swap of j with k leads to a reduction in distance
00091   bool test_medoid_swap(unsigned& mj, unsigned& mk);
00092 
00093   //: clear the cluster sets
00094   void clear_clusters();
00095 
00096   //: assign non-medoids to their nearest medoid, forming clusters
00097   void form_clusters();
00098 
00099  private:
00100   //: print useful debug messages
00101   bool verbose_;
00102 
00103   //: the size of the distance array
00104   unsigned n_elements_;
00105 
00106   //: the k medoids
00107   vcl_vector<unsigned> medoids_;
00108 
00109   //: The set of elements closest to a given medoid
00110   vcl_vector<vcl_vector<unsigned> > clusters_;
00111 
00112   //: The array of pair-wise distances between elements
00113   vbl_array_2d<double> distance_array_;
00114 };
00115 
00116 #endif // bsta_k_medoid_h_