contrib/mul/mbl/mbl_k_means.h
Go to the documentation of this file.
00001 // This is mul/mbl/mbl_k_means.h
00002 #ifndef mbl_k_means_h
00003 #define mbl_k_means_h
00004 //:
00005 // \file
00006 // \author Ian Scott
00007 // \date 18-May-2001
00008 // \brief K Means clustering functions
00009 
00010 #include <vcl_vector.h>
00011 #include <vnl/vnl_vector.h>
00012 #include <mbl/mbl_data_wrapper.h>
00013 
00014 
00015 //: Find k cluster centres
00016 // Uses batch k-means clustering.
00017 // If you provide the partition parameter, it will be filled with the
00018 // cluster index for each data sample. The number of iterations
00019 // performed is returned.
00020 //
00021 // \par Initial Cluster Centres
00022 // If cluster_centres contains the correct number of centres, they will
00023 // be used as the initial centres. If not, and if partition is
00024 // given, and it is the correct size, then partition will be used
00025 // to find the initial centres.
00026 //
00027 // \par Degenerate Cases
00028 // If at any point one of the centres has no data points allocated to it,
00029 // the number of centres will be reduced below k. This is most likely to
00030 // happen if you start the function with one or more centres identical to
00031 // another, or if some centres start off outside the convex hull of the data set.
00032 // In particular, if you let the function initialise the centres, it will
00033 // occur if any of the first k data samples are identical.
00034 unsigned mbl_k_means(mbl_data_wrapper<vnl_vector<double> > &data, unsigned k,
00035                      vcl_vector<vnl_vector<double> >* cluster_centres,
00036                      vcl_vector<unsigned> * partition =0 );
00037 
00038 
00039 //: Find k cluster centres with weighted data
00040 // Uses batch k-means clustering. NOTE(review): wts presumably holds one weight per data sample - confirm.
00041 // If you provide the partition parameter, it will be filled with the
00042 // cluster index for each data sample. The number of iterations
00043 // performed is returned.
00044 //
00045 // \par Initial Cluster Centres
00046 // If cluster_centres contains the correct number of centres, they will
00047 // be used as the initial centres. If not, and if partition is
00048 // given, and it is the correct size, then partition will be used
00049 // to find the initial centres.
00050 //
00051 // \par Degenerate Cases
00052 // If at any point one of the centres has no data points allocated to it,
00053 // the number of centres will be reduced below k. This is most likely to
00054 // happen if you start the function with one or more centres identical to
00055 // another, or if some centres start off outside the convex hull of the data set.
00056 // In particular, if you let the function initialise the centres, it will
00057 // occur if any of the first k data samples are identical.
00058 unsigned mbl_k_means_weighted(mbl_data_wrapper<vnl_vector<double> > &data, unsigned k,
00059                               const vcl_vector<double>& wts,
00060                               vcl_vector<vnl_vector<double> >* cluster_centres,
00061                               vcl_vector<unsigned> * partition =0);
00062 
00063 #endif // mbl_k_means_h