00001 // This is mul/mbl/mbl_k_means.h 00002 #ifndef mbl_k_means_h 00003 #define mbl_k_means_h 00004 //: 00005 // \file 00006 // \author Ian Scott 00007 // \date 18-May-2001 00008 // \brief K Means clustering functions 00009 00010 #include <vcl_vector.h> 00011 #include <vnl/vnl_vector.h> 00012 #include <mbl/mbl_data_wrapper.h> 00013 00014 00015 //: Find k cluster centres 00016 // Uses batch k-means clustering. 00017 // If you provide parameter partition, it will return the 00018 // cluster index for each data sample. The number of iterations 00019 // performed is returned. 00020 // 00021 // \par Initial Cluster Centres 00022 // If centres contain the correct number of centres, they will 00023 // be used as the initial centres, If not, and if partition is 00024 // given, and it is the correct size, then this will be used 00025 // to find the initial centres. 00026 // 00027 // \par Degenerate Cases 00028 // If at any point the one of the centres has no data points allocated to it 00029 // the number of centres will be reduced below k. This is most likely to 00030 // happen if you start the function with one or more centre identical, or 00031 // if some of the centres start off outside the convex hull of the data set. 00032 // In particular if you let the function initialise the centres, it will 00033 // occur if any of the first k data samples are identical. 00034 unsigned mbl_k_means(mbl_data_wrapper<vnl_vector<double> > &data, unsigned k, 00035 vcl_vector<vnl_vector<double> >* cluster_centres, 00036 vcl_vector<unsigned> * partition =0 ); 00037 00038 00039 //: Find k cluster centres with weighted data 00040 // Uses batch k-means clustering. 00041 // If you provide parameter partition, it will return the 00042 // cluster index for each data sample. The number of iterations 00043 // performed is returned. 00044 // 00045 // \par Initial Cluster Centres 00046 // If centres contain the correct number of centres, they will 00047 // be used as the initial centres, If not, and if partition is 00048 // given, and it is the correct size, then this will be used 00049 // to find the initial centres. 00050 // 00051 // \par Degenerate Cases 00052 // If at any point the one of the centres has no data points allocated to it 00053 // the number of centres will be reduced below k. This is most likely to 00054 // happen if you start the function with one or more centre identical, or 00055 // if some of the centres start off outside the convex hull of the data set. 00056 // In particular if you let the function initialise the centres, it will 00057 // occur if any of the first k data samples are identical. 00058 unsigned mbl_k_means_weighted(mbl_data_wrapper<vnl_vector<double> > &data, unsigned k, 00059 const vcl_vector<double>& wts, 00060 vcl_vector<vnl_vector<double> >* cluster_centres, 00061 vcl_vector<unsigned> * partition =0); 00062 00063 #endif // mbl_k_means_h