contrib/mul/mcal/mcal_pca.h
Go to the documentation of this file.
00001 #ifndef mcal_pca_h
00002 #define mcal_pca_h
00003 //:
00004 // \file
00005 // \author Tim Cootes
00006 // \brief Class to perform Principal Component Analysis
00007 
00008 #include <mcal/mcal_component_analyzer.h>
00009 #include <vnl/io/vnl_io_vector.h>
00010 #include <vnl/io/vnl_io_matrix.h>
00011 #include <mbl/mbl_data_wrapper.h>
00012 #include <vcl_iosfwd.h>
00013 
00014 //: Class to perform Principal Component Analysis
00015 //  Applies a PCA to compute mean, variance and eigenvectors/values
00016 //  of covariance matrix of supplied data.
00017 class mcal_pca : public mcal_component_analyzer
00018 {
00019  private:
00020     //: Proportion of data variance to explain (see set_var_prop() / var_prop())
00021   double var_prop_;
          //: Lower and upper limits on the number of modes (see min_modes() / max_modes())
00022   unsigned int min_modes_,max_modes_;
00023 
00024   //: Return the number of modes to retain
        //  NOTE(review): evals are presumably the eigenvalues of the covariance
        //  matrix, and the choice presumably depends on var_prop_, min_modes_
        //  and max_modes_ - the rule itself is not visible in this header; confirm
        //  against the implementation.
00025   unsigned choose_n_modes(const vnl_vector<double>& evals);
00026 
00027   //: Utility function
        //  NOTE(review): purpose is not visible from this header. Presumably fills
        //  DDt from the part of A selected by the row range (rlo,rhi) and column
        //  range (clo,chi) - confirm against the implementation.
00028   void fillDDt(vnl_matrix<double>& DDt, const vnl_matrix<double>& A,
00029                int rlo, int rhi, int clo, int chi);
00030 
00031   //: Compute eigenvectors assuming fewer dimensions than samples
        //  evecs and evals are taken by non-const reference (presumably outputs).
00032   void build_evecs_nd_smaller(mbl_data_wrapper<vnl_vector<double> >& data,
00033                               const vnl_vector<double>& mean,
00034                               vnl_matrix<double>& evecs,
00035                               vnl_vector<double>& evals);
00036 
00037   //: Compute eigenvectors assuming fewer samples than dimensions
        //  evecs and evals are taken by non-const reference (presumably outputs).
00038   void build_evecs_ns_smaller(mbl_data_wrapper<vnl_vector<double> >& data,
00039                               const vnl_vector<double>& mean,
00040                               vnl_matrix<double>& evecs,
00041                               vnl_vector<double>& evals);
00042 
00043   //: Max number of doubles allowed in memory
        //  Stored as a double rather than an integer type.
00044   double max_d_in_memory_;
00045 
00046   //: Whether to use chunks if required
00047   bool use_chunks_;
00048 
00049  public:
00050 
00051     //: Default constructor
00052   mcal_pca();
00053 
00054     //: Destructor
00055   virtual ~mcal_pca();
00056 
00057     //: Define limits on number of parameters to use in model
          // \param min  Lower limit on number of parameters (modes)
          // \param max  Upper limit on number of parameters (modes)
00058     // \param var_proportion  Proportion of variance in data to explain
00059   void set_mode_choice(unsigned min, unsigned max, double var_proportion);
00060 
00061 
00062   //: Define lower limit on number of parameters (modes)
00063   void set_min_modes( unsigned min );
00064   //: Current lower limit on number of parameters
00065   unsigned min_modes() const;
00066 
00067     //: Define upper limit on number of parameters
00068   void set_max_modes(unsigned max);
00069     //: Current upper limit on number of parameters
00070   unsigned max_modes() const;
00071 
00072     //: Define proportion of data variance to explain
00073   virtual void set_var_prop(double v);
00074 
00075     //: Current proportion of data variance to explain
00076   virtual double var_prop() const;
00077 
00078   //: Set max number of doubles allowed in memory
00079   void set_max_d_in_memory(double max_n);
00080 
00081   //: Current max number of doubles allowed in memory
00082   double max_d_in_memory() const { return max_d_in_memory_; }
00083 
00084   //: Set whether we may build in chunks if required
00085   void set_use_chunks(bool chunks);
00086 
00087   //: Indicate whether to use chunks if required
00088   bool use_chunks() const { return use_chunks_; }
00089 
00090   //: Compute modes of the supplied data relative to the supplied mean
00091   //  Model is x = mean + modes*b,  where b is a vector of weights on each mode.
00092   //  mode_var[i] gives the variance of the data projected onto that mode.
00093   virtual void build_about_mean(mbl_data_wrapper<vnl_vector<double> >& data,
00094                                 const vnl_vector<double>& mean,
00095                                 vnl_matrix<double>& modes,
00096                                 vnl_vector<double>& mode_var);
00097 
00098     //: Version number for I/O
00099   short version_no() const;
00100 
00101     //: Name of the class
00102   virtual vcl_string is_a() const;
00103 
00104     //: Create a copy on the heap and return base class pointer
00105   virtual  mcal_component_analyzer*  clone()  const;
00106 
00107     //: Print class to os
00108   virtual void print_summary(vcl_ostream& os) const;
00109 
00110     //: Save class to binary file stream
00111   virtual void b_write(vsl_b_ostream& bfs) const;
00112 
00113     //: Load class from binary file stream
00114   virtual void b_read(vsl_b_istream& bfs);
00115 
00116   //: Read initialisation settings from a stream.
00117   // Parameters:
00118   // \verbatim
00119   // {
00120   //   min_modes: 0 max_modes: 99 var_prop: 0.99
00121   //   // Maximum number of doubles to store in memory at once
00122   //   max_d_in_memory: 1e8
00123   //   // Indicate how to build from large amounts of data
00124   //   use_chunks: false
00125   // }
00126   // \endverbatim
00127   // \throw mbl_exception_parse_error if the parse fails.
00128   virtual void config_from_stream(vcl_istream & is);
00129 };
00130 
00131 #endif // mcal_pca_h