contrib/brl/bseg/sdet/sdet_texture_classifier.h
Go to the documentation of this file.
00001 #ifndef sdet_texture_classifier_h_
00002 #define sdet_texture_classifier_h_
00003 //:
00004 // \file
00005 // \brief  A class for classifying texture regions
00006 // \author J.L. Mundy
00007 // \date   December 10, 2011
00008 //
00009 // Texture is classified using a texton dictionary. A texton is a
00010 // k-means cluster center in an n-dimensional space of filter responses.
00011 // The filters here are patterned after those proposed in the paper
00012 // M. Varma and  A. Zisserman, "A Statistical Approach to Texture
00013 // Classification from Single Images," International Journal of Computer
00014 // Vision, Volume 62, Number 1--2, pages 61--81, 2005.
00015 // In this class only 2nd order Gaussian derivatives are used for the
00016 // anisotropic filters. The isotropic Laplace and Gaussian spot filters
00017 // are the same as in the Varma and Zisserman work. Thus the filter space
00018 // dimension is n_scales + 2, where n_scales is the number of scales for
00019 // the anisotropic filters. The processing is carried out in two stages:
00020 // 1) training - a set of texture classes is defined along with associated
00021 //               training images. It is possible to specify a polygon or
00022 //               multiple polygons to denote image regions corresponding to
00023 //               the texture samples. The training images are processed to
00024 //               produce a filter response vector at each training pixel.
00025 //               The filter responses are randomly selected to form a
00026 //               training set for each texture category. The training set
00027 //               is clustered by the k-means algorithm and the resulting
00028 //               cluster centers define the textons for the given category.
00029 //               The full set of textons defines the texton dictionary.
00030 //               Histograms of frequency of texton occurrence in each
00031 //               training set for each category are formed. A texton histogram
00032 //               bin is incremented if the training sample is nearest to
00033 //               that texton. The ultimate outcome of training is the
00034 //               set of texton histograms for each category. The bin
00035 //               frequencies are decreased if the associated textons appear in
00036 //               more than one category. This weighting is defined by the
00037 //               member texton_weights_.
00038 //
00039 // 2) classification - A test image is decomposed into blocks and each block
00040 //               is used to compute a texton histogram. The histogram is
00041 //               compared with each category histogram and the probability
00042 //               is defined by the sum of joint probabilities between the
00043 //               test and training histograms. In this implementation the
00044 //               joint probability is taken as the minimum of the probability
00045 //               of corresponding bins. This approach is more invariant to
00046 //               the large disparity in sample populations used to compute
00047 //               the histograms, e.g. 400,000 in training vs. 4096 for
00048 //               a test image block.
00049 
00050 #include <sdet/sdet_texture_classifier_params.h>
00051 #include <brip/brip_filter_bank.h>
00052 #include <vil/vil_image_resource.h>
00053 #include <vil/vil_image_view.h>
00054 #include <vgl/vgl_polygon.h>
00055 #include <vnl/vnl_vector.h>
00056 #include <vnl/vnl_vector_fixed.h>
00057 #include <vbl/vbl_ref_count.h>
00058 #include <vcl_vector.h>
00059 #include <vcl_map.h>
00060 #include <vcl_iosfwd.h>
00061 
00062 struct sdet_neighbor
00063 {
00064   sdet_neighbor(vcl_string const& category, vnl_vector<double> const& k_mean)
00065   : cat_(category), k_mean_(k_mean){}
00066   vcl_string cat_;
00067   vnl_vector<double> k_mean_;
00068 };
00069 
00070 class sdet_neighbor_less
00071 {
00072  public:
00073   sdet_neighbor_less(vnl_vector<double> const& query): query_(query){}
00074   //the predicate function
00075   bool operator()(sdet_neighbor const& na, sdet_neighbor const& nb) const
00076   {
00077     double da = vnl_vector_ssd(na.k_mean_, query_),
00078       db = vnl_vector_ssd(nb.k_mean_, query_);
00079     return da < db;
00080   }
00081  private:
00082   vnl_vector<double> query_;
00083 };
00084 
00085 class sdet_texture_classifier : public sdet_texture_classifier_params,
00086                                 public vbl_ref_count
00087 {
00088  public:
00089   //: constructor from parameter block
00090   sdet_texture_classifier(sdet_texture_classifier_params const& params);
00091 
00092   //: compute filter responses for a given texture category training image
00093   bool compute_filter_bank(vil_image_view<float> const& img);
00094 
00095   //: the max image border width eaten up by filter kernels
00096   unsigned max_filter_radius() const;
00097 
00098   //: retrieve the filter responses
00099   brip_filter_bank& filter_responses()
00100     {return filter_responses_;}
00101 
00102   //: append to training data (current filter responses).
00103   //  randomly select training samples from full training image for category
00104   bool compute_training_data(vcl_string const& category);
00105   //: randomly select training samples from within the specified region
00106   bool compute_training_data(vcl_string const& category,
00107                              vgl_polygon<double> const& texture_region);
00108   //: randomly select training samples from within the specified regions
00109   bool compute_training_data(vcl_string const& category,
00110                              vcl_vector<vgl_polygon<double> >const& texture_regions);
00111   //: randomly select training samples from within a region loaded from file
00112   bool compute_training_data(vcl_string const& category,
00113                              vcl_string const& poly_path = "");
00114 
00115   //: compute textons with k_means for the specified texture category
00116   bool compute_textons(vcl_string const& category);
00117 
00118   //: compute textons from set of images (and polygons).
00119   //  If polygon_paths is empty or some element contains a null string
00120   //  the entire image(s) is(are) used
00121   //  This method is the main driver function to carry out training
00122   bool compute_textons(vcl_vector<vcl_string> const& image_paths,
00123                        vcl_string const& category,
00124                        vcl_vector<vcl_string> const& poly_paths=
00125                        vcl_vector<vcl_string>());
00126 
00127   //: The texton histograms derived from the training data
00128   void compute_category_histograms();
00129 
00130   //: save texton dictionary, binary (includes classifier params at top of file)
00131   bool save_dictionary(vcl_string const& path) const;
00132   //: load dictionary, binary
00133   bool load_dictionary(vcl_string const& path);
00134 
00135   //: set category colors
00136   void set_category_colors(vcl_map< vcl_string, vnl_vector_fixed<float, 3> > const& color_map)
00137   {color_map_ = color_map; color_map_valid_ = true;}
00138   //: image of category probabilities expressed as colors
00139   vil_image_view<float> classify_image_blocks(vcl_string const& img_path);
00140 
00141   //: print
00142   void print_dictionary() const;
00143   void print_distances() const;
00144   void print_color_map() const;
00145   void print_category_histograms() const;
00146   void print_interclass_probs() const;
00147   void print_texton_weights() const;
00148 
00149   // ===  debug utilities ===
00150 
00151  protected:
00152   sdet_texture_classifier();
00153   vil_image_view<float> scale_image(vil_image_resource_sptr const& resc);
00154   vcl_vector<vnl_vector<double> >
00155     random_centers(vcl_vector<vnl_vector<double> > const& training_data,
00156                    unsigned k) const;
00157   void compute_distances();
00158   void compute_interclass_probs();
00159   void compute_texton_weights();
00160 
00161   void init_color_map();
00162 
00163   void compute_texton_index();
00164 
00165   unsigned nearest_texton_index(vnl_vector<double> const& query);
00166 
00167   //: update the texton histogram with a filter vector
00168   void update_hist(vnl_vector<double> const& f, float weight,
00169                    vcl_vector<float>& hist);
00170   //: compute the vector of texture probabilities
00171   vcl_map<vcl_string, float> texture_probabilities(vcl_vector<float> const& hist);
00172   //: color representing the mix of texture probabilites
00173   void category_color_mix(vcl_map<vcl_string, float>& probs,
00174                           vnl_vector_fixed<float, 3>& color_mix);
00175   //: color representing the mix of texture probabilites with atomspheric quality (should be removed to another class)
00176   void category_quality_color_mix(vcl_map<vcl_string, float>& probs,
00177                                   vnl_vector_fixed<float, 3>& color_mix);
00178   // === members ===
00179   brip_filter_bank filter_responses_;
00180   vil_image_view<float> laplace_;
00181   vil_image_view<float> gauss_;
00182   vil_image_view<float> frac_counts_;
00183   // the training data for a given category
00184   vcl_map< vcl_string, vcl_vector<vnl_vector<double> > > training_data_;
00185   // the texton dictionary. The class is identified by a string name
00186   vcl_map< vcl_string, vcl_vector<vnl_vector<double> > > texton_dictionary_;
00187   // min distance between categories if different, max distance if the same
00188   vcl_map< vcl_string, vcl_map< vcl_string, double> > dist_;
00189   bool distances_valid_;
00190   // inter class texton probability
00191   vcl_map< vcl_string, vcl_map< vcl_string, double> > inter_prob_;
00192   bool inter_prob_valid_;
00193   // category color table
00194   vcl_map< vcl_string, vnl_vector_fixed<float, 3> > color_map_;
00195   bool color_map_valid_;
00196   vcl_vector<sdet_neighbor> texton_index_;
00197   bool texton_index_valid_;
00198   vcl_map<vcl_string, vcl_vector<float> > category_histograms_;
00199   vcl_vector<float> texton_weights_;
00200   bool texton_weights_valid_;
00201 };
00202 #include <sdet/sdet_texture_classifier_sptr.h>
00203 //: Binary save parameters to stream.
00204 void vsl_b_write(vsl_b_ostream & os, sdet_texture_classifier const &tc);
00205 
00206 //: Binary load parameters from stream.
00207 void vsl_b_read(vsl_b_istream & is, sdet_texture_classifier &tc);
00208 
00209 void vsl_print_summary(vcl_ostream &os, const sdet_texture_classifier &tc);
00210 
00211 void vsl_b_read(vsl_b_istream& is, sdet_texture_classifier* tc);
00212 
00213 void vsl_b_write(vsl_b_ostream& os, const sdet_texture_classifier* &tc);
00214 
00215 void vsl_print_summary(vcl_ostream& os, const sdet_texture_classifier* &tc);
00216 
00217 void vsl_b_read(vsl_b_istream& is, sdet_texture_classifier_sptr& tc);
00218 
00219 void vsl_b_write(vsl_b_ostream& os, const sdet_texture_classifier_sptr &tc);
00220 
00221 void vsl_print_summary(vcl_ostream& os, const sdet_texture_classifier_sptr &tc);
00222 #endif // sdet_texture_classifier_h_