contrib/mul/mbl/mbl_stochastic_data_collector.h
Go to the documentation of this file.
00001 #ifndef mbl_stochastic_data_collector_h_
00002 #define mbl_stochastic_data_collector_h_
00003 //:
00004 // \file
00005 // \brief Describe class that collects a random subset of arbitrary-length data.
00006 // \author Ian Scott
00007 //=======================================================================
00008 
00009 #include <mbl/mbl_data_collector.h>
00010 #include <mbl/mbl_data_array_wrapper.h>
00011 #include <vcl_vector.h>
00012 #include <vcl_iosfwd.h>
00013 #include <vnl/vnl_random.h>
00014 
00015 //=======================================================================
00016 
00017 
00018 //: Collects vectors, but only stores a subsample of them.
00019 // This is useful if you are unsure how many example vectors a routine might record,
00020 // but you only want to keep a fixed number of them, selected at random.
00021 // The stored vectors are (in the limit) unbiased w.r.t. the order in which they were presented.
00022 //
00023 // If calculating the values to be stored is expensive, this class
00024 // can be used as follows.
00025 // \code
00026 // mbl_stochastic_data_collector<double> c(100);
00027 // while (..)
00028 // {
00029 //   if (c.store_next()) c.force_record(f());
00030 // }
00031 // \endcode
00032 template <class T>
00033 class mbl_stochastic_data_collector: public mbl_data_collector<T>
00034 {
00035  private:
00036   //: The retained subsample of the recorded values is stored here.
00037   vcl_vector<T > samples_;
00038 
00039   //: Provides iterator access to the stored data; exposed via data_wrapper().
00040   mbl_data_array_wrapper<T > v_data_;
00041 
00042   //: The number of samples presented so far (reported by n_presented()).
00043   unsigned long nPresented_;
00044 
00045   //: Random number generator used to decide whether to store a particular vector.
00046   vnl_random rand;
00047 
00048  public:
00049 
00050   //: Default constructor.
00051   mbl_stochastic_data_collector();
00052 
00053   //: Construct, setting the number of samples to be stored to n.
00054   // This is the number of vectors that can be actually retrieved.
00055   explicit mbl_stochastic_data_collector(unsigned n);
00056 
00057   //: Destructor
00058   virtual ~mbl_stochastic_data_collector();
00059 
00060   //: Clear any stored data.
00061   virtual void clear();
00062 
00063   //: Set number of samples to be stored.
00064   // If not set, the value defaults to 1000.
00065   // Calling this function implicitly calls clear().
00066   virtual void set_n_samples(int n);
00067 
00068   //: Record given value (it may or may not end up among the stored samples).
00069   virtual void record(const T& v);
00070 
00071   //: Force recording of this given value.
00072   // This does not increment n_presented().
00073   // Used with store_next(), to avoid calculating values that will not be stored.
00074   void force_record(const T& v);
00075 
00076   //: Will decide whether to store the next value.
00077   // This will increment n_presented().
00078   // \return true if the next value should be stored, i.e. passed to force_record().
00079   bool store_next();
00080 
00081   //: Return object describing stored data.
00082   virtual mbl_data_wrapper<T >& data_wrapper();
00083 
00084   //: Reseed the internal random number generator with the given seed.
00085   void  reseed (unsigned long seed);
00086 
00087   //: The number of vectors that have been presented so far.
00088   unsigned long n_presented() const {return nPresented_;}
00089 
00090   //: Version number for I/O.
00091   short version_no() const;
00092 
00093   //: Name of the class.
00094   virtual vcl_string is_a() const;
00095 
00096   //: Does the name of the class match the argument?
00097   virtual bool is_class(vcl_string const& s) const;
00098 
00099   //: Create a copy on the heap and return a base class pointer to it.
00100   virtual mbl_data_collector_base* clone() const;
00101 
00102   //: Print a summary of the class to os.
00103   virtual void print_summary(vcl_ostream& os) const;
00104 
00105   //: Save class to binary file stream.
00106   virtual void b_write(vsl_b_ostream& bfs) const;
00107 
00108   //: Load class from binary file stream.
00109   virtual void b_read(vsl_b_istream& bfs);
00110 };
00111 
00112 #endif // mbl_stochastic_data_collector_h_