00001 #ifndef mbl_stochastic_data_collector_h_ 00002 #define mbl_stochastic_data_collector_h_ 00003 //: 00004 // \file 00005 // \brief Describe class that collects random subset of arbitrary length data. 00006 // \author Ian Scott 00007 //======================================================================= 00008 00009 #include <mbl/mbl_data_collector.h> 00010 #include <mbl/mbl_data_array_wrapper.h> 00011 #include <vcl_vector.h> 00012 #include <vcl_iosfwd.h> 00013 #include <vnl/vnl_random.h> 00014 00015 //======================================================================= 00016 00017 00018 //: Collects vectors, but only stores a subsample of them. 00019 // This is useful if you are unsure how many vectors examples a routine might store, 00020 // but you only want a randomly selected fixed number of them. 00021 // The stored vectors are (in the limit) unbiased w.r.t. the order in which they were presented, 00022 // 00023 // If calculating the values to be stored is expensive, this class 00024 // can be used as follows. 00025 // \code 00026 // mbl_stochastic_data_collector<double> c(100); 00027 // while (..) 00028 // { 00029 // if (c.store_next()) c.force_record(f()); 00030 // } 00031 // \endcode 00032 template <class T> 00033 class mbl_stochastic_data_collector: public mbl_data_collector<T> 00034 { 00035 private: 00036 //: Recorded samples are stored here 00037 vcl_vector<T > samples_; 00038 00039 //: Provides iterator access to the data via data_wrapper() 00040 mbl_data_array_wrapper<T > v_data_; 00041 00042 //: The number of samples presented to record() so far. 00043 unsigned long nPresented_; 00044 00045 //: Random number generator used to decide whether to store a particular vector. 00046 vnl_random rand; 00047 00048 public: 00049 00050 //: Dflt ctor 00051 mbl_stochastic_data_collector(); 00052 00053 //: Set number of samples to be stored. 00054 // This is the number of vectors that can be actually retrieved. 00055 explicit mbl_stochastic_data_collector(unsigned n); 00056 00057 //: Destructor 00058 virtual ~mbl_stochastic_data_collector(); 00059 00060 //: Clear any stored data 00061 virtual void clear(); 00062 00063 //: Set number of samples to be stored 00064 // If not set, the value defaults to 1000. 00065 // Calling this function implicitly calls clean(). 00066 virtual void set_n_samples(int n); 00067 00068 //: Record given value 00069 virtual void record(const T& v); 00070 00071 //: Force recording of this given value 00072 // This does not increment n_presented() 00073 // Used with next(), to avoid calculating values that will not be stored. 00074 void force_record(const T& v); 00075 00076 //: Will decide whether to store the next value 00077 // This will increment n_presented() 00078 // \return true if the value was actually stored. 00079 bool store_next(); 00080 00081 //: Return object describing stored data 00082 virtual mbl_data_wrapper<T >& data_wrapper(); 00083 00084 //: Reseed the internal random number generator. 00085 void reseed (unsigned long seed); 00086 00087 //: The number of vectors that have been presented so far. 00088 unsigned long n_presented() const {return nPresented_;} 00089 00090 //: Version number for I/O 00091 short version_no() const; 00092 00093 //: Name of the class 00094 virtual vcl_string is_a() const; 00095 00096 //: Does the name of the class match the argument? 00097 virtual bool is_class(vcl_string const& s) const; 00098 00099 //: Create a copy on the heap and return base class pointer 00100 virtual mbl_data_collector_base* clone() const; 00101 00102 //: Print class to os 00103 virtual void print_summary(vcl_ostream& os) const; 00104 00105 //: Save class to binary file stream 00106 virtual void b_write(vsl_b_ostream& bfs) const; 00107 00108 //: Load class from binary file stream 00109 virtual void b_read(vsl_b_istream& bfs); 00110 }; 00111 00112 #endif // mbl_stochastic_data_collector_h_