contrib/mul/mbl/tools/sample_stats.cxx
Go to the documentation of this file.
00001 //////////////////////////////////////////////////////////////////////////
00002 //
00003 // Calculate various statistics on a sample of 1D data.
00004 //
00005 //////////////////////////////////////////////////////////////////////////
00006 
00007 #include <vcl_iostream.h>
00008 #include <vcl_iterator.h>
00009 #include <vcl_fstream.h>
00010 #include <vcl_string.h>
00011 #include <vcl_exception.h>
00012 #include <vcl_map.h>
00013 #include <vcl_typeinfo.h>
00014 #include <vul/vul_arg.h>
00015 #include <vul/vul_sprintf.h>
00016 #include <mbl/mbl_log.h>
00017 #include <mbl/mbl_exception.h>
00018 #include <mbl/mbl_sample_stats_1d.h>
00019 
00020 
00021 //! Identify which format to use for output.
00022 enum OutputFormat
00023 {
00024   LIST,   //!< Write each statistic on a separate line, including its title
00025   TABLE   //!< Write all statistics on one line, arranged in columns
00026 };
00027 
00028 
00029 //=========================================================================
00030 // Static function to create a static logger when first required
00031 //=========================================================================
00032 static mbl_logger& logger()
00033 {
00034   static mbl_logger l("mul.mbl.tools.sample_stats");
00035   return l;
00036 }
00037 
00038 
00039 //=========================================================================
00040 // Report an error message in several ways, then throw an exception.
00041 //=========================================================================
00042 static void do_error(const vcl_string& msg)
00043 {
00044   MBL_LOG(ERR, logger(), msg);
00045   throw mbl_exception_abort(msg);
00046 }
00047 
00048 
00049 //=========================================================================
00050 // Actual main function (apart from exception-handling wrapper)
00051 //=========================================================================
00052 int main2(int argc, char *argv[])
00053 {
00054   vul_arg_base::set_help_precis("Compute various statistics of 1D data");
00055   vul_arg_base::set_help_description("DETAILED INFORMATION\n"
00056     "1. By default, data is read from stdin, unless the -i option is used to specify an input filename.\n\n"
00057     "2. By default, output is written to stdout, unless the -o option is used to specify an output filename.\n"
00058     "   When writing to an output file, results are *appended* to the specified file.\n"
00059     "   This permits the program to be run multiple times *sequentially* on different data, building up a list of comparable statistics.\n"
00060     "   Results are undefined if this program is run multiple times *simultaneously* with the -o option specifying the same file.\n\n"
00061     "3. A choice of 2 output formats is provided with the -fmt option: \"table\" and \"list\". The default is \"list\".\n"
00062     "   list: write each statistic on a separate line, including its title.\n"
00063     "   table: write all statistics on one line, arranged in columns, with optional column headers.\n"
00064     "     In \"table\" format, column headers are printed by default, unless the -h option is specified.\n"
00065     "4. The desired statistics may be specified in any order, but the output is printed in alphabetical order.\n"
00066   );
00067 
00068   // Parse the program arguments
00069   
00070   // These options are I/O and format-related
00071   vul_arg<vcl_string> in_file("-i", "input file containing scalar values (whitespace-separated); otherwise uses stdin", "");
00072   vul_arg<vcl_string> out_file("-o", "output file to append statistics; otherwise write to stdout", "");
00073   vul_arg<vcl_string> label("-label","Adds this label to each line outputting a statistic - useful for later grep");
00074   vul_arg<vcl_string> format("-fmt","Specify the output format, e.g. \"table\", \"list\" (default is list). See help for more details.");
00075   vul_arg<bool> nohead("-h","Specify this to SUPPRESS column headers in tabular format", false);
00076   vul_arg<vcl_string> sep("-sep", "String to use as a separator between columns in tabular format, e.g. \", \" or \"  \" (default=TAB)", "\t");
00077   // These options are statistical measures:
00078   vul_arg<bool> n("-n", "Specify this to record the number of samples", false);
00079   vul_arg<bool> mean("-mean", "Specify this to record the mean", false);
00080   vul_arg<bool> variance("-var", "Specify this to record the variance", false);
00081   vul_arg<bool> sd("-sd", "Specify this to record the standard deviation", false);
00082   vul_arg<bool> se("-se", "Specify this to record the standard error", false);
00083   vul_arg<bool> med("-med", "Specify this to record the median", false);
00084   vul_arg<bool> min("-min", "Specify this to record the minimum", false);
00085   vul_arg<bool> max("-max", "Specify this to record the maximum", false);
00086   vul_arg<vcl_vector<int> > pc("-pc", "Specify this switch with 1 or more comma-separated integers (e.g. 5,50,95) to record percentile(s)");
00087   vul_arg<vcl_vector<double> > quant("-q", "Specify this switch with 1 or more comma-separated floats (e.g. 0.05,0.50,0.95) to record quantile(s)");
00088   vul_arg<bool> sum("-sum", "Specify this to record the sum", false);
00089   vul_arg<bool> sum_squares("-ssq", "Specify this to record the sum of squares", false);
00090   vul_arg<bool> rms("-rms", "Specify this to record the rms", false);
00091   vul_arg<bool> mean_of_absolutes("-moa", "Specify this to record the mean_of_absolutes", false);
00092   vul_arg<bool> skewness("-skew", "Specify this to record the skewness", false);
00093   vul_arg<bool> kurtosis("-kurt", "Specify this to record the kurtosis", false);
00094   vul_arg<bool> absolute("-absolute", "Calculate statistics of absolute sample values", false);
00095   vul_arg_parse(argc, argv);
00096 
00097   // Try to open the input file if specified or use stdin
00098   vcl_istream* is=0;
00099   if (!in_file().empty())
00100   {
00101     is = new vcl_ifstream(in_file().c_str());
00102     if (!is || !is->good())
00103       do_error(vcl_string("Failed to open input file ") + in_file().c_str());
00104     MBL_LOG(DEBUG, logger(), "Opened input file: " << in_file().c_str());
00105   }
00106   else
00107   {
00108     is = &vcl_cin;
00109   }
00110   MBL_LOG(INFO, logger(), "in_file: " << (in_file.set() ? in_file() : vcl_string("stdin")));
00111 
00112   // Load the data from stream until end
00113   vcl_vector<double> data_vec;
00114   data_vec.assign(vcl_istream_iterator<double>(*is), vcl_istream_iterator<double>());
00115   if (data_vec.empty())
00116     do_error("Could not parse data file.");
00117   MBL_LOG(DEBUG, logger(), "data file contained " << data_vec.size() << " values.");
00118   
00119   if (absolute())
00120   {
00121     for (unsigned i=0;i<data_vec.size();++i) data_vec[i]=vcl_abs(data_vec[i]);
00122   }
00123 
00124   // Clean up if input was from a file
00125   {
00126     vcl_ifstream* ifs = dynamic_cast<vcl_ifstream*>(is);
00127     if (ifs)
00128     {
00129       ifs->close();
00130       delete ifs;
00131     }
00132     is = 0;
00133   }
00134 
00135   // Calculate the requested statistics
00136   mbl_sample_stats_1d data(data_vec);
00137   vcl_map<vcl_string, double> stats;
00138   if (n.set())                 stats["n"]=data.n_samples();
00139   if (mean.set())              stats["mean"]=data.mean();
00140   if (variance.set())          stats["var"]=data.variance();
00141   if (sd.set())                stats["sd"]=data.sd();
00142   if (se.set())                stats["se"]=data.stdError();
00143   if (med.set())               stats["med"]=data.median();
00144   if (min.set())               stats["min"]=data.min();
00145   if (max.set())               stats["max"]=data.max();
00146   if (sum.set())               stats["sum"]=data.sum();
00147   if (sum_squares.set())       stats["ssq"]=data.sum_squares();
00148   if (rms.set())               stats["rms"]=data.rms();
00149   if (mean_of_absolutes.set()) stats["moa"]=data.mean_of_absolutes();
00150   if (skewness.set())          stats["skew"]=data.skewness();
00151   if (kurtosis.set())          stats["kurt"]=data.kurtosis();
00152   if (pc.set())
00153   {
00154     for (vcl_vector<int>::const_iterator it=pc().begin(); it!=pc().end(); ++it)
00155     {
00156       vcl_string name = vul_sprintf("pc%02u", *it);
00157       stats[name] = data.nth_percentile(*it);
00158     }
00159   }
00160   if (quant.set())
00161   {
00162     for (vcl_vector<double>::const_iterator it=quant().begin(); it!=quant().end(); ++it)
00163     {
00164       vcl_string name = vul_sprintf("q%f", *it);
00165       stats[name] = data.quantile(*it);
00166     }
00167   }
00168 
00169   // Open output file if requested, otherwise use stdout
00170   vcl_ostream* os=0;
00171   if (!out_file().empty())
00172   {
00173     os = new vcl_ofstream(out_file().c_str(), vcl_ios::app);
00174     if (!os || !os->good())
00175       do_error(vcl_string("Failed to open outout file ") + out_file().c_str());
00176     MBL_LOG(DEBUG, logger(), "Opened output file: " << out_file().c_str());
00177   }
00178   else
00179   {
00180     os = &vcl_cout;
00181   }
00182 
00183 
00184   // Use provided label if specified, otherwise use input filename (or empty string).
00185   vcl_string my_label = label.set() ? label() : in_file();
00186   
00187   // Write statistics in 1 of multiple formats
00188   enum OutputFormat output_format = format()=="table" ? TABLE : LIST;
00189   switch (output_format)
00190   {
00191   case TABLE: // Tabular format
00192     {
00193       // Write a line of column headers unless suppressed
00194       if (!nohead())
00195       {
00196         if (os && os->good()) *os << '#' << sep();
00197         for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00198         {
00199           if (os && os->good()) *os << it->first << sep();
00200         }
00201         if (os && os->good()) *os << '\n';
00202       }
00203 
00204       // Write all statistics on one line arranged in columns
00205       if (os && os->good()) *os << my_label << sep();
00206       for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00207       {
00208         if (os && os->good()) 
00209           *os << it->second << sep();
00210       }
00211       if (os && os->good()) *os << vcl_endl;
00212     }
00213     break;
00214   
00215   default:
00216     {
00217       // List format
00218       for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00219       {
00220         if (os && os->good()) 
00221         {
00222           *os <<  my_label << " " << it->first << ": " << it->second << "\n";
00223         }
00224       }
00225     }
00226     break;
00227   }
00228 
00229   // Clean up if output was to a file
00230   {
00231     vcl_ofstream* ofs = dynamic_cast<vcl_ofstream*>(os);
00232     if (ofs)
00233     {
00234       ofs->close();
00235       delete ofs;
00236     }
00237     os = 0;
00238   }
00239 
00240   return 0;
00241 }
00242 
00243 
00244 //=========================================================================
00245 // main() function with exception-handling wrapper
00246 //=========================================================================
00247 int main(int argc, char *argv[])
00248 {
00249   int errcode = 0;
00250 
00251   // Initialize the logger
00252   mbl_logger::root().load_log_config_file();
00253 
00254   try
00255   {
00256     errcode = main2(argc, argv);
00257   }
00258   catch (const vcl_exception & e)
00259   {
00260     vcl_cerr << "ERROR: " << typeid(e).name() << '\n' <<
00261       e.what() << vcl_endl;
00262     errcode = 4;
00263   }
00264 
00265   return errcode;
00266 }
00267