00001
00002
00003
00004
00005
00006
00007 #include <vcl_iostream.h>
00008 #include <vcl_iterator.h>
00009 #include <vcl_fstream.h>
00010 #include <vcl_string.h>
00011 #include <vcl_exception.h>
00012 #include <vcl_map.h>
00013 #include <vcl_typeinfo.h>
00014 #include <vul/vul_arg.h>
00015 #include <vul/vul_sprintf.h>
00016 #include <mbl/mbl_log.h>
00017 #include <mbl/mbl_exception.h>
00018 #include <mbl/mbl_sample_stats_1d.h>
00019
00020
00021
00022 enum OutputFormat
00023 {
00024 LIST,
00025 TABLE
00026 };
00027
00028
00029
00030
00031
00032 static mbl_logger& logger()
00033 {
00034 static mbl_logger l("mul.mbl.tools.sample_stats");
00035 return l;
00036 }
00037
00038
00039
00040
00041
00042 static void do_error(const vcl_string& msg)
00043 {
00044 MBL_LOG(ERR, logger(), msg);
00045 throw mbl_exception_abort(msg);
00046 }
00047
00048
00049
00050
00051
00052 int main2(int argc, char *argv[])
00053 {
00054 vul_arg_base::set_help_precis("Compute various statistics of 1D data");
00055 vul_arg_base::set_help_description("DETAILED INFORMATION\n"
00056 "1. By default, data is read from stdin, unless the -i option is used to specify an input filename.\n\n"
00057 "2. By default, output is written to stdout, unless the -o option is used to specify an output filename.\n"
00058 " When writing to an output file, results are *appended* to the specified file.\n"
00059 " This permits the program to be run multiple times *sequentially* on different data, building up a list of comparable statistics.\n"
00060 " Results are undefined if this program is run multiple times *simultaneously* with the -o option specifying the same file.\n\n"
00061 "3. A choice of 2 output formats is provided with the -fmt option: \"table\" and \"list\". The default is \"list\".\n"
00062 " list: write each statistic on a separate line, including its title.\n"
00063 " table: write all statistics on one line, arranged in columns, with optional column headers.\n"
00064 " In \"table\" format, column headers are printed by default, unless the -h option is specified.\n"
00065 "4. The desired statistics may be specified in any order, but the output is printed in alphabetical order.\n"
00066 );
00067
00068
00069
00070
00071 vul_arg<vcl_string> in_file("-i", "input file containing scalar values (whitespace-separated); otherwise uses stdin", "");
00072 vul_arg<vcl_string> out_file("-o", "output file to append statistics; otherwise write to stdout", "");
00073 vul_arg<vcl_string> label("-label","Adds this label to each line outputting a statistic - useful for later grep");
00074 vul_arg<vcl_string> format("-fmt","Specify the output format, e.g. \"table\", \"list\" (default is list). See help for more details.");
00075 vul_arg<bool> nohead("-h","Specify this to SUPPRESS column headers in tabular format", false);
00076 vul_arg<vcl_string> sep("-sep", "String to use as a separator between columns in tabular format, e.g. \", \" or \" \" (default=TAB)", "\t");
00077
00078 vul_arg<bool> n("-n", "Specify this to record the number of samples", false);
00079 vul_arg<bool> mean("-mean", "Specify this to record the mean", false);
00080 vul_arg<bool> variance("-var", "Specify this to record the variance", false);
00081 vul_arg<bool> sd("-sd", "Specify this to record the standard deviation", false);
00082 vul_arg<bool> se("-se", "Specify this to record the standard error", false);
00083 vul_arg<bool> med("-med", "Specify this to record the median", false);
00084 vul_arg<bool> min("-min", "Specify this to record the minimum", false);
00085 vul_arg<bool> max("-max", "Specify this to record the maximum", false);
00086 vul_arg<vcl_vector<int> > pc("-pc", "Specify this switch with 1 or more comma-separated integers (e.g. 5,50,95) to record percentile(s)");
00087 vul_arg<vcl_vector<double> > quant("-q", "Specify this switch with 1 or more comma-separated floats (e.g. 0.05,0.50,0.95) to record quantile(s)");
00088 vul_arg<bool> sum("-sum", "Specify this to record the sum", false);
00089 vul_arg<bool> sum_squares("-ssq", "Specify this to record the sum of squares", false);
00090 vul_arg<bool> rms("-rms", "Specify this to record the rms", false);
00091 vul_arg<bool> mean_of_absolutes("-moa", "Specify this to record the mean_of_absolutes", false);
00092 vul_arg<bool> skewness("-skew", "Specify this to record the skewness", false);
00093 vul_arg<bool> kurtosis("-kurt", "Specify this to record the kurtosis", false);
00094 vul_arg<bool> absolute("-absolute", "Calculate statistics of absolute sample values", false);
00095 vul_arg_parse(argc, argv);
00096
00097
00098 vcl_istream* is=0;
00099 if (!in_file().empty())
00100 {
00101 is = new vcl_ifstream(in_file().c_str());
00102 if (!is || !is->good())
00103 do_error(vcl_string("Failed to open input file ") + in_file().c_str());
00104 MBL_LOG(DEBUG, logger(), "Opened input file: " << in_file().c_str());
00105 }
00106 else
00107 {
00108 is = &vcl_cin;
00109 }
00110 MBL_LOG(INFO, logger(), "in_file: " << (in_file.set() ? in_file() : vcl_string("stdin")));
00111
00112
00113 vcl_vector<double> data_vec;
00114 data_vec.assign(vcl_istream_iterator<double>(*is), vcl_istream_iterator<double>());
00115 if (data_vec.empty())
00116 do_error("Could not parse data file.");
00117 MBL_LOG(DEBUG, logger(), "data file contained " << data_vec.size() << " values.");
00118
00119 if (absolute())
00120 {
00121 for (unsigned i=0;i<data_vec.size();++i) data_vec[i]=vcl_abs(data_vec[i]);
00122 }
00123
00124
00125 {
00126 vcl_ifstream* ifs = dynamic_cast<vcl_ifstream*>(is);
00127 if (ifs)
00128 {
00129 ifs->close();
00130 delete ifs;
00131 }
00132 is = 0;
00133 }
00134
00135
00136 mbl_sample_stats_1d data(data_vec);
00137 vcl_map<vcl_string, double> stats;
00138 if (n.set()) stats["n"]=data.n_samples();
00139 if (mean.set()) stats["mean"]=data.mean();
00140 if (variance.set()) stats["var"]=data.variance();
00141 if (sd.set()) stats["sd"]=data.sd();
00142 if (se.set()) stats["se"]=data.stdError();
00143 if (med.set()) stats["med"]=data.median();
00144 if (min.set()) stats["min"]=data.min();
00145 if (max.set()) stats["max"]=data.max();
00146 if (sum.set()) stats["sum"]=data.sum();
00147 if (sum_squares.set()) stats["ssq"]=data.sum_squares();
00148 if (rms.set()) stats["rms"]=data.rms();
00149 if (mean_of_absolutes.set()) stats["moa"]=data.mean_of_absolutes();
00150 if (skewness.set()) stats["skew"]=data.skewness();
00151 if (kurtosis.set()) stats["kurt"]=data.kurtosis();
00152 if (pc.set())
00153 {
00154 for (vcl_vector<int>::const_iterator it=pc().begin(); it!=pc().end(); ++it)
00155 {
00156 vcl_string name = vul_sprintf("pc%02u", *it);
00157 stats[name] = data.nth_percentile(*it);
00158 }
00159 }
00160 if (quant.set())
00161 {
00162 for (vcl_vector<double>::const_iterator it=quant().begin(); it!=quant().end(); ++it)
00163 {
00164 vcl_string name = vul_sprintf("q%f", *it);
00165 stats[name] = data.quantile(*it);
00166 }
00167 }
00168
00169
00170 vcl_ostream* os=0;
00171 if (!out_file().empty())
00172 {
00173 os = new vcl_ofstream(out_file().c_str(), vcl_ios::app);
00174 if (!os || !os->good())
00175 do_error(vcl_string("Failed to open outout file ") + out_file().c_str());
00176 MBL_LOG(DEBUG, logger(), "Opened output file: " << out_file().c_str());
00177 }
00178 else
00179 {
00180 os = &vcl_cout;
00181 }
00182
00183
00184
00185 vcl_string my_label = label.set() ? label() : in_file();
00186
00187
00188 enum OutputFormat output_format = format()=="table" ? TABLE : LIST;
00189 switch (output_format)
00190 {
00191 case TABLE:
00192 {
00193
00194 if (!nohead())
00195 {
00196 if (os && os->good()) *os << '#' << sep();
00197 for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00198 {
00199 if (os && os->good()) *os << it->first << sep();
00200 }
00201 if (os && os->good()) *os << '\n';
00202 }
00203
00204
00205 if (os && os->good()) *os << my_label << sep();
00206 for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00207 {
00208 if (os && os->good())
00209 *os << it->second << sep();
00210 }
00211 if (os && os->good()) *os << vcl_endl;
00212 }
00213 break;
00214
00215 default:
00216 {
00217
00218 for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00219 {
00220 if (os && os->good())
00221 {
00222 *os << my_label << " " << it->first << ": " << it->second << "\n";
00223 }
00224 }
00225 }
00226 break;
00227 }
00228
00229
00230 {
00231 vcl_ofstream* ofs = dynamic_cast<vcl_ofstream*>(os);
00232 if (ofs)
00233 {
00234 ofs->close();
00235 delete ofs;
00236 }
00237 os = 0;
00238 }
00239
00240 return 0;
00241 }
00242
00243
00244
00245
00246
00247 int main(int argc, char *argv[])
00248 {
00249 int errcode = 0;
00250
00251
00252 mbl_logger::root().load_log_config_file();
00253
00254 try
00255 {
00256 errcode = main2(argc, argv);
00257 }
00258 catch (const vcl_exception & e)
00259 {
00260 vcl_cerr << "ERROR: " << typeid(e).name() << '\n' <<
00261 e.what() << vcl_endl;
00262 errcode = 4;
00263 }
00264
00265 return errcode;
00266 }
00267