Statistics.h
Go to the documentation of this file.
1 /**
2  *
3  * \brief Calculate statistics given a range of values.
4  *
5  * \author T.Voss, T. Glasmachers, O.Krause
6  * \date 2010-2011
7  *
8  * \par Copyright (c) 1998-2007:
9  * Institut f&uuml;r Neuroinformatik<BR>
10  * Ruhr-Universit&auml;t Bochum<BR>
11  * D-44780 Bochum, Germany<BR>
12  * Phone: +49-234-32-25558<BR>
13  * Fax: +49-234-32-14209<BR>
14  * eMail: Shark-admin@neuroinformatik.ruhr-uni-bochum.de<BR>
15  * www: http://www.neuroinformatik.ruhr-uni-bochum.de<BR>
16  * <BR>
17  *
18  *
19  * <BR><HR>
20  * This file is part of Shark. This library is free software;
21  * you can redistribute it and/or modify it under the terms of the
22  * GNU General Public License as published by the Free Software
23  * Foundation; either version 3, or (at your option) any later version.
24  *
25  * This library is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU General Public License for more details.
29  *
30  * You should have received a copy of the GNU General Public License
31  * along with this library; if not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 #ifndef SHARK_STATISTICS_H
35 #define SHARK_STATISTICS_H
36 
37 #include <shark/Core/Flags.h>
38 
39 #include <boost/range/iterator_range.hpp>
40 #include <boost/optional.hpp>
41 
42 #include <boost/accumulators/accumulators.hpp>
43 #include <boost/accumulators/statistics/stats.hpp>
44 
45 #include <boost/accumulators/statistics/count.hpp>
46 #include <boost/accumulators/statistics/max.hpp>
47 #include <boost/accumulators/statistics/min.hpp>
48 #include <boost/accumulators/statistics/mean.hpp>
49 #include <boost/accumulators/statistics/median.hpp>
50 #include <boost/accumulators/statistics/moment.hpp>
51 #include <boost/accumulators/statistics/p_square_quantile.hpp>
52 #include <boost/accumulators/statistics/variance.hpp>
53 #include <iostream>
54 #include <vector>
55 
56 namespace ba = boost::accumulators;
57 
58 namespace shark {
59 
60  /**
61  * \brief Calculate pre-defined statistics given a range of values.
62  *
63  * \sa examples/Statistics/StatisticsMain.cpp
64  *
65  * Calculate statistics from standard in:
66  * \code
67  * shark::Statistics stats;
68  * stats = std::for_each( std::istream_iterator<double>( std::cin ), std::istream_iterator<double>(), stats );
69  * std::cout << stats << std::endl;
70  * \endcode
71  * Implemented in terms of boost::accumulators.
72  */
73  struct Statistics {
74 
75  /** \cond IMPL */
76  typedef ba::accumulator_set<
77  double,
78  ba::stats<
79  ba::tag::median(ba::with_p_square_quantile),
80  ba::tag::density,
83  ba::tag::min,
84  ba::tag::max,
85  ba::tag::count
86  >
87  > AccumulatorType;
88  typedef ba::accumulator_set<double, ba::stats<ba::tag::p_square_quantile> > QuartileAccumulatorType;
89 
90  typedef double LowerQuantileProbability;
91  typedef double UpperQuantileProbability;
92  /** \endcond IMPL */
93 
94  /** \brief Histogram type */
95  typedef boost::iterator_range<
96  std::vector<
97  std::pair<double,double>
98  >::iterator
99  > histogram_type;
100 
101  /**
102  * \brief Tags the mean value.
103  */
104  struct Mean {};
105  /**
106  * \brief Tags the variance.
107  */
108  struct Variance {};
109  /**
110  * \brief Tags the unbiased variance (not implemented).
111  */
112  struct UnbiasedVariance {};
113  /**
114  * \brief Tags the histogram.
115  */
116  struct Histogram {};
117  /**
118  * \brief Tags the median.
119  */
120  struct Median {};
121  /**
122  * \brief Tags the lower quartile.
123  */
124  struct LowerQuartile {};
125  /**
126  * \brief Tags the upper quartile.
127  */
128  struct UpperQuartile {};
129  /**
130  * \brief Tags the minimum value.
131  */
132  struct Min {};
133  /**
134  * \brief Tags the maximum value.
135  */
136  struct Max {};
137  /**
138  * \brief Tags the number of samples.
139  */
140  struct NumSamples {};
141 
142  /**
143  * \brief Default c'tor.
144  * \param [in] lowerQuantileProbability Probability for the lower quantile, default value: 0.25.
145  * \param [in] upperQuantileProbability Probability for the upper quantile, default value: 0.75.
146  */
147  Statistics( double lowerQuantileProbability = 0.25, double upperQuantileProbability = 0.75 ) : m_acc( ba::density_cache_size = 5, ba::density_num_bins = 20 ),
148  m_accLowerQuartile( ba::quantile_probability = lowerQuantileProbability ),
149  m_accUpperQuartile( ba::quantile_probability = upperQuantileProbability ) {
150  }
151 
152  /**
153  * \brief Accesses the mean value of the supplied values.
154  */
155  double operator()( Mean mean ) const { return( ba::mean( m_acc ) ); }
156 
157  /**
158  * \brief Accesses the variance of the supplied values.
159  */
160  double operator()( Variance variance ) const { return( ba::variance( m_acc ) ); }
161 
162  /**
163  * \brief Accesses the histogram of the supplied values.
164  */
165  histogram_type operator()( Histogram histogram ) const { return( ba::density( m_acc ) ); }
166  /**
167  * \brief Accesses the median of the supplied values.
168  */
169  double operator()( Median median ) const { return( ba::median( m_acc ) ); }
170 
171  /**
172  * \brief Accesses the lower quartile of the supplied values.
173  */
174  double operator()( LowerQuartile lq ) const { return( ba::p_square_quantile( m_accLowerQuartile ) ); }
175 
176  /**
177  * \brief Accesses the upper quartile of the supplied values.
178  */
179  double operator()( UpperQuartile uq ) const { return( ba::p_square_quantile( m_accUpperQuartile ) ); }
180 
181  /**
182  * \brief Accesses the minimum of the supplied values.
183  */
184  double operator()( Min min ) const { return( ba::min( m_acc ) ); }
185 
186  /**
187  * \brief Accesses the maximum of the supplied values.
188  */
189  double operator()( Max max ) const { return( ba::max( m_acc ) ); }
190 
191  /**
192  * \brief Accesses the total number of samples.
193  */
194  std::size_t operator()( NumSamples numSamples ) const { return( ba::count( m_acc ) ); }
195 
196  /**
197  * \brief Updates statistics with the supplied value.
198  * \param [in] d The value.
199  */
200  void operator()( double d ) {
201  m_acc( d );
202  m_accLowerQuartile( d );
203  m_accUpperQuartile( d );
204  }
205 
206  /**
207  * \brief Calculates statistics for the supplied range of values.
208  * \tparam InputIterator Iterator type, needs to be a model of forward iterator.
209  * \param [in] begin Iterator pointing to the first valid element of the range.
210  * \param [in] end Iterator pointing behind the last valid element of the range.
211  */
212  template<class InputIterator>
213  void operator()( InputIterator begin , InputIterator end ) {
214  for(;begin != end; ++begin){
215  (*this)(*begin);
216  }
217  }
218 
219  /** \cond IMPL */
220  AccumulatorType m_acc;
221  QuartileAccumulatorType m_accLowerQuartile;
222  QuartileAccumulatorType m_accUpperQuartile;
223  /** \endcond IMPL */
224  };
225 
226  /**
227  * \brief Writes statistics to the supplied stream.
228  */
229  template<typename CharT, typename Traits>
230  static std::basic_ostream<CharT,Traits> & operator<<( std::basic_ostream<CharT,Traits> & s, const Statistics & stats ) {
231  s << "Sample size: " << stats( shark::Statistics::NumSamples() ) << std::endl;
232  s << "Min: " << stats( shark::Statistics::Min() ) << std::endl;
233  s << "Max: " << stats( shark::Statistics::Max() ) << std::endl;
234  s << "Mean: " << stats( shark::Statistics::Mean() ) << std::endl;
235  s << "Variance: " << stats( shark::Statistics::Variance() ) << std::endl;
236  s << "Median: " << stats( shark::Statistics::Median() ) << std::endl;
237  s << "Lower Quantile: " << stats( shark::Statistics::LowerQuartile() ) << std::endl;
238  s << "Upper Quantile: " << stats( shark::Statistics::UpperQuartile() ) << std::endl;
239 
240  return( s );
241  }
242 }
243 
244 #endif // SHARK_STATISTICS_H