VectorStatistics.inl
Go to the documentation of this file.
1 namespace shark{
2 /*!
3  * \brief Calculates the mean and variance values of a dataset
4  *
5  * Given the vector of data, the mean and variance values
6  * are calculated as in the functions #mean and #variance.
7  *
8  * \param data Input data.
9  * \param meanVec Vector of mean values.
10  * \param varianceVec Vector of variances.
11  *
12  */
// Computes the mean vector and the per-dimension variance of a batched data set.
// The variance is normalised by data.numberOfElements() (division by N, not N-1).
// NOTE(review): listing lines 17-18 are absent from this view (the numbering jumps
// from 16 to 19); they presumably declare the meanVec and varianceVec output
// arguments used below -- confirm against the original file.
13 template<class Vec1T,class Vec2T,class Vec3T>
14 void meanvar
15 (
16  Data<Vec1T> const& data,
19 )
20 {
    // Precondition: the data set must not be empty. SIZE_CHECK is a project macro;
    // what it does on failure is not visible here.
21  SIZE_CHECK(!data.empty());
22  typedef typename Data<Vec1T>::const_batch_reference BatchRef;
23  std::size_t const dataSize = data.numberOfElements();
24  std::size_t elementSize=dataDimension(data);
25 
    // Size the output and reset it before accumulating; clear() is assumed to zero
    // the entries, since the += in the loop below relies on that.
26  varianceVec().resize(elementSize);
27  varianceVec().clear();
28 
29  meanVec()= mean(data);
30 
31  //accumulate the squared deviations from the mean, batch by batch
32  BOOST_FOREACH(BatchRef batch,data.batches()){
    // batch.size1() is used as the number of rows (samples) in this batch, so the
    // mean is repeated once per row before subtracting.
33  std::size_t batchSize = batch.size1();
34  noalias(varianceVec()) += sumRows(sqr(batch-repeat(meanVec,batchSize)));
35  }
    // Normalise the accumulated sum by the total number of elements in the data set.
36  varianceVec()/=dataSize;
37 }
38 
// Matrix overload of meanvar: the row count data().size1() is used as the number
// of samples and the column count data().size2() as the dimension. meanVec receives
// mean(data); varianceVec is resized to the column count and receives the summed
// squared deviations from the mean, divided by the number of rows.
// NOTE(review): listing lines 42-44 (the whole parameter list) are absent from this
// view; data, meanVec and varianceVec are presumably declared there -- confirm
// against the original file.
39 template<class MatT, class Vec1T,class Vec2T>
40 void meanvar
41 (
45 )
46 {
    // Precondition: the matrix must have at least one row and one column.
47  SIZE_CHECK(data().size1() > 0);
48  SIZE_CHECK(data().size2() > 0);
49 
50  const size_t dataSize = data().size1();
51  const size_t elementSize = data().size2();
52 
53  meanVec() = mean(data);
54 
55  varianceVec().resize(elementSize);
    // Subtract the mean (repeated once per row) from every row, square the result
    // and reduce with sumRows. The result is assigned rather than accumulated, so
    // no prior clear() is needed.
56  noalias(varianceVec()) = sumRows(sqr(data-repeat(meanVec,dataSize)));
57 
    // Normalise by the number of rows (division by N, not N-1).
58  varianceVec()/=dataSize;
59 }
60 
61 
62 /*!
63  * \brief Calculates the mean and covariance values of a set of data
64  *
65  * Given the vector of data, the mean vector and the covariance matrix
66  * are calculated as in the functions #mean and #covariance.
67  *
68  * \param data Input data.
69  * \param meanVec Vector of mean values.
70  * \param covariance Covariance matrix.
71  *
72  */
// Computes the mean vector and the covariance matrix of a batched data set.
// The covariance is normalised by data.numberOfElements() (division by N, not N-1).
// NOTE(review): listing lines 77-78 are absent from this view (the numbering jumps
// from 76 to 79); they presumably declare the meanVec and covariance output
// arguments used below -- confirm against the original file.
73 template<class Vec1T,class Vec2T,class MatT>
74 void meanvar
75 (
76  const Data<Vec1T>& data,
79 ){
    // Precondition: the data set must not be empty.
80  SIZE_CHECK(!data.empty());
81  typedef typename Batch<Vec1T>::type BatchType;
82  std::size_t const dataSize = data.numberOfElements();
83  std::size_t elementSize=dataDimension(data);
84 
    // Square output matrix, reset before accumulation; clear() is assumed to zero
    // the entries, since the per-batch update below adds to the existing content.
85  covariance().resize(elementSize,elementSize);
86  covariance().clear();
87 
88  meanVec() = mean(data);
89  //accumulate the contribution of every mean-free batch to the covariance
90  for(std::size_t b = 0; b != data.numberOfBatches(); ++b){
91  //make the batch mean-free
92  BatchType batch = data.batch(b)-repeat(meanVec,data.batch(b).size1());
    // presumably adds 1.0 * trans(batch) * batch to covariance -- the exact
    // semantics of symmRankKUpdate are not visible here; TODO confirm
93  symmRankKUpdate(trans(batch),covariance,1.0);
94  }
    // Normalise the accumulated sum by the total number of elements.
95  covariance()/=dataSize;
96 }
97 
98 /*!
99  * \brief Calculates the mean vector of array "x".
100  *
101  * Given a \em d -dimensional array \em x with size \em N1 x ... x \em Nd,
102  * this function calculates the mean vector given as:
103  * \f[
104  * mean_j = \frac{1}{N1} \sum_{i=1}^{N1} x_{i,j}
105  * \f]
106  * Example:
107  * \f[
108  * \left(
109  * \begin{array}{*{4}{c}}
110  * 1 & 2 & 3 & 4\\
111  * 5 & 6 & 7 & 8\\
112  * 9 & 10 & 11 & 12\\
113  * \end{array}
114  * \right)
115  * \longrightarrow
116  * \frac{1}{3}
117  * \left(
118  * \begin{array}{*{4}{c}}
119  * 1+5+9 & 2+6+10 & 3+7+11 & 4+8+12\\
120  * \end{array}
121  * \right)
122  * \longrightarrow
123  * \left(
124  * \begin{array}{*{4}{c}}
125  * 5 & 6 & 7 & 8\\
126  * \end{array}
127  * \right)
128  * \f]
129  *
130  * \param data input data, from which the
131  * mean value will be calculated
132  * \return the mean vector of \em x
133  */
// Mean of a batched data set: sums all batches and divides by the element count.
// NOTE(review): listing line 135 (the function signature and opening brace) is
// absent from this view; the parameter `data` used below is presumably declared
// there -- confirm against the original file.
134 template<class VectorType>
    // Precondition: the data set must not be empty (also guards the division below).
136  SIZE_CHECK(!data.empty());
137 
    // Accumulator with one entry per data dimension, initialised to 0.0.
138  VectorType mean(dataDimension(data),0.0);
139 
140  typedef typename Data<VectorType>::const_batch_reference BatchRef;
141 
142  BOOST_FOREACH(BatchRef batch, data.batches()){
    // Two-argument sumRows: its result is discarded here, so it presumably adds
    // the row sum of the batch into `mean` in place -- TODO confirm.
143  sumRows(batch,mean);
144  }
145  mean /= data.numberOfElements();
146  return mean;
147 }
148 
// Matrix overload of mean: reduces the rows of data() with sumRows and divides by
// the row count data().size1().
// NOTE(review): listing lines 150 (signature and opening brace) and 153 (presumably
// the declaration of the local `mean`) are absent from this view -- confirm against
// the original file.
149 template<class MatrixType>
    // Only the column count is checked in the visible lines; no check that
    // size1() > 0 is visible, although size1() is the divisor below.
151  SIZE_CHECK(data().size2() > 0);
152 
    // Reset the accumulator before summing; clear() is assumed to zero the entries.
154  mean.clear();
155 
    // presumably adds the row sum of data() into `mean` in place -- TODO confirm
156  sumRows(data(),mean);
157 
158  mean /= data().size1();
159  return mean;
160 }
161 
162 /*!
163  * \brief Calculates the variance vector of array "x".
164  *
165  * Given a \em d -dimensional array \em x with size \em N1 x ... x \em Nd
166  * and mean value vector \em m,
167  * this function calculates the variance vector given as:
168  * \f[
169  * variance_j = \frac{1}{N1} \sum_{i=1}^{N1} (x_{i,j} - m_j)^2
170  * \f]
171  *
172  * \param data input data from which the variance will be calculated
173  * \return the variance vector of \em x
174  */
// Convenience wrapper: runs meanvar on the data and returns only the variance
// vector; the mean computed alongside it is discarded.
// NOTE(review): listing line 176 (the function signature) is absent from this view;
// the parameter `data` used below is presumably declared there -- confirm against
// the original file.
175 template<class VectorType>
177 {
178  RealVector m; // vector of mean values.
179  RealVector v; // vector of variance values
180 
    // meanvar fills both m and v; only v is returned.
181  meanvar(data,m,v);
182  return v;
183 }
184 
185 /*!
186  * \brief Calculates the covariance matrix of the data vectors stored in
187  * data.
188  *
189  * Given a Set \f$X = (x_{ij})\f$ of \f$n\f$ vectors with length \f$N\f$,
190  * the function calculates the covariance matrix given as
191  *
192  * \f$
193  * Cov = (c_{kl}) \mbox{,\ } c_{kl} = \frac{1}{n} \sum_{i=1}^n
194  * (x_{ik} - \overline{x_k})(x_{il} - \overline{x_l})\mbox{,\ }
195  * k,l = 1, \dots, N
196  * \f$
197  *
198  * where \f$\overline{x_j} = \frac{1}{n} \sum_{i = 1}^n x_{ij}\f$ is the
199  * mean value of \f$x_j \mbox{,\ }j = 1, \dots, N\f$.
200  *
201  * \param data The \f$n \times N\f$ input matrix.
202  * \return \f$N \times N\f$ matrix of covariance values.
203  */
// Convenience wrapper: runs meanvar on the data and returns only the covariance
// matrix; the mean computed alongside it is discarded.
// NOTE(review): listing line 205 (the function signature and opening brace) is
// absent from this view; the parameter `data` used below is presumably declared
// there -- confirm against the original file.
204 template<class VectorType>
206  RealVector mean;
207  RealMatrix covariance;
    // The normalisation is whatever the selected meanvar overload applies; the
    // covariance overload visible in this file divides by data.numberOfElements().
208  meanvar(data,mean,covariance);
209  return covariance;
210 }
211 
212 /*!
213  * \brief Calculates the coefficient of correlation matrix of the data
214  * vectors stored in data.
215  *
216  * Given a matrix \f$X = (x_{ij})\f$ of \f$n\f$ vectors with length \f$N\f$,
217  * the function calculates the coefficient of correlation matrix given as
218  *
219  * \f$
220  * r := (r_{kl}) \mbox{,\ } r_{kl} =
221  * \frac{c_{kl}}{\Delta x_k \Delta x_l}\mbox{,\ }
222  * k,l = 1, \dots, N
223  * \f$
224  *
225  * where \f$c_{kl}\f$ is the entry \f$(k,l)\f$ of the covariance matrix of
226  * \f$X\f$, and \f$\Delta x_k\f$ and \f$\Delta x_l\f$ are the
227  * standard deviations of \f$x_k\f$ and \f$x_l\f$ respectively.
228  *
229  * \param data The \f$n \times N\f$ input matrix.
230  * \return The \f$N \times N\f$ coefficient of correlation matrix.
231  */
// Turns the square matrix C into a correlation-coefficient matrix in place and
// returns it.
// NOTE(review): listing lines 233 (the function signature) and 235 (the declaration
// of C) are absent from this view; C is presumably initialised with the covariance
// matrix of the input data -- confirm against the original file.
232 template<class VectorType>
234 {
236 
    // Walk the strict lower triangle (j < i) and write each result to both (i,j)
    // and (j,i), so the matrix stays symmetric.
237  for (std::size_t i = 0; i < C.size1(); ++i)
238  for (std::size_t j = 0; j < i; ++j)
    // If either diagonal entry is zero, the coefficient is set to 0 rather than
    // dividing by zero.
239  if (C(i, i) == 0 || C(j, j) == 0)
240  C(i, j) = C(j, i) = 0;
241  else
242  C(i, j) = C(j , i) = C(i, j) / std::sqrt(C(i, i) * C(j, j));
243 
    // The diagonal is overwritten only after the loop above, because that loop
    // still reads the original diagonal entries. Every diagonal entry becomes 1,
    // including those that were zero.
244  for (std::size_t i = 0; i < C.size1(); ++i)
245  C(i, i) = 1;
246 
247  return C;
248 }
249 
250 }