NormalizeComponentsUnitVariance.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Data normalization to zero mean and unit variance
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date 2010, 2013
12  *
13  *
14  * \par Copyright 1995-2017 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://shark-ml.org/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
38 #define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
39 
40 
43 #include <shark/Data/Statistics.h>
44 
45 namespace shark {
46 
47 
48 ///
49 /// \brief Train a linear model to normalize the components of a dataset to unit variance, and optionally to zero mean.
50 ///
51 /// \par
52 /// Normalizing the components of a dataset works via
53 /// training a Normalizer model. This model is then
54 /// applied to the dataset in order to perform the
55 /// normalization. The same model can be applied to
56 /// different datasets.
57 ///
58 /// \par
59 /// The typical use case is that the Normalizer
60 /// model is trained on the training data. Later, as
61 /// "test" data comes in, the same model is used, of
62 /// course without being recalibrated. Thus, the model
63 /// used for normalization must be independent of the
64 /// dataset it was trained on.
65 ///
66 /// \par
67 /// Note that subtracting the mean destroys sparsity.
68 /// Therefore this feature is turned off by default.
69 /// If you have non-sparse data and you need to
70 /// move data to zero mean, not only to unit variance,
71 /// then enable the flag zeroMean in the constructor.
72 ///
73 template <class DataType = RealVector>
74 class NormalizeComponentsUnitVariance : public AbstractUnsupervisedTrainer< Normalizer<DataType> >
75 {
76 public:
78 
79  /// \brief Constructor
80  ///
81  /// \par
82  /// The normalizer scales the data to unit variance.
83  /// It can also remove the mean of the data. This is usually
84  /// desired, e.g., for neural network training. Note however
85  /// that this feature is sometimes undesirable since it can
86  /// destroy sparsity.
87  ///
88  /// \param zeroMean enable or disable data mean removal; stored in m_zeroMean and read when the model structure is set during training
    // NOTE(review): listing line 89 (the constructor's declarator) is not present in this view;
    // only the member-initializer list that copies zeroMean into m_zeroMean is visible.
90  : m_zeroMean(zeroMean){ }
91 
92  /// \brief From INameable: return the class name.
93  std::string name() const
94  { return "NormalizeComponentsUnitVariance"; }
95 
    // Training routine body: computes a per-component scale (and offset) from `input`
    // and hands them to `model` via setStructure().
    // NOTE(review): listing line 96 (the function signature that declares `model` and
    // `input`) is not present in this view - confirm parameter types against the original header.
97  {
    // Rejects inputs with fewer than two elements before any statistics are computed.
98  SHARK_RUNTIME_CHECK(input.numberOfElements() >= 2, "Input needs to consist of at least two points");
99  std::size_t dc = dataDimension(input); // used below as the number of components to process
100 
101  RealVector mean;
102  RealVector variance;
103  meanvar(input, mean, variance); // fills `mean` and `variance`; both are indexed per component below
104 
105  RealVector diagonal(dc); // per-component scale factor
106  RealVector vector(dc); // per-component offset
107 
108  for (std::size_t d=0; d != dc; d++){
109  double stddev = std::sqrt(variance(d));
    // A component with exactly zero standard deviation gets scale 0 and offset 0,
    // which avoids the division by zero in the else-branch.
110  if (stddev == 0.0)
111  {
112  diagonal(d) = 0.0;
113  vector(d) = 0.0;
114  }
115  else
116  {
    // Scale 1/stddev with offset -mean/stddev corresponds to (x - mean) / stddev,
    // assuming the model applies diagonal(d) * x(d) + vector(d) - TODO confirm against Normalizer.
117  diagonal(d) = 1.0 / stddev;
118  vector(d) = -mean(d) / stddev;
119  }
120  }
121 
    // With m_zeroMean set, both scale and offset are passed to the model;
    // otherwise only the scale is passed and the computed offset is discarded.
122  if (m_zeroMean)
123  model.setStructure(diagonal, vector);
124  else
125  model.setStructure(diagonal);
126  }
127 
128 protected:
    // NOTE(review): listing line 129 is not present in this view; presumably it declares
    // m_zeroMean, which the constructor initializes and the training body reads - confirm.
130 };
131 
132 
133 }
134 #endif