NormalizeComponentsUnitInterval.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Data normalization to the unit interval
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date 2010, 2013
12  *
13  *
14  * \par Copyright 1995-2017 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://shark-ml.org/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITINTERVAL_H
38 #define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITINTERVAL_H
39 
40 
43 
44 namespace shark{
45 
46 
47 ///
48 /// \brief Train a model to normalize the components of a dataset to fit into the unit inverval
49 ///
50 /// \par
51 /// Normalizing the components of a dataset works via
52 /// training a LinearMap model. This model is then
53 /// applied to the dataset in order to perform the
54 /// normalization. The same model can be applied to
55 /// different datasets.
56 ///
57 /// \par
58 /// The typical use case is that the AffineLinearMap
59 /// model is trained on the training data. Later, as
60 /// "test" data comes in, the same model is used, of
61 /// course without being recalibrated. Thus, the model
62 /// used for normalization must be independent of the
63 /// dataset it was trained on.
64 ///
65 /// \par
66 /// Note that the transformation represented by this
67 /// trainer destroys sparsity of the data. Therefore
68 /// one may prefer NormalizeComponentsUnitVariance
69 /// particularly on sparse data.
70 ///
71 template <class DataType = RealVector>
72 class NormalizeComponentsUnitInterval : public AbstractUnsupervisedTrainer< Normalizer<DataType> >
73 {
74 public:
76 
78  { }
79 
80  /// \brief From INameable: return the class name.
81  std::string name() const
82  { return "NormalizeComponentsUnitInterval"; }
83 
85  std:: size_t ic = input.numberOfElements();
86  SHARK_RUNTIME_CHECK(ic >= 2, "Input needs to consist of at least two points");
87  std::size_t dc = dataDimension(input);
88 
89  RealVector min = input.element(0);
90  RealVector max = input.element(0);
91  for(std::size_t i=1; i != ic; i++){
92  for(std::size_t d = 0; d != dc; d++){
93  double x = input.element(i)(d);
94  min(d) = std::min(min(d), x);
95  max(d) = std::max(max(d), x);
96  }
97  }
98 
99  RealVector diagonal(dc);
100  RealVector offset(dc);
101 
102  for (std::size_t d=0; d != dc; d++)
103  {
104  if (min(d) == max(d))
105  {
106  diagonal(d) = 0.0;
107  offset(d) = -min(d) + 0.5;
108  }
109  else
110  {
111  double n = 1.0 / (max(d) - min(d));
112  diagonal(d) = n;
113  offset(d) = -min(d) * n;
114  }
115  }
116 
117  model.setStructure(diagonal, offset);
118  }
119 };
120 
121 
122 }
123 #endif