Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
include
shark
Algorithms
Trainers
NormalizeComponentsUnitVariance.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Data normalization to zero mean and unit variance
6
*
7
*
8
*
9
*
10
* \author T. Glasmachers
11
* \date 2010, 2013
12
*
13
*
14
* \par Copyright 1995-2017 Shark Development Team
15
*
16
* <BR><HR>
17
* This file is part of Shark.
18
* <http://shark-ml.org/>
19
*
20
* Shark is free software: you can redistribute it and/or modify
21
* it under the terms of the GNU Lesser General Public License as published
22
* by the Free Software Foundation, either version 3 of the License, or
23
* (at your option) any later version.
24
*
25
* Shark is distributed in the hope that it will be useful,
26
* but WITHOUT ANY WARRANTY; without even the implied warranty of
27
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28
* GNU Lesser General Public License for more details.
29
*
30
* You should have received a copy of the GNU Lesser General Public License
31
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
32
*
33
*/
34
//===========================================================================
35
36
37
#ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
38
#define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
39
40
41
#include <
shark/Models/Normalizer.h
>
42
#include <
shark/Algorithms/Trainers/AbstractTrainer.h
>
43
#include <
shark/Data/Statistics.h
>
44
45
namespace
shark
{
46
47
48
///
49
/// \brief Train a linear model to normalize the components of a dataset to unit variance, and optionally to zero mean.
50
///
51
/// \par
52
/// Normalizing the components of a dataset works via
53
/// training a Normalizer model. This model is then
54
/// applied to the dataset in order to perform the
55
/// normalization. The same model can be applied to
56
/// different datasets.
57
///
58
/// \par
59
/// The typical use case is that the Normalizer
60
/// model is trained on the training data. Later, as
61
/// "test" data comes in, the same model is used, of
62
/// course without being recalibrated. Thus, the model
63
/// used for normalization must be independent of the
64
/// dataset it was trained on.
65
///
66
/// \par
67
/// Note that subtracting the mean destroys sparsity.
68
/// Therefore this feature is turned off by default.
69
/// If you have non-sparse data and you need to
70
/// move data to zero mean, not only to unit variance,
71
/// then enable the flag zeroMean in the constructor.
72
///
73
template
<
class
DataType = RealVector>
74
class
NormalizeComponentsUnitVariance
:
public
AbstractUnsupervisedTrainer
< Normalizer<DataType> >
75
{
76
public
:
77
typedef
AbstractUnsupervisedTrainer< Normalizer<DataType>
>
base_type
;
78
79
/// \brief Constructor
80
///
81
/// \par
82
/// The normalizer scales the data to unit variance.
83
/// It can also remove the mean of the data. This is usually
84
/// desired, e.g., for neural network training. Note however
85
/// that this feature is sometimes undesirable since it can
86
/// destroy sparsity.
87
///
88
/// \param zeroMean enable or disable data mean removal
89
NormalizeComponentsUnitVariance
(
bool
zeroMean)
90
:
m_zeroMean
(zeroMean){ }
91
92
/// \brief From INameable: return the class name.
93
std::string
name
()
const
94
{
return
"NormalizeComponentsUnitVariance"
; }
95
96
void
train
(
Normalizer<DataType>
& model,
UnlabeledData<DataType>
const
& input)
97
{
98
SHARK_RUNTIME_CHECK
(input.
numberOfElements
() >= 2,
"Input needs to consist of at least two points"
);
99
std::size_t dc =
dataDimension
(input);
100
101
RealVector
mean
;
102
RealVector
variance
;
103
meanvar
(input, mean, variance);
104
105
RealVector diagonal(dc);
106
RealVector vector(dc);
107
108
for
(std::size_t d=0; d != dc; d++){
109
double
stddev = std::sqrt(
variance
(d));
110
if
(stddev == 0.0)
111
{
112
diagonal(d) = 0.0;
113
vector(d) = 0.0;
114
}
115
else
116
{
117
diagonal(d) = 1.0 / stddev;
118
vector(d) = -
mean
(d) / stddev;
119
}
120
}
121
122
if
(
m_zeroMean
)
123
model.
setStructure
(diagonal, vector);
124
else
125
model.
setStructure
(diagonal);
126
}
127
128
protected
:
129
bool
m_zeroMean
;
130
};
131
132
133
}
134
#endif