shark/Algorithms/Trainers/PCA.h
//===========================================================================
/*!
 *
 * \brief       Principal Component Analysis
 *
 * \author      T. Glasmachers, C. Igel
 * \date        2010, 2011
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_ALGORITHMS_TRAINER_PCA_H
#define SHARK_ALGORITHMS_TRAINER_PCA_H

#include <shark/Core/DLLSupport.h>
#include <shark/Models/LinearModel.h>
#include <shark/Algorithms/Trainers/AbstractTrainer.h>

namespace shark {
/*!
 * \brief Principal Component Analysis
 *
 * Principal Component Analysis, also known as the
 * Karhunen-Loeve transformation, takes a symmetric
 * \f$ n \times n \f$ matrix \f$ A \f$ and uses its decomposition
 *
 * \f$
 * A = \Gamma \Lambda \Gamma^T,
 * \f$
 *
 * where \f$ \Lambda \f$ is the diagonal matrix of eigenvalues
 * of \f$ A \f$ and \f$ \Gamma \f$ is the orthogonal matrix
 * with the corresponding eigenvectors as columns.
 * \f$ \Gamma \f$ then defines a successive orthogonal rotation
 * that maximizes the variances of the coordinates, i.e., the
 * coordinate system is rotated in such a way that the correlation
 * between the new axes becomes zero. If there are \f$ p \f$ axes,
 * the first axis is rotated so that the points projected onto the
 * new axis have maximum variance. The remaining \f$ p - 1 \f$ axes
 * are then rotated such that the next axis covers a maximum part of
 * the variance not yet covered by the first axis. After the
 * rotation of \f$ p - 1 \f$ axes, the orientation of axis
 * no. \f$ p \f$ is fixed. A typical application of PCA is
 * dimensionality reduction by discarding the components with the
 * smallest eigenvalues/variances. Furthermore, the variances along
 * the principal directions may be rescaled to one, resulting in a
 * whitening of the data.
 */
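// A worked form of the mapping sketched above (an illustrative sketch,
// assuming the standard PCA encoder; see encoder() below): with data mean
// \f$ \bar x \f$ and the first \f$ m \f$ eigenvectors and eigenvalues
// collected in \f$ \Gamma_m \f$ and \f$ \Lambda_m \f$, the encoder computes
// \f$ y = \Gamma_m^T (x - \bar x) \f$, or with whitening enabled
// \f$ y = \Lambda_m^{-1/2} \Gamma_m^T (x - \bar x) \f$, so that every
// encoded coordinate has unit variance over the training data.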
class PCA : public AbstractUnsupervisedTrainer<LinearModel<> >
{
private:
	typedef AbstractUnsupervisedTrainer<LinearModel<> > base_type;
public:
	enum PCAAlgorithm { STANDARD, SMALL_SAMPLE, AUTO };
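	// The algorithm choice controls whether the covariance matrix is built
	// from the design matrix or from its transpose (see the m_algorithm
	// member below); the SMALL_SAMPLE variant targets the case of fewer
	// data points than input dimensions, and AUTO selects between the two
	// (a hedged reading of the member comment, not spelled out here).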

	/// Constructor.
	/// The parameter defines whether the model should also
	/// whiten the data.
	PCA(bool whitening = false)
	: m_whitening(whitening){
		m_algorithm = AUTO;
	};

	/// Constructor.
	/// The parameter defines whether the model should also
	/// whiten the data.
	/// The eigendecomposition of the data is stored in the PCA object.
	PCA(UnlabeledData<RealVector> const& inputs, bool whitening = false)
	: m_whitening(whitening){
		m_algorithm = AUTO;
		setData(inputs);
	};

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "PCA"; }

	/// If set to true, the encoded data has unit variance along
	/// the new coordinates.
	void setWhitening(bool whitening) {
		m_whitening = whitening;
	}

	/// Train the model to perform PCA. The model must be a
	/// LinearModel object with offset, and its output dimension
	/// defines the number of principal components
	/// represented. The model returned is the one given by the
	/// encoder() function (i.e., mapping from the original input
	/// space to the PCA coordinate system).
	void train(LinearModel<>& model, UnlabeledData<RealVector> const& inputs) {
		std::size_t m = model.outputShape().numElements(); ///< reduced dimensionality
		setData(inputs);   // compute PCs
		encoder(model, m); // define the model
	}

	//! Sets the input data and performs the PCA. This is a
	//! computationally costly operation. The eigendecomposition
	//! of the data is stored in the PCA object.
	SHARK_EXPORT_SYMBOL void setData(UnlabeledData<RealVector> const& inputs);

	//! Returns a model mapping the original data to the
	//! m-dimensional PCA coordinate system.
	SHARK_EXPORT_SYMBOL void encoder(LinearModel<>& model, std::size_t m = 0);

	//! Returns a model mapping encoded data from the
	//! m-dimensional PCA coordinate system back to the
	//! n-dimensional original coordinate system.
	SHARK_EXPORT_SYMBOL void decoder(LinearModel<>& model, std::size_t m = 0);

	//! Eigenvalues of last training. The number of eigenvalues
	//! is equal to the minimum of the input dimensions (i.e.,
	//! number of attributes) and the number of data points used
	//! for training the PCA.
	RealVector const& eigenvalues() const {
		return m_eigenvalues;
	}

	/// Returns the i-th eigenvalue.
	double eigenvalue(std::size_t i) const {
		SIZE_CHECK( i < m_l );
		if( i < m_eigenvalues.size())
			return m_eigenvalues(i);
		return 0.;
	}

	//! Eigenvectors of last training. The number of eigenvectors
	//! is equal to the minimum of the input dimensions (i.e.,
	//! number of attributes) and the number of data points used
	//! for training the PCA.
	RealMatrix const& eigenvectors() const {
		return m_eigenvectors;
	}

	/// Mean of last training.
	RealVector const& mean() const {
		return m_mean;
	}

protected:
	bool m_whitening;          ///< normalize variance yes/no
	RealMatrix m_eigenvectors; ///< eigenvectors
	RealVector m_eigenvalues;  ///< eigenvalues
	RealVector m_mean;         ///< mean value

	std::size_t m_n;           ///< number of attributes
	std::size_t m_l;           ///< number of training data points

	PCAAlgorithm m_algorithm;  ///< whether to use design matrix or its transpose for building covariance matrix
};


}
#endif // SHARK_ALGORITHMS_TRAINER_PCA_H
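
For orientation, here is a minimal usage sketch of the trainer declared above: it loads a data set, performs the eigendecomposition, builds an encoder onto the first two principal components together with the matching decoder, and reports the fraction of total variance captured by those two components. The file name "data.csv" and the use of importCSV are illustrative assumptions; any UnlabeledData<RealVector> source works the same way.

// Example (illustrative sketch, not part of PCA.h).
#include <shark/Algorithms/Trainers/PCA.h>
#include <shark/Data/Csv.h>
#include <iostream>

int main() {
	// Load unlabeled, real-valued data (assumed to be a numeric CSV file).
	shark::UnlabeledData<shark::RealVector> data;
	shark::importCSV(data, "data.csv");

	// Perform the eigendecomposition; 'true' also whitens the encoded data.
	shark::PCA pca(data, true);

	// Encoder: map inputs onto the first two principal components.
	shark::LinearModel<> enc;
	pca.encoder(enc, 2);
	shark::Data<shark::RealVector> encoded = enc(data);

	// Decoder: map the two-dimensional codes back to the input space.
	shark::LinearModel<> dec;
	pca.decoder(dec, 2);
	shark::Data<shark::RealVector> reconstructed = dec(encoded);

	// Fraction of total variance captured by the first two components,
	// computed from the stored eigenvalues.
	double total = 0.0;
	for (std::size_t i = 0; i != pca.eigenvalues().size(); ++i)
		total += pca.eigenvalue(i);
	double captured = pca.eigenvalue(0) + pca.eigenvalue(1);
	std::cout << "variance explained: " << captured / total << std::endl;
}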