Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
Examples
Unsupervised
PCA.cpp
Go to the documentation of this file.
1
#include <
shark/Algorithms/Trainers/PCA.h
>
2
3
//header needed for data generation
4
#include <
shark/Statistics/Distributions/MultiVariateNormalDistribution.h
>
5
6
using namespace
shark
;
7
using namespace
std
;
8
/// In this example we run PCA on samples drawn from a known
/// multivariate Gaussian, recover the eigenvectors of the scatter
/// matrix, and project onto the subspace spanned by the two
/// eigenvectors with the largest eigenvalues.

/// Ground-truth principal components of the test distribution;
/// used later to eyeball the trained result.
double principalComponents[3][3] = {
	{ 5, 0, 0},
	{ 0, 2, 2},
	{ 0,-1, 1}
};

/// Number of points drawn from the test distribution.
std::size_t numberOfExamples = 30000;
26
///The test distribution is just a multivariate Gaussian.
27
UnlabeledData<RealVector>
createData
()
28
{
29
RealVector
mean
(3);
30
mean
(0) = 1;
31
mean
(1) = -1;
32
mean
(2) = 3;
33
34
// to create the covariance matrix we first put the
35
// copy the principal components in the matrix
36
// and than use an outer product
37
RealMatrix
covariance
(3,3);
38
for
(
int
i = 0; i != 3; ++i)
39
{
40
for
(
int
j = 0; j != 3; ++j)
41
{
42
covariance
(i,j) =
principalComponents
[i][j];
43
}
44
}
45
covariance = prod(trans(covariance),covariance);
46
47
//now we can create the distribution
48
MultiVariateNormalDistribution
distribution(covariance);
49
50
//and we sample from it
51
std::vector<RealVector> data(
numberOfExamples
);
52
for
(
auto
& sample: data)
53
{
54
//first element is the sample, second is the underlying uniform gaussian
55
sample = mean + distribution(
random::globalRng
).first;
56
}
57
return
createDataFromRange
(data);
58
}
59
60
61
int
main
(){
62
63
// We first create our problem. Since the PCA is a unsupervised Method,
64
// We use UnlabeledData instead of Datas.
65
UnlabeledData<RealVector>
data =
createData
();
66
67
// With the definition of the model, we declare, how many
68
// principal components we want. If we want all, we set
69
// inputs=outputs = 3, but since want to do a reduction, we
70
// use only 2 in the second argument. The third argument is
71
// the bias. pca needs a bias to work.
72
LinearModel<>
pcaModel(3,2,
true
);
73
74
// Now we can construct the PCA.
75
// We can decide whether we want a whitened output or not.
76
// For testing purposes, we don't want whitening in this
77
// example.
78
PCA
pca;
79
pca.
setWhitening
(
false
);
80
pca.
train
(pcaModel,data);
81
82
83
84
//Print the rescaled results.
85
//Should be the same as principalComponents, except for sign change
86
//and numerical errors.
87
cout <<
"RESULTS: "
<< std::endl;
88
cout <<
"======== "
<< std::endl << std::endl;
89
cout <<
"principal component 1: "
<< row(pcaModel.
matrix
(),0) * sqrt(pca.
eigenvalues
()(0)) << std::endl;
90
cout <<
"principal component 2: "
<< row(pcaModel.
matrix
(),1) * sqrt( pca.
eigenvalues
()(1) ) << std::endl;
91
92
}