PCA.cpp
Go to the documentation of this file.
2 
3 //header needed for data generation
5 
6 using namespace shark;
7 using namespace std;
8 
9 ///In this test, we will use PCA to calculate the
10 ///eigenvectors of a scatter matrix and do a
11 ///reduction of the subspace to the space
12 ///spanned by the two eigenvectors with the biggest
13 ///eigenvalues.
14 
15 ///the principal components of our multivariate data distribution
16 ///we will use them later for checking
17 double principalComponents[3][3] =
18 {
19  { 5, 0, 0},
20  { 0, 2, 2},
21  { 0,-1, 1}
22 };
23 
24 std::size_t numberOfExamples = 30000;
25 
26 ///The test distribution is just a multivariate Gaussian.
28 {
29  RealVector mean(3);
30  mean(0) = 1;
31  mean(1) = -1;
32  mean(2) = 3;
33 
34  // to create the covariance matrix we first put the
35  // copy the principal components in the matrix
36  // and than use an outer product
37  RealMatrix covariance(3,3);
38  for(int i = 0; i != 3; ++i)
39  {
40  for(int j = 0; j != 3; ++j)
41  {
42  covariance(i,j) = principalComponents[i][j];
43  }
44  }
45  covariance = prod(trans(covariance),covariance);
46 
47  //now we can create the distribution
48  MultiVariateNormalDistribution distribution(covariance);
49 
50  //and we sample from it
51  std::vector<RealVector> data(numberOfExamples);
52  for (auto& sample: data)
53  {
54  //first element is the sample, second is the underlying uniform gaussian
55  sample = mean + distribution(random::globalRng).first;
56  }
57  return createDataFromRange(data);
58 }
59 
60 
61 int main(){
62 
63  // We first create our problem. Since the PCA is a unsupervised Method,
64  // We use UnlabeledData instead of Datas.
66 
67  // With the definition of the model, we declare, how many
68  // principal components we want. If we want all, we set
69  // inputs=outputs = 3, but since want to do a reduction, we
70  // use only 2 in the second argument. The third argument is
71  // the bias. pca needs a bias to work.
72  LinearModel<> pcaModel(3,2,true);
73 
74  // Now we can construct the PCA.
75  // We can decide whether we want a whitened output or not.
76  // For testing purposes, we don't want whitening in this
77  // example.
78  PCA pca;
79  pca.setWhitening(false);
80  pca.train(pcaModel,data);
81 
82 
83 
84  //Print the rescaled results.
85  //Should be the same as principalComponents, except for sign change
86  //and numerical errors.
87  cout << "RESULTS: " << std::endl;
88  cout << "======== " << std::endl << std::endl;
89  cout << "principal component 1: " << row(pcaModel.matrix(),0) * sqrt(pca.eigenvalues()(0)) << std::endl;
90  cout << "principal component 2: " << row(pcaModel.matrix(),1) * sqrt( pca.eigenvalues()(1) ) << std::endl;
91 
92 }