LassoRegression.cpp
//===========================================================================
/*!
 *
 *
 * \brief LASSO Regression
 *
 * This program demonstrates LASSO regression for the identification
 * of sparse coefficient vectors.
 *
 *
 *
 * \author T. Glasmachers
 * \date 2013
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

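// For reference: given examples (x_i, y_i), LASSO regression fits a linear
// model by minimizing the L1-regularized squared error
//
//     J(w) = (1/2) * sum_i (w^T x_i - y_i)^2  +  lambda * ||w||_1
//
// (the exact scaling of the data term is an implementation detail of the
// trainer). The L1 penalty drives many entries of w to exactly zero, which
// is why the method recovers sparse coefficient vectors.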

#include <shark/Data/DataDistribution.h>
#include <shark/Models/LinearModel.h>
#include <shark/Algorithms/Trainers/LassoRegression.h>

#include <iostream>
#include <fstream>

using namespace shark;
using namespace std;

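// Artificial sparse regression problem: only the first "informative" of the
// "dim" input dimensions ever carry signal. Each drawn example places a
// Gaussian value into one randomly chosen informative component and copies
// that value into the label; nnz-1 further entries, chosen among the
// non-informative dimensions, are filled with pure noise.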
class TestProblem : public LabeledDataDistribution<RealVector, RealVector>
{
public:
    TestProblem(size_t informative, size_t nnz, size_t dim)
    : m_informative(informative)
    , m_nnz(nnz)
    , m_dim(dim)
    { }


    void draw(RealVector& input, RealVector& label) const
    {
        input.resize(m_dim);
        input.clear();
        label.resize(1);

        // we have one informative component per example
        double g = random::gauss(random::globalRng);
        size_t i = random::discrete(random::globalRng, std::size_t(0), m_informative-1);
        input(i) = g;
        label(0) = g;

        // the rest is non-informative
        for (size_t n=1; n<m_nnz; n++)
        {
            size_t j = random::discrete(random::globalRng, m_informative, m_dim-1);
            input(j) = random::gauss(random::globalRng);
        }
    }

protected:
    size_t m_informative;
    size_t m_nnz;
    size_t m_dim;
};


int main(int argc, char** argv)
{
    // Define a test problem with 10 informative components out of 1000.
    // Each instance contains one informative and up to 49 noise components.
    // 100000 instances are drawn.
    TestProblem prob(10, 50, 1000);
    cout << "generating 100000 points ..." << flush;
    RegressionDataset data = prob.generateDataset(100000);
    cout << " done." << endl;

    // Set the regularization parameter.
    // For this problem the LASSO method identifies the correct
    // subset of 10 informative coefficients for a large range
    // of parameter values.
    double lambda = 1.0;
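    // Note: lambda = 0 would amount to unregularized least squares, while
    // increasing lambda drives more coefficients to exactly zero; a sweep
    // over several values (see the sketch after this listing) makes this
    // easy to check.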

    // trainer and model
    LinearModel<> model;
    LassoRegression<> trainer(lambda);

    // train the model
    cout << "LASSO training ..." << flush;
    trainer.train(model, data);
    cout << " done." << endl;

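    // The trained weight matrix has one row per label dimension; with
    // one-dimensional labels and 1000 input dimensions it is a 1x1000 matrix,
    // so row 0 holds all coefficients. By construction of TestProblem, only
    // the first 10 columns (the informative dimensions) should be non-zero.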
    // check non-zero coefficients
    RealMatrix m = model.matrix();
    size_t nnz = 0;
    size_t correct = 0;
    size_t wrong = 0;
    for (size_t j=0; j<m.size2(); j++)
    {
        if (m(0, j) != 0.0)
        {
            nnz++;
            if (j < 10) correct++;
            else wrong++;
        }
    }
    cout << "solution statistics:" << endl;
    cout << " number of non-zero coefficients: " << nnz << endl;
    cout << " correctly identified coefficients: " << correct << endl;
    cout << " wrongly identified coefficients: " << wrong << endl;
}
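
A natural follow-up experiment is to vary the regularization strength and watch how the number of non-zero coefficients changes. The sketch below is not part of the original example; it reuses only the classes already shown above, and the list of lambda values is an arbitrary choice for illustration.

    // Sketch: sparsity as a function of the regularization parameter.
    // Assumes the same TestProblem instance and data set as in main() above.
    for (double lambda : {0.01, 0.1, 1.0, 10.0, 100.0})
    {
        LinearModel<> model;
        LassoRegression<> trainer(lambda);
        trainer.train(model, data);

        // count the non-zero coefficients of the trained model
        RealMatrix m = model.matrix();
        size_t nnz = 0;
        for (size_t j=0; j<m.size2(); j++)
            if (m(0, j) != 0.0) nnz++;

        cout << "lambda = " << lambda << ": " << nnz << " non-zero coefficients" << endl;
    }

Typically the count shrinks towards the 10 truly informative coefficients as lambda grows, and for sufficiently large lambda all coefficients become zero.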