RegularizationNetworkTrainer.h
//===========================================================================
/*!
 *
 *
 * \brief       Trainer for a Regularization Network or a Gaussian Process
 *
 *
 *
 *
 * \author      T. Glasmachers
 * \date        2007-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_REGULARIZATIONNETWORKTRAINER_H
#define SHARK_ALGORITHMS_REGULARIZATIONNETWORKTRAINER_H


#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Models/Kernels/KernelHelpers.h>

namespace shark {

///
/// \brief Training of a regularization network.
///
/// A regularization network is a kernel-based model for
/// regression problems. The data are tuples \f$ (x_i, y_i) \f$
/// whose x-component is the input and whose y-component is a
/// real-valued label (see the tutorial on label conventions;
/// the implementation uses RealVector), together with a kernel
/// function k(x, x') and a regularization constant \f$ C > 0 \f$.
/// Let H denote the reproducing kernel Hilbert space induced by k,
/// and let \f$ \phi \f$ denote the corresponding feature map.
/// Then the regression function is of the form
/// \f[
///     f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and b given by the (primal)
/// optimization problem
/// \f[
///     \min \frac{1}{2} \|w\|^2 + C \sum_i L(y_i, f(x_i)),
/// \f]
/// where the simple quadratic loss is employed:
/// \f[
///     L(y, f(x)) = (y - f(x))^2 .
/// \f]
/// Regularization networks can be interpreted as a special
/// type of support vector machine (for regression, with
/// squared loss, and therefore with non-sparse weights).
///
/// Training a regularization network is identical to training a
/// Gaussian process for regression. The parameter \f$ C \f$ then
/// corresponds to the precision of the noise (denoted by \f$ \beta \f$
/// in Bishop's textbook), which is the inverse of the noise variance.
/// The noise variance is denoted by \f$ \sigma_n^2 \f$ in the textbook
/// by Rasmussen and Williams; accordingly, \f$ C = 1/\sigma_n^2 \f$.
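///
/// Training has a closed-form solution: with the kernel (Gram) matrix
/// \f$ K \f$ over the training inputs and mean-centered labels, the
/// expansion coefficients solve the regularized linear system
/// \f$ (K + \frac{1}{C} I)\,\alpha = y - \bar y \f$, and the label
/// mean \f$ \bar y \f$ is used as the offset b.
///
/// \par Example
/// A minimal usage sketch; the Gaussian kernel, its bandwidth, the
/// noise variance, and the data source are illustrative assumptions,
/// not part of this header:
/// \code
/// GaussianRbfKernel<RealVector> kernel(0.1);                        // k(x, x') = exp(-0.1 ||x - x'||^2)
/// RegularizationNetworkTrainer<RealVector> trainer(&kernel, 0.01);  // assumed noise variance 0.01, i.e. C = 100
/// RegressionDataset data;                                           // fill with (input, label) pairs
/// KernelExpansion<RealVector> model;
/// trainer.train(model, data);                                       // fits alpha and the offset
/// Data<RealVector> predictions = model(data.inputs());              // evaluate the fitted model
/// \endcode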

template <class InputType>
class RegularizationNetworkTrainer : public AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> >
{
public:
	typedef AbstractModel<InputType, RealVector> ModelType;
	typedef AbstractKernelFunction<InputType> KernelType;
	typedef AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> > base_type;

	/// \param kernel Kernel
	/// \param betaInv Inverse precision, i.e., the assumed noise variance and the inverse of the regularization parameter C
	/// \param unconstrained Indicates exponential encoding of the regularization parameter
	RegularizationNetworkTrainer(KernelType* kernel, double betaInv, bool unconstrained = false)
	: base_type(kernel, 1.0 / betaInv, false, unconstrained)
	{ }

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "RegularizationNetworkTrainer"; }

	/// \brief Returns the assumed noise variance (i.e., 1/C)
	double noiseVariance() const
	{ return 1.0 / this->C(); }
	/// \brief Sets the assumed noise variance (i.e., 1/C)
	void setNoiseVariance(double betaInv)
	{ this->C() = 1.0 / betaInv; }

	/// \brief Returns the precision (i.e., C), the inverse of the assumed noise variance
	double precision() const
	{ return this->C(); }
	/// \brief Sets the precision (i.e., C), the inverse of the assumed noise variance
	void setPrecision(double beta)
	{ this->C() = beta; }

	void train(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
		svm.setStructure(base_type::m_kernel, dataset.inputs(), true, labelDimension(dataset));

		// Setup the kernel matrix
		RealMatrix M = calculateRegularizedKernelMatrix(*(this->m_kernel), dataset.inputs(), noiseVariance());
		RealMatrix V = createBatch<RealVector>(dataset.labels().elements());
		RealVector mean = sum_rows(V) / V.size1();
		noalias(V) -= blas::repeat(mean, V.size1());

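		// Closed-form solution: solve the regularized linear system
		// (K + sigma_n^2 * I) * alpha = (Y - mean) for the expansion
		// coefficients; the label mean becomes the constant offset below.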
		// check whether the regularizer is large enough to keep the eigenvalues numerically stable
		if(noiseVariance() / max(diag(M)) < 1.e-5)
			noalias(svm.alpha()) = inv(M, blas::symm_semi_pos_def()) % V;
		else // the system is well conditioned, so the fast plain Cholesky decomposition can be used
			noalias(svm.alpha()) = inv(M, blas::symm_pos_def()) % V;
		noalias(svm.offset()) = mean;
	}
};


// A regularization network can be interpreted as a Gaussian
// process, with the same trainer:
#define GaussianProcessTrainer RegularizationNetworkTrainer
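// For example, the following two declarations are interchangeable
// (the kernel object and the noise variance 0.01 are illustrative):
//   GaussianProcessTrainer<RealVector> gp(&kernel, 0.01);
//   RegularizationNetworkTrainer<RealVector> rn(&kernel, 0.01);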


}
#endif