GaussianRbfKernel.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Radial Gaussian kernel
6  *
7  *
8  *
9  * \author T.Glasmachers, O. Krause, M. Tuma
10  * \date 2010-2012
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
#ifndef SHARK_MODELS_KERNELS_GAUSSIAN_RBF_KERNEL_H
#define SHARK_MODELS_KERNELS_GAUSSIAN_RBF_KERNEL_H

#include <shark/Models/Kernels/AbstractKernelFunction.h>

40 namespace shark{
41 
42 /// \brief Gaussian radial basis function kernel.
43 ///
44 /// Gaussian radial basis function kernel
45 /// \f$ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f$
46 /// with single bandwidth parameter \f$ \gamma \f$.
47 /// Optionally, the parameter can be encoded as \f$ \exp(\eta) \f$,
48 /// which allows for unconstrained optimization.
49 template<class InputType=RealVector>
50 class GaussianRbfKernel : public AbstractKernelFunction<InputType>
51 {
52 private:
54 
55  struct InternalState: public State{
56  RealMatrix norm2;
57  RealMatrix expNorm;
58 
59  void resize(std::size_t sizeX1, std::size_t sizeX2){
60  norm2.resize(sizeX1, sizeX2);
61  expNorm.resize(sizeX1, sizeX2);
62  }
63  };
64 public:
68 
69  GaussianRbfKernel(double gamma = 1.0, bool unconstrained = false){
70  m_gamma = gamma;
71  m_unconstrained = unconstrained;
75  }
76 
77  /// \brief From INameable: return the class name.
78  std::string name() const
79  { return "GaussianRbfKernel"; }
80 
81  RealVector parameterVector() const{
82  RealVector ret(1);
83  if (m_unconstrained){
84  ret(0) = std::log(m_gamma);
85  }
86  else{
87  ret(0) = m_gamma;
88  }
89  return ret;
90  }
91  void setParameterVector(RealVector const& newParameters){
92  SHARK_RUNTIME_CHECK(newParameters.size() == 1, "[GaussianRbfKernel::setParameterVector] invalid size of parameter vector");
93  if (m_unconstrained){
94  m_gamma = std::exp(newParameters(0));
95  }
96  else{
97  SHARK_RUNTIME_CHECK(newParameters(0) > 0.0, "[GaussianRbfKernel::setParameterVector] gamma must be positive");
98  m_gamma = newParameters(0);
99  }
100  }
101 
102  size_t numberOfParameters() const {
103  return 1;
104  }
105 
106  /// Get the bandwidth parameter value.
107  double gamma() const {
108  return m_gamma;
109  }
110 
111  /// Return ``standard deviation'' of Gaussian.
112  double sigma() const{
113  return 1. / std::sqrt(2 * m_gamma);
114  }
115 
116  /// Set the bandwidth parameter value.
117  /// \throws shark::Exception if gamma <= 0.
118  void setGamma(double gamma){
119  SHARK_RUNTIME_CHECK(gamma > 0.0, "[GaussianRbfKernel::setGamma] gamma must be positive");
120  m_gamma = gamma;
121  }
122 
123  /// Set ``standard deviation'' of Gaussian.
124  void setSigma(double sigma){
125  m_gamma = 1. / (2 * sigma * sigma);
126  }
127 
128  /// From ISerializable.
129  void read(InArchive& ar){
130  ar >> m_gamma;
131  ar >> m_unconstrained;
132  }
133 
134  /// From ISerializable.
135  void write(OutArchive& ar) const{
136  ar << m_gamma;
137  ar << m_unconstrained;
138  }
139 
140  ///\brief creates the internal state of the kernel
141  boost::shared_ptr<State> createState()const{
142  return boost::shared_ptr<State>(new InternalState());
143  }
144 
145  /// \brief evaluates \f$ k(x_1,x_2)\f$
146  ///
147  /// Gaussian radial basis function kernel
148  /// \f[ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f]
149  double eval(ConstInputReference x1, ConstInputReference x2) const{
150  SIZE_CHECK(x1.size() == x2.size());
151  double norm2 = distanceSqr(x2, x1);
152  double exponential = std::exp(-m_gamma * norm2);
153  return exponential;
154  }
155 
156  /// \brief evaluates \f$ k(x_1,x_2)\f$ and computes the intermediate value
157  ///
158  /// Gaussian radial basis function kernel
159  /// \f[ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f]
160  void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
161  SIZE_CHECK(batchX1.size2() == batchX2.size2());
162  std::size_t sizeX1=batchX1.size1();
163  std::size_t sizeX2=batchX2.size1();
164 
165  //configure state memory
166  InternalState& s=state.toState<InternalState>();
167  s.resize(sizeX1,sizeX2);
168 
169  //calculate kernel response
170  noalias(s.norm2)=distanceSqr(batchX1,batchX2);
171  noalias(s.expNorm)=exp(-m_gamma*s.norm2);
172  result=s.expNorm;
173  }
174 
175  void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{
176  SIZE_CHECK(batchX1.size2() == batchX2.size2());
177  result = distanceSqr(batchX1,batchX2);
178  noalias(result)=exp(-m_gamma*result);
179  }
180 
182  ConstBatchInputReference batchX1,
183  ConstBatchInputReference batchX2,
184  RealMatrix const& coefficients,
185  State const& state,
186  RealVector& gradient
187  ) const{
188  std::size_t sizeX1=batchX1.size1();
189  std::size_t sizeX2=batchX2.size1();
190  InternalState const& s = state.toState<InternalState>();
191 
192  //internal checks
193  SIZE_CHECK(batchX1.size2() == batchX2.size2());
194  SIZE_CHECK(s.norm2.size1() == sizeX1);
195  SIZE_CHECK(s.norm2.size2() == sizeX2);
196  SIZE_CHECK(s.expNorm.size1() == sizeX1);
197  SIZE_CHECK(s.expNorm.size2() == sizeX2);
198 
199  gradient.resize(1);
200  gradient(0)= - sum(coefficients *s.expNorm * s.norm2);
201  if(m_unconstrained){
202  gradient *= m_gamma;
203  }
204  }
206  ConstBatchInputReference batchX1,
207  ConstBatchInputReference batchX2,
208  RealMatrix const& coefficientsX2,
209  State const& state,
210  BatchInputType& gradient
211  ) const{
212  std::size_t sizeX1=batchX1.size1();
213  std::size_t sizeX2=batchX2.size1();
214  InternalState const& s = state.toState<InternalState>();
215 
216  //internal checks
217  SIZE_CHECK(batchX1.size2() == batchX2.size2());
218  SIZE_CHECK(s.norm2.size1() == sizeX1);
219  SIZE_CHECK(s.norm2.size2() == sizeX2);
220  SIZE_CHECK(s.expNorm.size1() == sizeX1);
221  SIZE_CHECK(s.expNorm.size2() == sizeX2);
222 
223  gradient.resize(sizeX1,batchX1.size2());
224  RealMatrix W = coefficientsX2*s.expNorm;
225  noalias(gradient) = prod(W,batchX2);
226  RealVector columnSum = sum_columns(coefficientsX2*s.expNorm);
227 
228  for(std::size_t i = 0; i != sizeX1; ++i){
229  noalias(row(gradient,i)) -= columnSum(i) * row(batchX1,i);
230  }
231  gradient*=2.0*m_gamma;
232  }
233 
234 
235  //~ /// \brief Evaluates \f$ \frac {\partial k(x_1,x_2)}{\partial \gamma}\f$ and \f$ \frac {\partial^2 k(x_1,x_2)}{\partial \gamma^2}\f$
236  //~ ///
237  //~ /// Gaussian radial basis function kernel
238  //~ /// \f[ \frac {\partial k(x_1,x_2)}{\partial \gamma} = - \| x_1 - x_2 \|^2 \cdot k(x_1,x_2) \f]
239  //~ /// \f[ \frac {\partial^2 k(x_1,x_2)}{\partial^2 \gamma^2} = \| x_1 - x_2 \|^4 \cdot k(x_1,x_2) \f]
240  //~ void parameterDerivative(ConstInputReference x1, ConstInputReference x2, Intermediate const& intermediate, RealVector& gradient, RealMatrix& hessian) const{
241  //~ SIZE_CHECK(x1.size() == x2.size());
242  //~ SIZE_CHECK(intermediate.size() == numberOfIntermediateValues(x1,x2));
243  //~ double norm2 = intermediate[0];
244  //~ double exponential = intermediate[1];
245 
246  //~ gradient.resize(1);
247  //~ hessian.resize(1, 1);
248  //~ if (!m_unconstrained){
249  //~ gradient(0) = -exponential * norm2;
250  //~ hessian(0, 0) = -gradient(0) * norm2;
251  //~ }
252  //~ else{
253  //~ gradient(0) = -exponential * norm2 * m_gamma;
254  //~ hessian(0, 0) = -gradient(0) * norm2 * m_gamma;
255  //~ }
256  //~ }
257  //~ /// \brief Evaluates \f$ \frac {\partial k(x_1,x_2)}{\partial x_1}\f$ and \f$ \frac {\partial^2 k(x_1,x_2)}{\partial x_1^2}\f$
258  //~ ///
259  //~ /// Gaussian radial basis function kernel
260  //~ /// \f[ \frac {\partial k(x_1,x_2)}{\partial x_1} = -2 \gamma \left( x_1 - x_2 \right)\cdot k(x_1,x_2) \f]
261  //~ /// \f[ \frac {\partial^2 k(x_1,x_2)}{\partial^2 x_1^2} =2 \gamma \left[ -k(x_1,x_2) \cdot \mathbb{I} - \frac {\partial k(x_1,x_2)}{\partial x_1} ( x_1 - x_2 )^T\right] \f]
262  //~ void inputDerivative(const InputType& x1, const InputType& x2, Intermediate const& intermediate, InputType& gradient, InputMatrixType& hessian) const{
263  //~ SIZE_CHECK(x1.size() == x2.size());
264  //~ SIZE_CHECK(intermediate.size() == numberOfIntermediateValues(x1,x2));
265  //~ double exponential = intermediate[1];
266  //~ gradient.resize(x1.size());
267  //~ noalias(gradient) = (2.0 * m_gamma * exponential) * (x2 - x1);
268  //~ hessian.resize(x1.size(), x1.size());
269  //~ noalias(hessian) = 2*m_gamma*outer_prod(gradient,x2 - x1)
270  //~ - RealIdentityMatrix(x1.size())*2*m_gamma*exponential;
271  //~ }
272 
273 protected:
274  double m_gamma; ///< kernel bandwidth parameter
275  bool m_unconstrained; ///< use log storage
276 };
277 
280 
281 
282 }
283 #endif