NormalizedKernel.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Normalization of a kernel function.
6  *
7  *
8  *
9  * \author T.Glasmachers, O. Krause, M. Tuma
10  * \date 2010, 2011
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_KERNELS_NORMALIZED_KERNEL_H
36 #define SHARK_MODELS_KERNELS_NORMALIZED_KERNEL_H
37 
38 
40 
41 namespace shark {
42 
43 
44 /// \brief Normalized version of a kernel function
45 ///
46 /// For a positive definite kernel k, the normalized kernel
47 /// \f[ \tilde k(x, y) := \frac{k(x, y)}{\sqrt{k(x, x) \cdot k(y, y)}} \f]
48 /// is again a positive definite kernel function.
49 template<class InputType=RealVector>
50 class NormalizedKernel : public AbstractKernelFunction<InputType>
51 {
52 private:
54 
55  struct InternalState: public State{
56  RealMatrix kxy;
57  RealVector kxx;
58  RealVector kyy;
59 
60  boost::shared_ptr<State> stateKxy;
61  std::vector<boost::shared_ptr<State> > stateKxx;
62  std::vector<boost::shared_ptr<State> > stateKyy;
63 
64  void resize(std::size_t sizeX1,std::size_t sizeX2, AbstractKernelFunction<InputType> const* base){
65  kxy.resize(sizeX1,sizeX2);
66  kxx.resize(sizeX1);
67  kyy.resize(sizeX2);
68  stateKxx.resize(sizeX1);
69  stateKyy.resize(sizeX2);
70  stateKxy = base->createState();
71  for(std::size_t i = 0; i != sizeX1;++i){
72  stateKxx[i] = base->createState();
73  }
74  for(std::size_t i = 0; i != sizeX2;++i){
75  stateKyy[i] = base->createState();
76  }
77  }
78  };
79 public:
// NOTE(review): the constructor declaration (documentation lines 84 and 86-90)
// was dropped by the doc extraction; only the assertion from its body survives
// below. Presumably it takes AbstractKernelFunction<InputType>* base, stores it
// in m_base and forwards the base kernel's derivative capability flags --
// TODO recover from the original header.
83 
85  SHARK_ASSERT( base != NULL );
91  }
92 
93  /// \brief From INameable: return the class name.
94  std::string name() const
95  { return "NormalizedKernel<" + m_base->name() + ">"; }
96 
97  RealVector parameterVector() const{
98  return m_base->parameterVector();
99  }
100 
101  void setParameterVector(RealVector const& newParameters){
102  m_base->setParameterVector(newParameters);
103  }
104 
105  std::size_t numberOfParameters() const{
106  return m_base->numberOfParameters();
107  }
108 
109  ///\brief creates the internal state of the kernel
110  boost::shared_ptr<State> createState()const{
111  InternalState* state = new InternalState();
112  return boost::shared_ptr<State>(state);
113  }
114 
115  ///evaluates \f$ k(x,y) \f$
116  ///
117  /// calculates
118  /// \f[ \tilde k(x, y) := \frac{k(x, y)}{\sqrt{k(x, x) \cdot k(y, y)}} \f]
119  double eval(ConstInputReference x1, ConstInputReference x2) const{
120  double val = m_base->eval(x1, x2);
121  val /= std::sqrt(m_base->eval(x1, x1));
122  val /= std::sqrt(m_base->eval(x2, x2));
123  return val;
124  }
125 
126 
127  ///evaluates \f$ k(x_i,y_j) \f$ for a batch of inputs x=(x...x_n) and x=(y_1...y_m)
128  ///
129  /// calculates
130  /// \f[ \tilde k(x_i,y_j) := \frac{k(x_i,y_j)}{\sqrt{k(x_i,x_i) \cdot k(y_j, y_j)}} \f]
131  void eval(ConstBatchInputReference const& batchX1, ConstBatchInputReference const& batchX2, RealMatrix& result, State& state) const{
132  InternalState& s = state.toState<InternalState>();
133 
134  std::size_t sizeX1 = batchSize(batchX1);
135  std::size_t sizeX2 = batchSize(batchX2);
136  s.resize(sizeX1,sizeX2,m_base);
137  result.resize(sizeX1,sizeX2);
138 
139  m_base->eval(batchX1, batchX2,s.kxy, *s.stateKxy);
140 
141 
142  //possible very slow way to evaluate
143  //we need to calculate k(x_i,x_i) and k(y_j,y_j) for every element.
144  //we do it by copying the single element in a batch of size 1 and evaluating this.
145  //the following could be made easier with an interface like
146  //m_base->eval(batchX1,s.kxx,s.statekxx);
147  BatchInputType singleBatch = Batch<InputType>::createBatch(getBatchElement(batchX1,0));
148  RealMatrix singleResult(1,1);
149  for(std::size_t i = 0; i != sizeX1;++i){
150  getBatchElement(singleBatch,0) = getBatchElement(batchX1,i);
151  m_base->eval(singleBatch,singleBatch,singleResult,*s.stateKxx[i]);
152  s.kxx[i] = singleResult(0,0);
153  }
154 
155  for(std::size_t j = 0; j != sizeX2;++j){
156  getBatchElement(singleBatch,0) = getBatchElement(batchX2,j);
157  m_base->eval(singleBatch,singleBatch,singleResult,*s.stateKyy[j]);
158  s.kyy[j] = singleResult(0,0);
159  }
160  RealVector sqrtKyy=sqrt(s.kyy);
161 
162  //finally calculate the result
163  //r(i,j) = k(x_i,x_j)/sqrt(k(x_i,x_i))*sqrt(k(x_j,kx_j))
164  noalias(result) = s.kxy / outer_prod(sqrt(s.kxx),sqrtKyy);
165  }
166 
167  ///evaluates \f$ k(x,y) \f$ for a batch of inputs
168  ///
169  /// calculates
170  /// \f[ \tilde k(x, y) := \frac{k(x, y)}{\sqrt{k(x, x) \cdot k(y, y)}} \f]
171  void eval(ConstBatchInputReference const& batchX1, ConstBatchInputReference const& batchX2, RealMatrix& result) const{
172  std::size_t sizeX1 = batchSize(batchX1);
173  std::size_t sizeX2 = batchSize(batchX2);
174 
175  m_base->eval(batchX1, batchX2,result);
176 
177  RealVector sqrtKyy(sizeX2);
178  for(std::size_t j = 0; j != sizeX2;++j){
179  sqrtKyy(j) = std::sqrt(m_base->eval(getBatchElement(batchX2,j),getBatchElement(batchX2,j)));
180  }
181 
182  for(std::size_t i = 0; i != sizeX1;++i){
183  double sqrtKxx = std::sqrt(m_base->eval(getBatchElement(batchX2,i),getBatchElement(batchX2,i)));
184  noalias(row(result,i)) = sqrtKxx* row(result,i) / sqrtKyy;
185  }
186  }
187 
188  /// calculates the weighted derivate w.r.t. the parameters \f$ w \f$
189  ///
190  /// The derivative for a single element is calculated as follows:
191  ///\f[ \frac{\partial \tilde k_w(x, y)}{\partial w} = \frac{k_w'(x,y)}{\sqrt{k_w(x,x) k_w(y,y)}} - \frac{k_w(x,y) \left(k_w(y,y) k_w'(x,x)+k_w(x,x) k_w'(y,y)\right)}{2 (k_w(x,x) k_w(y,y))^{3/2}} \f]
192  /// where \f$ k_w'(x, y) = \partial k_w(x, y) / \partial w \f$.
194  ConstBatchInputReference const& batchX1,
195  ConstBatchInputReference const& batchX2,
196  RealMatrix const& coefficients,
197  State const& state,
198  RealVector& gradient
199  ) const{
200  ensure_size(gradient,numberOfParameters());
201  InternalState const& s = state.toState<InternalState>();
202  std::size_t sizeX1 = batchSize(batchX1);
203  std::size_t sizeX2 = batchSize(batchX2);
204 
205  RealMatrix weights = coefficients / sqrt(outer_prod(s.kxx,s.kyy));
206 
207  m_base->weightedParameterDerivative(batchX1,batchX2,weights,*s.stateKxy,gradient);
208 
209  noalias(weights) *= s.kxy;
210  RealVector wx = sum_columns(weights) / (2.0 * s.kxx);
211  RealVector wy = sum_rows(weights) / (2.0 * s.kyy);
212 
213  //the following mess could be made easier with an interface like
214  //m_base->weightedParameterDerivative(batchX1,wx,s.statekxx,subGradient);
215  //m_base->weightedParameterDerivative(batchX2,wy,s.statekyy,subGradient);
216  //(calculating the weighted parameter derivative of k(x_i,x_i) or (k(y_i,y_i)
217  RealVector subGradient(gradient.size());
218  BatchInputType singleBatch = Batch<InputType>::createBatch(getBatchElement(batchX1,0));
219  RealMatrix coeff(1,1);
220  for(std::size_t i = 0; i != sizeX1; ++i){
221  getBatchElement(singleBatch,0) = getBatchElement(batchX1,i);
222  coeff(0,0) = wx(i);
223  m_base->weightedParameterDerivative(singleBatch,singleBatch,coeff,*s.stateKxx[i],subGradient);
224  gradient -= subGradient;
225  }
226  for(std::size_t j = 0; j != sizeX2; ++j){
227  getBatchElement(singleBatch,0) = getBatchElement(batchX2,j);
228  coeff(0,0) = wy(j);
229  m_base->weightedParameterDerivative(singleBatch,singleBatch,coeff,*s.stateKyy[j],subGradient);
230  gradient -= subGradient;
231  }
232  }
233 
234  /// Input derivative, calculated according to the equation:
235  /// <br/>
236  /// \f$ \frac{\partial k(x, y)}{\partial x}
237  /// \frac{\sum_i \exp(w_i) \frac{\partial k_i(x, y)}{\partial x}}
238  /// {\sum_i exp(w_i)} \f$
239  /// and summed up over all elements of the second batch
241  ConstBatchInputReference const& batchX1,
242  ConstBatchInputReference const& batchX2,
243  RealMatrix const& coefficientsX2,
244  State const& state,
245  BatchInputType& gradient
246  ) const{
247  InternalState const& s = state.toState<InternalState>();
248  std::size_t sizeX1 = batchSize(batchX1);
249 
250  RealMatrix weights = coefficientsX2 / sqrt(outer_prod(s.kxx,s.kyy));
251 
252  m_base->weightedInputDerivative(batchX1,batchX2,weights,*s.stateKxy,gradient);
253 
254  noalias(weights) *= s.kxy;
255  RealVector wx = sum_columns(weights)/s.kxx;
256 
257  //the following mess could be made easier with an interface like
258  //m_base->weightedInputDerivative(batchX1,wx,s.statekxx,subGradient);
259  //(calculating the weighted input derivative of k(x_i,x_i)
260  RealMatrix subGradient;
261  BatchInputType singleBatch = Batch<InputType>::createBatch(getBatchElement(batchX1,0));
262  RealMatrix coeff(1,1);
263  for(std::size_t i = 0; i != sizeX1; ++i){
264  getBatchElement(singleBatch,0) = getBatchElement(batchX1,i);
265  coeff(0,0) = wx(i);
266  m_base->weightedInputDerivative(singleBatch,singleBatch,coeff,*s.stateKxx[i],subGradient);
267  noalias(row(gradient,i)) -= row(subGradient,0);
268  }
269  }
270 
271 protected:
272  /// kernel to normalize
// NOTE(review): the member declaration itself (documentation line 273, presumably
// `AbstractKernelFunction<InputType>* m_base;` given the m_base uses above) was
// dropped by the doc extraction -- TODO confirm against the original header.
274 };
275 
278 
279 
280 }
281 #endif