PolynomialKernel.h
//===========================================================================
/*!
 *
 * \brief Polynomial kernel.
 *
 * \author T. Glasmachers
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#define SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H

#include <shark/Models/Kernels/AbstractKernelFunction.h>

namespace shark {


/// \brief Polynomial kernel.
///
/// \par
/// The polynomial kernel is defined as
/// \f$ \left( \left\langle x_1, x_2 \right\rangle + b \right)^n \f$
/// with degree \f$ n \in \mathbb{N} \f$ and non-negative offset
/// \f$ b \geq 0 \f$. For \f$ n = 1 \f$ and \f$ b = 0 \f$ it coincides
/// with the linear kernel (standard inner product).
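///
/// \par Example
/// A minimal usage sketch (illustrative only, not part of the original
/// interface documentation):
/// \code
/// PolynomialKernel<> k(3, 1.0);        // k(x, y) = (<x, y> + 1)^3
/// RealVector x(2), y(2);
/// x(0) = 1.0; x(1) = 0.0;
/// y(0) = 2.0; y(1) = 1.0;
/// double value = k.eval(x, y);         // (1*2 + 0*1 + 1)^3 = 27
/// \endcode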
template<class InputType = RealVector>
class PolynomialKernel : public AbstractKernelFunction<InputType>
{
private:
    typedef AbstractKernelFunction<InputType> base_type;

    struct InternalState : public State{
        RealMatrix base;            //stores the inner products <x1_i, x2_j> plus the offset b
        RealMatrix exponentedProd;  //stores pow(base, m_degree)

        void resize(std::size_t sizeX1, std::size_t sizeX2){
            base.resize(sizeX1, sizeX2);
            exponentedProd.resize(sizeX1, sizeX2);
        }
    };
public:
    typedef typename base_type::BatchInputType BatchInputType;
    typedef typename base_type::ConstInputReference ConstInputReference;
    typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;

    /// Constructor.
    ///
    /// \param degree exponent of the polynomial
    /// \param offset non-negative constant added to the standard inner product
    /// \param degree_is_parameter should the degree be a regular model parameter? If so, the kernel is not differentiable with respect to its parameters.
    /// \param unconstrained should the offset internally be represented as the exponential of the externally visible parameter?
    PolynomialKernel( unsigned int degree = 2, double offset = 0.0, bool degree_is_parameter = true, bool unconstrained = false )
    : m_degree( degree ),
      m_offset( offset ),
      m_degreeIsParam( degree_is_parameter ),
      m_unconstrained( unconstrained ) {
        SHARK_RUNTIME_CHECK(degree > 0, "[PolynomialKernel::PolynomialKernel] degree must be positive");
        //the input derivative is always available; the parameter derivative
        //only exists while the degree is not itself a parameter
        this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        if ( !m_degreeIsParam )
            this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "PolynomialKernel"; }

    /// \brief Sets the degree of the polynomial; only allowed if the degree is not a model parameter.
    void setDegree( unsigned int deg ) {
        RANGE_CHECK( deg > 0 );
        SHARK_RUNTIME_CHECK( !m_degreeIsParam, "[PolynomialKernel::setDegree] Please use setParameterVector when the degree is a parameter.");
        m_degree = deg;
    }

    /// \brief Returns the degree of the polynomial.
    unsigned int degree() const {
        return m_degree;
    }

    /// \brief Returns the parameter vector: (degree, offset) if the degree is a
    /// parameter, otherwise just (offset); in unconstrained mode the offset
    /// entry holds log(offset).
    RealVector parameterVector() const {
        if ( m_degreeIsParam ) {
            RealVector ret(2);
            ret(0) = m_degree;
            if ( m_unconstrained )
                ret(1) = std::log( m_offset );
            else
                ret(1) = m_offset;
            return ret;
        } else {
            RealVector ret(1);
            if ( m_unconstrained )
                ret(0) = std::log( m_offset );
            else
                ret(0) = m_offset;
            return ret;
        }
    }

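    /// \brief Sets the parameter vector; same layout as parameterVector().
    ///
    /// A hypothetical round-trip in unconstrained mode (values chosen for
    /// illustration):
    /// \code
    /// PolynomialKernel<> kernel(2, 1.0, false, true);  // fixed degree, offset = exp(p)
    /// RealVector p = kernel.parameterVector();         // p(0) == std::log(1.0) == 0
    /// p(0) = 1.0;
    /// kernel.setParameterVector(p);                    // offset becomes exp(1) ~ 2.718
    /// \endcode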
    void setParameterVector(RealVector const& newParameters) {
        if ( m_degreeIsParam ) {
            SIZE_CHECK(newParameters.size() == 2);
            //the first parameter must be (numerically) a positive integer
            SHARK_ASSERT(std::abs((unsigned int)newParameters(0) - newParameters(0)) <= 2*std::numeric_limits<double>::epsilon());
            RANGE_CHECK(newParameters(0) >= 1.0);
            m_degree = (unsigned int)newParameters(0);
            if ( m_unconstrained ) {
                m_offset = std::exp(newParameters(1));
            } else {
                RANGE_CHECK(newParameters(1) >= 0.0);
                m_offset = newParameters(1);
            }
        } else {
            SIZE_CHECK(newParameters.size() == 1);
            if ( m_unconstrained ) {
                m_offset = std::exp(newParameters(0));
            } else {
                RANGE_CHECK(newParameters(0) >= 0.0);
                m_offset = newParameters(0);
            }
        }
    }

    std::size_t numberOfParameters() const {
        if ( m_degreeIsParam )
            return 2;
        else
            return 1;
    }

    /// \brief Creates the internal state of the kernel.
    boost::shared_ptr<State> createState()const{
        return boost::shared_ptr<State>(new InternalState());
    }

    /////////////////////////EVALUATION//////////////////////////////

    /// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
    double eval(ConstInputReference x1, ConstInputReference x2) const{
        SIZE_CHECK(x1.size() == x2.size());
        double base = inner_prod(x1, x2) + m_offset;
        return std::pow(base, m_degree);
    }

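    /// \brief Evaluates the kernel on two batches of inputs.
    ///
    /// Computes the Gram matrix with entries
    /// \f$ K_{ij} = \left( \langle x^1_i, x^2_j \rangle + b \right)^n \f$,
    /// where \f$ x^1_i \f$ is the i-th row of batchX1 and \f$ x^2_j \f$ the
    /// j-th row of batchX2.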
    void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const {
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();
        result.resize(sizeX1, sizeX2);

        //calculate the inner products and add the offset
        noalias(result) = prod(batchX1, trans(batchX2));
        result += m_offset;
        //now raise them to the n-th power
        if(m_degree != 1)
            noalias(result) = pow(result, m_degree);
    }

    void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
        SIZE_CHECK(batchX1.size2() == batchX2.size2());

        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();

        InternalState& s = state.toState<InternalState>();
        s.resize(sizeX1, sizeX2);
        result.resize(sizeX1, sizeX2);

        //calculate the inner products and add the offset
        noalias(s.base) = prod(batchX1, trans(batchX2));
        s.base += m_offset;

        //now raise them to the n-th power
        if(m_degree != 1)
            noalias(result) = pow(s.base, m_degree);
        else
            noalias(result) = s.base;
        noalias(s.exponentedProd) = result;
    }

    /////////////////////DERIVATIVES////////////////////////////////////

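    /// Derivative of the kernel with respect to its offset parameter:
    /// \f$ \frac{\partial k(x_1, x_2)}{\partial b} = n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \f$
    /// <br/>
    /// In unconstrained mode the offset is \f$ b = e^p \f$, so the chain rule
    /// contributes an extra factor \f$ \partial b / \partial p = b \f$.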
    void weightedParameterDerivative(
        ConstBatchInputReference batchX1,
        ConstBatchInputReference batchX2,
        RealMatrix const& coefficients,
        State const& state,
        RealVector& gradient
    ) const{

        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();
        gradient.resize(1);
        InternalState const& s = state.toState<InternalState>();

        //internal checks
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        SIZE_CHECK(s.base.size1() == sizeX1);
        SIZE_CHECK(s.base.size2() == sizeX2);
        SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
        SIZE_CHECK(s.exponentedProd.size2() == sizeX2);


        //m_degree == 1 is easy: the derivative of every entry w.r.t. b is 1
        if(m_degree == 1){
            gradient(0) = sum(coefficients);
            if ( m_unconstrained )
                gradient(0) *= m_offset;
            return;
        }

        //in general, exponentedProd/base = (<x1_i, x2_j> + b)^(n-1);
        //the safe division guards against entries with base == 0
        gradient(0) = m_degree * sum(safe_div(s.exponentedProd, s.base, 0.0) * coefficients);
        if ( m_unconstrained )
            gradient(0) *= m_offset;
    }

    /// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
    /// <br/>
    /// \f$ \frac{\partial k(x_1, x_2)}{\partial x_1} = \left[ n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \right] \cdot x_2 \f$
    void weightedInputDerivative(
        ConstBatchInputReference batchX1,
        ConstBatchInputReference batchX2,
        RealMatrix const& coefficientsX2,
        State const& state,
        BatchInputType& gradient
    ) const{

        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();
        gradient.resize(sizeX1, batchX1.size2());

        InternalState const& s = state.toState<InternalState>();

        //internal checks
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        SIZE_CHECK(s.base.size1() == sizeX1);
        SIZE_CHECK(s.base.size2() == sizeX2);
        SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
        SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
        SIZE_CHECK(coefficientsX2.size1() == sizeX1);
        SIZE_CHECK(coefficientsX2.size2() == sizeX2);


        //again m_degree == 1 is easy: the gradient for the i-th input of
        //batchX1 is just sum_j coefficientsX2(i,j) * x2_j
        if(m_degree == 1){
            noalias(gradient) = prod(coefficientsX2, batchX2);
            return;
        }

        //first calculate weights(i,j) = coefficientsX2(i,j) * exponentedProd(i,j) / base(i,j);
        //the safe division accounts for entries with base == 0
        RealMatrix weights = coefficientsX2 * safe_div(s.exponentedProd, s.base, 0.0);
        //the derivative for input i of batchX1 is
        //g_i = sum_j n * weights(i,j) * x2_j,
        //and the sum over j is a matrix-matrix product
        noalias(gradient) = m_degree * prod(weights, batchX2);
    }

    void read(InArchive& ar){
        ar >> m_degree;
        ar >> m_offset;
        ar >> m_degreeIsParam;
        ar >> m_unconstrained;
    }

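    /// \brief Writes the kernel state to an archive.
    ///
    /// A hypothetical persistence sketch (TextOutArchive is assumed to be
    /// Shark's text-based OutArchive implementation):
    /// \code
    /// std::ofstream ofs("kernel.model");
    /// TextOutArchive archive(ofs);
    /// kernel.write(archive);
    /// \endcode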
    void write(OutArchive& ar) const{
        ar << m_degree;
        ar << m_offset;
        ar << m_degreeIsParam;
        ar << m_unconstrained;
    }

protected:
    unsigned int m_degree;  ///< exponent n
    double m_offset;        ///< offset b
    bool m_degreeIsParam;   ///< is the degree a model parameter?
    bool m_unconstrained;   ///< is the offset internally represented as the exponential of the parameter?
};

typedef PolynomialKernel<> DensePolynomialKernel;
typedef PolynomialKernel<CompressedRealVector> CompressedPolynomialKernel;


}
#endif