// KernelExpansion.h
//===========================================================================
/*!
 *
 *
 * \brief       Affine linear kernel function expansion
 *
 * \par
 * Affine linear kernel expansions resulting from Support
 * vector machine (SVM) training and other kernel methods.
 *
 *
 *
 *
 * \author      T. Glasmachers
 * \date        2007-2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
40 
#ifndef SHARK_MODELS_KERNELEXPANSION_H
#define SHARK_MODELS_KERNELEXPANSION_H

#include <shark/Models/Classifier.h>
#include <shark/Models/Kernels/AbstractKernelFunction.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/DataView.h>

namespace shark {
50 
51 
///
/// \brief Linear model in a kernel feature space.
///
/// An affine linear kernel expansion is a model of the type
/// \f[ f : \mathcal{X} \to \mathbb{R}^d \enspace , \enspace f(x) = \sum_{n=1}^{\ell} \alpha_n k(x_n, x) + b \enspace ,\f]
/// with parameters \f$ \alpha_n \in \mathbb{R}^{d} \f$ for all
/// \f$ n \in \{1, \dots, \ell\} \f$ and \f$ b \in \mathbb{R}^d \f$.
///
/// One way in which the possibility for vector-valued input and output of dimension \f$ d \f$ may be interpreted
/// is as allowing for a KernelExpansion model for \f$ d \f$ different classes/outputs in multi-class problems. Then,
/// the i-th column of the matrix #m_alpha is the KernelExpansion for class/output i.
///
/// \tparam InputType Type of basis elements supplied to the kernel
///
66 template<class InputType>
67 class KernelExpansion : public AbstractModel<InputType, RealVector>
68 {
69 public:
74 
75  // //////////////////////////////////////////////////////////
76  // //////////// CONSTRUCTORS /////////////////////
77  // //////////////////////////////////////////////////////////
78 
80 
81  KernelExpansion(KernelType* kernel):mep_kernel(kernel){
82  SHARK_ASSERT(kernel != NULL);
83  }
84 
85  KernelExpansion(KernelType* kernel, Data<InputType> const& basis,bool offset, std::size_t outputs = 1){
86  SHARK_ASSERT(kernel != NULL);
87  setStructure(kernel, basis,offset,outputs);
88  }
89 
90  void setStructure(KernelType* kernel, Data<InputType> const& basis,bool offset, std::size_t outputs = 1){
91  SHARK_ASSERT(kernel != NULL);
93  if(offset)
94  m_b.resize(outputs);
95  m_basis = basis;
96  m_alpha.resize(basis.numberOfElements(), outputs);
97  m_alpha.clear();
98  }
99 
100  /// \brief From INameable: return the class name.
101  std::string name() const
102  { return "KernelExpansion"; }
103 
104  /// dimensionality of the output RealVector
106  return m_alpha.size2();
107  }
108 
109  Shape inputShape() const{
110  return Shape();
111  }
112 
113  // //////////////////////////////////////////////////////////
114  // /////////// ALL THINGS KERNEL //////////////////////
115  // //////////////////////////////////////////////////////////
116 
117  KernelType const* kernel() const{
118  return mep_kernel;
119  }
120  KernelType* kernel(){
121  return mep_kernel;
122  }
123  void setKernel(KernelType* kernel){
124  mep_kernel = kernel;
125  }
126 
127  // //////////////////////////////////////////////////////////
128  // /////// ALL THINGS ALPHA AND OFFSET ////////////////
129  // //////////////////////////////////////////////////////////
130 
131  bool hasOffset() const{
132  return m_b.size() != 0;
133  }
134  RealMatrix& alpha(){
135  return m_alpha;
136  }
137  RealMatrix const& alpha() const{
138  return m_alpha;
139  }
140  double& alpha(std::size_t example, std::size_t cls){
141  return m_alpha(example, cls);
142  }
143  double const& alpha(std::size_t example, std::size_t cls) const{
144  return m_alpha(example, cls);
145  }
146  RealVector& offset(){
147  SHARK_RUNTIME_CHECK(hasOffset(), "[KernelExpansion::offset] invalid call for object without offset term");
148  return m_b;
149  }
150  RealVector const& offset() const{
151  SHARK_RUNTIME_CHECK(hasOffset(), "[KernelExpansion::offset] invalid call for object without offset term");
152  return m_b;
153  }
154  double& offset(std::size_t cls){
155  SHARK_RUNTIME_CHECK(hasOffset(), "[KernelExpansion::offset] invalid call for object without offset term");
156  return m_b(cls);
157  }
158  double const& offset(std::size_t cls) const{
159  SHARK_RUNTIME_CHECK(hasOffset(), "[KernelExpansion::offset] invalid call for object without offset term");
160  return m_b(cls);
161  }
162 
163  // //////////////////////////////////////////////////////////
164  // //////// ALL THINGS UNDERLYING DATA ////////////////
165  // //////////////////////////////////////////////////////////
166 
167 
168  Data<InputType> const& basis() const {
169  return m_basis;
170  }
171 
173  return m_basis;
174  }
175 
176  /// The sparsify method removes non-support-vectors from
177  /// its set of basis vectors and the coefficient matrix.
178  void sparsify(){
179  std::size_t ic = m_basis.numberOfElements();
180  std::vector<std::size_t> svIndices;
181  for (std::size_t i=0; i != ic; ++i){
182  if (blas::norm_1(row(m_alpha, i)) > 0.0){
183  svIndices.push_back(i);
184  }
185  }
186  //project basis on the support vectors
187  m_basis = toDataset(subset(toView(m_basis),svIndices));
188 
189  //reduce alpha to it's support vector variables
190  RealMatrix a(svIndices.size(), m_alpha.size2());
191  for (std::size_t i=0; i!= svIndices.size(); ++i){
192  noalias(row(a,i)) = row(m_alpha,svIndices[i]);
193  }
194  swap(m_alpha,a);
195  }
196 
197  // //////////////////////////////////////////////////////////
198  // //////// ALL THINGS KERNEL PARAMETERS //////////////
199  // //////////////////////////////////////////////////////////
200 
201  RealVector parameterVector() const{
202  if (hasOffset()){
203  return to_vector(m_alpha) | m_b;
204  }
205  else{
206  return to_vector(m_alpha);
207  }
208  }
209 
210  void setParameterVector(RealVector const& newParameters){
211  SHARK_RUNTIME_CHECK(newParameters.size() == numberOfParameters(), "Invalid size of the parameter vector");
212  std::size_t numParams = m_alpha.size1() * m_alpha.size2();
213  noalias(to_vector(m_alpha)) = subrange(newParameters, 0, numParams);
214  if (hasOffset())
215  noalias(m_b) = subrange(newParameters, numParams, numParams + m_b.size());
216  }
217 
218  std::size_t numberOfParameters() const{
219  if (hasOffset())
220  return m_alpha.size1() * m_alpha.size2() + m_b.size();
221  else
222  return m_alpha.size1() * m_alpha.size2();
223  }
224 
225  // //////////////////////////////////////////////////////////
226  // //////// ALL THINGS EVALUATION //////////////
227  // //////////////////////////////////////////////////////////
228 
229  boost::shared_ptr<State> createState()const{
230  return boost::shared_ptr<State>(new EmptyState());
231  }
232 
234  void eval(BatchInputType const& patterns, BatchOutputType& output)const{
235  std::size_t numPatterns = batchSize(patterns);
236  SHARK_ASSERT(mep_kernel != NULL);
237 
238  output.resize(numPatterns,m_alpha.size2());
239  if (hasOffset())
240  output = repeat(m_b,numPatterns);
241  else
242  output.clear();
243 
244  std::size_t batchStart = 0;
245  for (std::size_t i=0; i != m_basis.numberOfBatches(); i++){
246  std::size_t batchEnd = batchStart+batchSize(m_basis.batch(i));
247  //evaluate kernels
248  //results in a matrix of the form where a column consists of the kernel evaluation of
249  //pattern i with respect to the batch of the basis,this gives a good memory alignment
250  //in the following matrix matrix product
251  RealMatrix kernelEvaluations = (*mep_kernel)(m_basis.batch(i),patterns);
252 
253  //get the part of the alpha matrix which is suitable for this batch
254  auto batchAlpha = subrange(m_alpha,batchStart,batchEnd,0,m_alpha.size2());
255  noalias(output) += prod(trans(kernelEvaluations),batchAlpha);
256  batchStart = batchEnd;
257  }
258  }
259  void eval(BatchInputType const& patterns, BatchOutputType& outputs, State & state)const{
260  eval(patterns, outputs);
261  }
262 
263  // //////////////////////////////////////////////////////////
264  // //////// ALL THINGS SERIALIZATION //////////////
265  // //////////////////////////////////////////////////////////
266 
267  /// From ISerializable, reads a model from an archive
268  void read( InArchive & archive ){
269  SHARK_ASSERT(mep_kernel != NULL);
270 
271  archive >> m_alpha;
272  archive >> m_b;
273  archive >> m_basis;
274  archive >> (*mep_kernel);
275  }
276 
277  /// From ISerializable, writes a model to an archive
278  void write( OutArchive & archive ) const{
279  SHARK_ASSERT(mep_kernel != NULL);
280 
281  archive << m_alpha;
282  archive << m_b;
283  archive << m_basis;
284  archive << const_cast<KernelType const&>(*mep_kernel);//prevent compilation warning
285  }
286 
287 // //////////////////////////////////////////////////////////
288 // //////// MEMBERS //////////////
289 // //////////////////////////////////////////////////////////
290 
291 protected:
292  /// kernel function used in the expansion
293  KernelType* mep_kernel;
294 
295  /// "support" basis vectors
297 
298  /// kernel coefficients
299  RealMatrix m_alpha;
300 
301  /// offset or bias term
302  RealVector m_b;
303 };
304 
///
/// \brief Linear classifier in a kernel feature space.
///
/// This model is a simple wrapper for the KernelExpansion calculating the arg max
/// of the outputs of the model. This is the model used by kernel classifier models like SVMs.
///
311 template<class InputType>
312 struct KernelClassifier: public Classifier<KernelExpansion<InputType> >{
315 
317  { }
319  : Classifier<KernelExpansion<InputType> >(KernelExpansionType(kernel))
320  { }
321  KernelClassifier(KernelExpansionType const& decisionFunction)
322  : Classifier<KernelExpansion<InputType> >(decisionFunction)
323  { }
324 
325  std::string name() const
326  { return "KernelClassifier"; }
327 };
328 
329 
330 }
331 #endif