MultiTaskKernel.h
//===========================================================================
/*!
 *
 * \brief Special kernel classes for multi-task and transfer learning.
 *
 *
 * \author T. Glasmachers, O. Krause
 * \date 2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_KERNELS_MULTITASKKERNEL_H
#define SHARK_MODELS_KERNELS_MULTITASKKERNEL_H

#include <shark/Models/Kernels/DiscreteKernel.h>
#include <shark/Models/Kernels/ProductKernel.h>
#include <shark/Data/Dataset.h>
#include "Impl/MklKernelBase.h"

namespace shark {

///
/// \brief Aggregation of input data and task index.
///
/// \par
/// Generic data structure for augmenting arbitrary data
/// with an integer. This integer is typically used as a
/// task identifier in multi-task and transfer learning.
///
template <class InputTypeT>
struct MultiTaskSample : public ISerializable
{
	typedef InputTypeT InputType;

	/// \brief Default constructor.
	MultiTaskSample()
	{ }

	/// \brief Construction from an input and a task index
	MultiTaskSample(InputType const& i, std::size_t t)
	: input(i), task(t)
	{ }

	void read(InArchive& ar){
		ar >> input;
		ar >> task;
	}

	void write(OutArchive& ar) const{
		ar << input;
		ar << task;
	}

	InputType input;    ///< input data
	std::size_t task;   ///< task index
};
}
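// A minimal usage sketch (not part of the original header): building a small
// multi-task dataset from plain inputs and task indices. The two-point toy
// setup is hypothetical; createDataFromRange is Shark's standard factory for
// Data<T> containers.
//
//   std::vector<shark::MultiTaskSample<shark::RealVector> > points;
//   shark::RealVector x(2);
//   x(0) = 1.0; x(1) = 2.0;
//   points.push_back(shark::MultiTaskSample<shark::RealVector>(x, 0)); // task 0
//   x(0) = 0.5; x(1) = -1.0;
//   points.push_back(shark::MultiTaskSample<shark::RealVector>(x, 1)); // task 1
//   shark::Data<shark::MultiTaskSample<shark::RealVector> > data =
//       shark::createDataFromRange(points);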

#ifndef DOXYGEN_SHOULD_SKIP_THIS

BOOST_FUSION_ADAPT_TPL_STRUCT(
	(InputType),
	(shark::MultiTaskSample)(InputType),
	(InputType, input)(std::size_t, task)
)

namespace shark {
template<class InputType>
struct Batch< MultiTaskSample<InputType> >{
	SHARK_CREATE_BATCH_INTERFACE(
		MultiTaskSample<InputType>,
		(InputType, input)(std::size_t, task)
	)
};
}

#endif /* DOXYGEN_SHOULD_SKIP_THIS */
namespace shark {

///
/// \brief Special "Gaussian-like" kernel function on tasks.
///
/// \par
/// See<br/>
/// Learning Marginal Predictors: Transfer to an Unlabeled Task.
/// G. Blanchard, G. Lee, C. Scott.
///
/// \par
/// This class computes a Gaussian kernel based on the distance
/// of empirical distributions in the feature space induced by yet
/// another kernel. This is useful for multi-task and transfer
/// learning. It reduces the definition of a kernel on tasks to
/// that of a kernel on inputs, plus a single bandwidth parameter
/// for the Gaussian kernel on distributions.
///
/// \par
/// Given unlabeled data \f$ x_i, t_i \f$ where the x-component
/// is an input and the t-component is a task index, the kernel
/// on tasks t and t' is defined as
/// \f[
///     k(t, t') = \exp \left( -\gamma \cdot \left\| \mu_t - \mu_{t'} \right\|^2 \right)
///     \qquad \text{with} \qquad
///     \mu_t = \frac{1}{\ell_t} \sum_{i \,|\, t_i = t} \phi(x_i) ,
/// \f]
/// where \f$ \phi \f$ is the feature map of an arbitrary kernel k' on
/// inputs, \f$ \mu_t \f$ is the mean element of task t, and
/// \f$ \ell_t \f$ denotes the number of examples belonging to task t.
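/// \par
/// The squared distance can be computed from kernel evaluations alone,
/// since inner products of mean elements expand via the kernel trick as
/// \f[
///     \langle \mu_t, \mu_{t'} \rangle = \frac{1}{\ell_t \ell_{t'}} \sum_{i \,|\, t_i = t} \sum_{j \,|\, t_j = t'} k'(x_i, x_j)
/// \f]
/// and \f$ \| \mu_t - \mu_{t'} \|^2 = \langle \mu_t, \mu_t \rangle + \langle \mu_{t'}, \mu_{t'} \rangle - 2 \langle \mu_t, \mu_{t'} \rangle \f$.
/// This identity is exactly what computeMatrix below implements.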
///
template <class InputTypeT >
class GaussianTaskKernel : public DiscreteKernel
{
private:
	typedef DiscreteKernel base_type;
public:
	typedef InputTypeT InputType;
	typedef MultiTaskSample<InputType> MultiTaskSampleType;
	typedef AbstractKernelFunction<InputType> KernelType;

	/// \brief Construction of a Gaussian kernel on tasks.
	///
	/// \param  data         unlabeled data from multiple tasks
	/// \param  tasks        number of tasks in the problem
	/// \param  inputkernel  kernel on inputs based on which task similarity is defined
	/// \param  gamma        Gaussian bandwidth parameter (also refer to the member functions setGamma and setSigma).
	GaussianTaskKernel(
		Data<MultiTaskSampleType> const& data,
		std::size_t tasks,
		KernelType& inputkernel,
		double gamma)
	: DiscreteKernel(RealMatrix(tasks, tasks, 0.0))
	, m_data(data)
	, mpe_inputKernel(&inputkernel)
	, m_gamma(gamma){
		computeMatrix();
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "GaussianTaskKernel"; }

	RealVector parameterVector() const{
		return mpe_inputKernel->parameterVector() | m_gamma;
	}

	void setParameterVector(RealVector const& newParameters){
		std::size_t kParams = mpe_inputKernel->numberOfParameters();
		mpe_inputKernel->setParameterVector(subrange(newParameters, 0, kParams));
		m_gamma = newParameters.back();
		computeMatrix();
	}

	std::size_t numberOfParameters() const{
		return mpe_inputKernel->numberOfParameters() + 1;
	}

	/// \brief Number of tasks in the problem.
	std::size_t numberOfTasks() const
	{ return size(); }

	/// \brief Kernel bandwidth parameter.
	double gamma() const
	{ return m_gamma; }

	/// \brief Kernel width parameter, equivalent to the bandwidth parameter.
	///
	/// The bandwidth gamma and the width sigma are connected: \f$ \gamma = 1 / (2 \cdot \sigma^2) \f$.
	double sigma() const
	{ return (1.0 / std::sqrt(2.0 * m_gamma)); }

	/// \brief Set the kernel bandwidth parameter.
	void setGamma(double gamma)
	{
		SHARK_ASSERT(gamma > 0.0);
		m_gamma = gamma;
	}

	/// \brief Set the kernel width (equivalent to setting the bandwidth).
	///
	/// The bandwidth gamma and the width sigma are connected: \f$ \gamma = 1 / (2 \cdot \sigma^2) \f$.
	void setWidth(double sigma)
	{
		SHARK_ASSERT(sigma > 0.0);
		m_gamma = 1.0 / (2.0 * sigma * sigma);
	}

	/// From ISerializable.
	void read(InArchive& ar)
	{
		base_type::read(ar);
		ar >> m_gamma;
	}

	/// From ISerializable.
	void write(OutArchive& ar) const
	{
		base_type::write(ar);
		ar << m_gamma;
	}

protected:

	/// \brief Compute the Gram matrix of the task kernel.
	///
	/// \par
	/// Here is the real meat. This function implements the
	/// kernel function defined in<br/>
	/// Learning Marginal Predictors: Transfer to an Unlabeled Task.
	/// G. Blanchard, G. Lee, C. Scott.
	///
	/// \par
	/// In a first step the function computes the inner products
	/// of the task-wise empirical distributions, represented by
	/// their mean elements in the kernel-induced feature space.
	/// In a second step this information is used for the computation
	/// of squared distances between empirical distributions, which
	/// allows for the straightforward computation of a Gaussian
	/// kernel.
	void computeMatrix()
	{
		// count number of examples for each task
		const std::size_t tasks = numberOfTasks();
		std::size_t elements = m_data.numberOfElements();
		std::vector<std::size_t> ell(tasks, 0);
		for (std::size_t i=0; i<elements; i++)
			ell[m_data.element(i).task]++;

		// compute inner products between mean elements of empirical distributions
		for (std::size_t i=0; i<elements; i++){
			const std::size_t task_i = m_data.element(i).task;
			for (std::size_t j=0; j<i; j++){
				const std::size_t task_j = m_data.element(j).task;
				const double k = mpe_inputKernel->eval(m_data.element(i).input, m_data.element(j).input);
				base_type::m_matrix(task_i, task_j) += k;
				base_type::m_matrix(task_j, task_i) += k;
			}
			const double k = mpe_inputKernel->eval(m_data.element(i).input, m_data.element(i).input);
			base_type::m_matrix(task_i, task_i) += k;
		}
		// normalize by task sizes; m_matrix(i, j) now holds <mu_i, mu_j>
		for (std::size_t i=0; i<tasks; i++){
			if (ell[i] == 0) continue;
			for (std::size_t j=0; j<tasks; j++){
				if (ell[j] == 0) continue;
				base_type::m_matrix(i, j) /= (double)(ell[i] * ell[j]);
			}
		}

		// compute Gaussian kernel from squared distances between mean elements
		for (std::size_t i=0; i<tasks; i++)
		{
			const double norm2_i = base_type::m_matrix(i, i);
			for (std::size_t j=0; j<i; j++)
			{
				const double norm2_j = base_type::m_matrix(j, j);
				const double dist2 = norm2_i + norm2_j - 2.0 * base_type::m_matrix(i, j);
				const double k = std::exp(-m_gamma * dist2);
				base_type::m_matrix(i, j) = base_type::m_matrix(j, i) = k;
			}
		}
		for (std::size_t i=0; i<tasks; i++) base_type::m_matrix(i, i) = 1.0;
	}


	Data<MultiTaskSampleType> const& m_data;   ///< multi-task data
	KernelType* mpe_inputKernel;               ///< kernel on inputs
	double m_gamma;                            ///< bandwidth of the Gaussian task kernel
};
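// A minimal usage sketch (not part of the original header), assuming a
// hypothetical dataset "data" of MultiTaskSample<RealVector> elements
// spanning two tasks. GaussianRbfKernel is Shark's standard RBF kernel on
// RealVector inputs; all numeric parameters are arbitrary illustrative
// choices.
//
//   shark::GaussianRbfKernel<> inputKernel(1.0);        // kernel k' on inputs
//   shark::GaussianTaskKernel<shark::RealVector> taskKernel(
//       data,          // unlabeled multi-task data
//       2,             // number of tasks
//       inputKernel,   // kernel inducing the feature space
//       0.5);          // bandwidth gamma of the task kernel
//   double similarity = taskKernel.eval(0, 1);          // kernel between tasks 0 and 1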


///
/// \brief Special kernel function for multi-task and transfer learning.
///
/// \par
/// This class is a convenience wrapper for the product of an
/// input kernel and a kernel on tasks. It also encapsulates
/// the projection from multi-task learning data (see class
/// MultiTaskSample) to inputs and task indices.
///
template <class InputTypeT>
class MultiTaskKernel
: private detail::MklKernelBase<MultiTaskSample<InputTypeT> >
, public ProductKernel< MultiTaskSample<InputTypeT> >
{
private:
	typedef detail::MklKernelBase<MultiTaskSample<InputTypeT> > base_type1;
	typedef ProductKernel< MultiTaskSample<InputTypeT> > base_type2;
public:
	typedef AbstractKernelFunction<InputTypeT> InputKernelType;

	/// \brief Constructor.
	///
	/// \param  inputkernel  kernel on inputs
	/// \param  taskkernel   kernel on task indices
	MultiTaskKernel(
		InputKernelType* inputkernel,
		DiscreteKernel* taskkernel)
	:base_type1(boost::fusion::make_vector(inputkernel,taskkernel))
	,base_type2(base_type1::makeKernelVector())
	{}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "MultiTaskKernel"; }
};
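// A minimal end-to-end sketch (not part of the original header): combining the
// task kernel above with an input kernel into a product kernel over
// MultiTaskSample pairs. The variables inputKernel, taskKernel and data are
// the hypothetical ones from the previous sketch.
//
//   shark::MultiTaskKernel<shark::RealVector> kernel(&inputKernel, &taskKernel);
//
//   // the product kernel evaluates k'(x, x') * k(t, t') on augmented samples
//   shark::MultiTaskSample<shark::RealVector> a = data.element(0);
//   shark::MultiTaskSample<shark::RealVector> b = data.element(1);
//   double value = kernel(a, b);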

} // namespace shark

#endif