// AbstractModel.h
//===========================================================================
/*!
 *
 *
 * \brief base class for all models, as well as a specialized differentiable model
 *
 *
 *
 * \author T. Glasmachers, O. Krause
 * \date 2010
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_ABSTRACTMODEL_H
#define SHARK_MODELS_ABSTRACTMODEL_H

#include <shark/Core/Flags.h>
#include <shark/Core/IParameterizable.h> // NOTE(review): restored dropped include; base class used below
#include <shark/Core/INameable.h>
#include <shark/Core/State.h>
#include <shark/Core/Shape.h>
#include <shark/Core/Random.h>
#include <shark/Data/Dataset.h>

46 namespace shark {
47 
48 ///\brief Base class for all Models
49 ///
50 /// \par
51 /// A model is one of the three fundaments of supervised learning: model, error measure
52 /// and an optimization algorithm.
53 /// It is a concept of a function which performs a mapping \f$ x \rightarrow f_w(x)\f$.
54 /// In contrast to an error function it has two sets of parameters:
55 /// The first is the current point to map \f$x\f$, the others are the internal model parameters \f$w\f$
56 /// which define the mapping.
57 /// Often a model is used to find an optimal mapping for a problem, for example a function which
58 /// best fits the points of a given dataset. Therefore, AbstractModel does not only offer
59 /// the mapping itself, but also a set of special derivatives with respect to \f$ x \f$ and \f$ w \f$.
60 /// Most of the time, only the derivative with respect to \f$ w \f$ is needed, but in some special problems,
61 /// like finding optimal stimuli or stacking models, also the input derivative is needed.
62 ///
63 ///\par Models are optimized for batch processing. This means, that instead of only one data point at a time, it can
64 /// evaluate a big set of inputs at the same time, using optimized routines for this task.
65 ///
66 /// \par
67 /// The derivatives are weighted, which means that the derivatives of every single output are added together
68 /// weighted by coefficients (see #weightedParameterDerivative). This is an optimization for the chain rule
69 /// which is very efficient to calculate most of the time.
70 ///
71 /// \par
72 /// It is allowed to store intermediate values during #eval and use them to speed up calculation of
73 /// derivatives. Therefore it must be guaranteed that eval() is called before calculating derivatives.
74 /// This is no restriction, since typical error measures need the mapping itself and not only the derivative.
75 ///
76 /// \par
77 /// Models have names and can be serialised and have parameters. The type of the parameter vector
78 /// can be set as third argument. By default, this is RealVector.
79 template<class InputTypeT, class OutputTypeT, class ParameterType=RealVector>
80 class AbstractModel : public IParameterizable<ParameterType>, public INameable, public ISerializable
81 {
82 public:
83  /// \brief Defines the input type of the model.
84  typedef InputTypeT InputType;
85  /// \brief Defines the output type of the model.
86  typedef OutputTypeT OutputType;
87  /// \brief Defines the output type of the model compatible with standard functors
88  typedef OutputType result_type;
89 
90  ///\brief Defines the BaseType used by the model (this type). Useful for creating derived models
92 
93  /// \brief defines the batch type of the input type.
94  ///
95  /// This could for example be std::vector<InputType> but for example for RealVector it could be RealMatrix
97  /// \brief defines the batch type of the output type
99 
100 
102 
103  virtual ~AbstractModel() { }
104 
105  enum Feature {
108  };
110 
111  /// \brief Returns true when the first parameter derivative is implemented.
114  }
115  /// \brief Returns true when the first input derivative is implemented.
118  }
119 
120  ///\brief Returns the expected shape of the input.
121  virtual Shape inputShape() const = 0;
122  ///\brief Returns the shape of the output.
123  virtual Shape outputShape() const = 0;
124 
125  ///\brief Creates an internal state of the model.
126  ///
127  ///The state is needed when the derivatives are to be
128  ///calculated. Eval can store a state which is then reused to speed up
129  ///the calculations of the derivatives. This also allows eval to be
130  ///evaluated in parallel!
131  virtual boost::shared_ptr<State> createState() const
132  {
135  {
136  throw SHARKEXCEPTION("[AbstractModel::createState] createState must be overridden by models with derivatives");
137  }
138  return boost::shared_ptr<State>(new EmptyState());
139  }
140 
141  /// \brief From ISerializable, reads a model from an archive.
142  virtual void read( InArchive & archive ){
143  m_features.read(archive);
144  RealVector p;
145  archive & p;
146  this->setParameterVector(p);
147  }
148 
149  /// \brief writes a model to an archive
150  ///
151  /// the default implementation just saves the parameters, not the structure!
152  virtual void write( OutArchive & archive ) const{
153  m_features.write(archive);
154  RealVector p = this->parameterVector();
155  archive & p;
156  }
157 
158  /// \brief Standard interface for evaluating the response of the model to a batch of patterns.
159  ///
160  /// \param patterns the inputs of the model
161  /// \param outputs the predictions or response of the model to every pattern
162  virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs) const{
163  boost::shared_ptr<State> state = createState();
164  eval(patterns,outputs,*state);
165  }
166 
167  /// \brief Standard interface for evaluating the response of the model to a batch of patterns.
168  ///
169  /// \param patterns the inputs of the model
170  /// \param outputs the predictions or response of the model to every pattern
171  /// \param state intermediate results stored by eval which can be reused for derivative computation.
172  virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs, State& state) const = 0;
173 
174  /// \brief Standard interface for evaluating the response of the model to a single pattern.
175  ///
176  /// \param pattern the input of the model
177  /// \param output the prediction or response of the model to the pattern
178  virtual void eval(InputType const & pattern, OutputType& output)const{
179  BatchInputType patternBatch=Batch<InputType>::createBatch(pattern);
180  getBatchElement(patternBatch,0) = pattern;
181  BatchOutputType outputBatch;
182  eval(patternBatch,outputBatch);
183  output = getBatchElement(outputBatch,0);
184  }
185 
186  /// \brief Model evaluation as an operator for a whole dataset. This is a convenience function
187  ///
188  /// \param patterns the input of the model
189  /// \returns the responses of the model
191  return transform(patterns,*this);
192  }
193 
194  /// \brief Model evaluation as an operator for a single pattern. This is a convenience function
195  ///
196  /// \param pattern the input of the model
197  /// \returns the response of the model
198  OutputType operator()(InputType const & pattern)const{
199  OutputType output;
200  eval(pattern,output);
201  return output;
202  }
203 
204  /// \brief Model evaluation as an operator for a single pattern. This is a convenience function
205  ///
206  /// \param patterns the input of the model
207  /// \returns the response of the model
208  BatchOutputType operator()(BatchInputType const & patterns)const{
209  BatchOutputType output;
210  eval(patterns,output);
211  return output;
212  }
213 
214  /// \brief calculates the weighted sum of derivatives w.r.t the parameters.
215  ///
216  /// \param pattern the patterns to evaluate
217  /// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
218  /// \param state intermediate results stored by eval to speed up calculations of the derivatives
219  /// \param derivative the calculated derivative as sum over all derivates of all patterns
221  BatchInputType const & pattern,
222  BatchOutputType const& outputs,
223  BatchOutputType const & coefficients,
224  State const& state,
225  RealVector& derivative
226  )const{
228  }
229 
230  ///\brief calculates the weighted sum of derivatives w.r.t the inputs
231  ///
232  /// \param pattern the patterns to evaluate
233  /// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
234  /// \param state intermediate results stored by eval to sped up calculations of the derivatives
235  /// \param derivative the calculated derivative for every pattern
237  BatchInputType const & pattern,
238  BatchOutputType const& outputs,
239  BatchOutputType const & coefficients,
240  State const& state,
241  BatchInputType& derivative
242  )const{
244  }
245 
246  ///\brief calculates weighted input and parameter derivative at the same time
247  ///
248  /// Sometimes, both derivatives are needed at the same time. But sometimes, when calculating the
249  /// weighted parameter derivative, the input derivative can be calculated for free. This is for example true for
250  /// the feed-forward neural networks. However, there exists the obvious default implementation to just calculate
251  /// the derivatives one after another.
252  /// \param patterns the patterns to evaluate
253  /// \param coefficients the coefficients which are used to calculate the weighted sum
254  /// \param state intermediate results stored by eval to sped up calculations of the derivatives
255  /// \param parameterDerivative the calculated parameter derivative as sum over all derivates of all patterns
256  /// \param inputDerivative the calculated derivative for every pattern
257  virtual void weightedDerivatives(
258  BatchInputType const & patterns,
259  BatchOutputType const& outputs,
260  BatchOutputType const & coefficients,
261  State const& state,
262  RealVector& parameterDerivative,
263  BatchInputType& inputDerivative
264  )const{
265  weightedParameterDerivative(patterns, outputs, coefficients,state,parameterDerivative);
266  weightedInputDerivative(patterns, outputs, coefficients,state,inputDerivative);
267  }
268 };
269 
270 
/**
 * \ingroup shark_globals
 *
 * @{
 */

277 /// \brief Initialize model parameters normally distributed.
278 ///
279 /// \param model: model to be initialized
280 /// \param s: variance of mean-free normal distribution
281 template <class InputType, class OutputType>
283  RealVector weights(model.numberOfParameters());
284  std::generate(weights.begin(), weights.end(), [&](){return random::gauss(random::globalRng,0,s);});
285  model.setParameterVector(weights);
286 }
287 
288 
289 /// \brief Initialize model parameters uniformly at random.
290 ///
291 /// \param model model to be initialized
292 /// \param lower lower bound of initialization interval
293 /// \param upper upper bound of initialization interval
294 template <class InputType, class OutputType>
295 void initRandomUniform(AbstractModel<InputType, OutputType>& model, double lower, double upper){
296  RealVector weights(model.numberOfParameters());
297  std::generate(weights.begin(), weights.end(), [&](){return random::uni(random::globalRng,lower,upper);});
298  model.setParameterVector(weights);
299 }
300 
/** @}*/

303 namespace detail{
304 //Required for correct shape infering of transform
305 template<class I, class O, class V>
306 struct InferShape<AbstractModel<I,O,V> >{
307  static Shape infer(AbstractModel<I,O,V> const& f){return f.outputShape();}
308 };
309 
310 }
311 
312 }
313 
314 
315 #endif