// AbstractModel.h
//===========================================================================
/*!
 *
 *
 * \brief base class for all models, as well as a specialized differentiable model
 *
 *
 *
 * \author T. Glasmachers, O. Krause
 * \date 2010
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_ABSTRACTMODEL_H
#define SHARK_MODELS_ABSTRACTMODEL_H

#include <shark/Core/Flags.h>
#include <shark/Core/IParameterizable.h> // NOTE(review): restored dropped include; base class used below
#include <shark/Core/INameable.h>
#include <shark/Core/State.h>
#include <shark/Core/Shape.h>
#include <shark/Core/Random.h>
#include <shark/Data/Dataset.h>

46 namespace shark {
47 
48 ///\brief Base class for all Models
49 ///
50 /// \par
51 /// A model is one of the three fundaments of supervised learning: model, error measure
52 /// and an optimization algorithm.
53 /// It is a concept of a function which performs a mapping \f$ x \rightarrow f_w(x)\f$.
54 /// In contrast to an error function it has two sets of parameters:
55 /// The first is the current point to map \f$x\f$, the others are the internal model parameters \f$w\f$
56 /// which define the mapping.
57 /// Often a model is used to find an optimal mapping for a problem, for example a function which
58 /// best fits the points of a given dataset. Therefore, AbstractModel does not only offer
59 /// the mapping itself, but also a set of special derivatives with respect to \f$ x \f$ and \f$ w \f$.
60 /// Most of the time, only the derivative with respect to \f$ w \f$ is needed, but in some special problems,
61 /// like finding optimal stimuli or stacking models, also the input derivative is needed.
62 ///
63 ///\par Models are optimized for batch processing. This means, that instead of only one data point at a time, it can
64 /// evaluate a big set of inputs at the same time, using optimized routines for this task.
65 ///
66 /// \par
67 /// The derivatives are weighted, which means that the derivatives of every single output are added together
68 /// weighted by coefficients (see #weightedParameterDerivative). This is an optimization for the chain rule
69 /// which is very efficient to calculate most of the time.
70 ///
71 /// \par
72 /// It is allowed to store intermediate values during #eval and use them to speed up calculation of
73 /// derivatives. Therefore it must be guaranteed that eval() is called before calculating derivatives.
74 /// This is no restriction, since typical error measures need the mapping itself and not only the derivative.
75 ///
76 /// \par
77 /// Models have names and can be serialised and have parameters. The type of the parameter vector
78 /// can be set as third argument. By default, this is RealVector.
79 template<class InputTypeT, class OutputTypeT, class ParameterType=RealVector>
80 class AbstractModel : public IParameterizable<ParameterType>, public INameable, public ISerializable
81 {
82 public:
83  /// \brief Defines the input type of the model.
84  typedef InputTypeT InputType;
85  /// \brief Defines the output type of the model.
86  typedef OutputTypeT OutputType;
87  /// \brief Defines the output type of the model compatible with standard functors
88  typedef OutputType result_type;
89 
90  ///\brief Defines the BaseType used by the model (this type). Useful for creating derived models
92 
93  /// \brief defines the batch type of the input type.
94  ///
95  /// This could for example be std::vector<InputType> but for example for RealVector it could be RealMatrix
97  /// \brief defines the batch type of the output type
99 
100 
102 
103  virtual ~AbstractModel() { }
104 
105  enum Feature {
108  };
110 
111  /// \brief Returns true when the first parameter derivative is implemented.
114  }
115  /// \brief Returns true when the first input derivative is implemented.
118  }
119 
120  ///\brief Returns the expected shape of the input.
121  virtual Shape inputShape() const = 0;
122  ///\brief Returns the shape of the output.
123  virtual Shape outputShape() const = 0;
124 
125  ///\brief Creates an internal state of the model.
126  ///
127  ///The state is needed when the derivatives are to be
128  ///calculated. Eval can store a state which is then reused to speed up
129  ///the calculations of the derivatives. This also allows eval to be
130  ///evaluated in parallel!
131  virtual boost::shared_ptr<State> createState() const
132  {
135  {
136  throw SHARKEXCEPTION("[AbstractModel::createState] createState must be overridden by models with derivatives");
137  }
138  return boost::shared_ptr<State>(new EmptyState());
139  }
140 
141  /// \brief From ISerializable, reads a model from an archive.
142  virtual void read( InArchive & archive ){
143  m_features.read(archive);
144  RealVector p;
145  archive & p;
146  this->setParameterVector(p);
147  }
148 
149  /// \brief writes a model to an archive
150  ///
151  /// the default implementation just saves the parameters, not the structure!
152  virtual void write( OutArchive & archive ) const{
153  m_features.write(archive);
154  RealVector p = this->parameterVector();
155  archive & p;
156  }
157 
158  /// \brief Standard interface for evaluating the response of the model to a batch of patterns.
159  ///
160  /// \param patterns the inputs of the model
161  /// \param outputs the predictions or response of the model to every pattern
162  virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs) const{
163  boost::shared_ptr<State> state = createState();
164  eval(patterns,outputs,*state);
165  }
166 
167  /// \brief Standard interface for evaluating the response of the model to a batch of patterns.
168  ///
169  /// \param patterns the inputs of the model
170  /// \param outputs the predictions or response of the model to every pattern
171  /// \param state intermediate results stored by eval which can be reused for derivative computation.
172  virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs, State& state) const = 0;
173 
174  /// \brief Standard interface for evaluating the response of the model to a single pattern.
175  ///
176  /// \param pattern the input of the model
177  /// \param output the prediction or response of the model to the pattern
178  virtual void eval(InputType const & pattern, OutputType& output)const{
179  BatchInputType patternBatch=Batch<InputType>::createBatch(pattern);
180  getBatchElement(patternBatch,0) = pattern;
181  BatchOutputType outputBatch;
182  eval(patternBatch,outputBatch);
183  output = getBatchElement(outputBatch,0);
184  }
185 
186  /// \brief Model evaluation as an operator for a whole dataset. This is a convenience function
187  ///
188  /// \param patterns the input of the model
189  /// \returns the responses of the model
191  return transform(patterns,*this);
192  }
193 
194  /// \brief Model evaluation as an operator for a single pattern. This is a convenience function
195  ///
196  /// \param pattern the input of the model
197  /// \returns the response of the model
198  OutputType operator()(InputType const & pattern)const{
199  OutputType output;
200  eval(pattern,output);
201  return output;
202  }
203 
204  /// \brief Model evaluation as an operator for a single pattern. This is a convenience function
205  ///
206  /// \param patterns the input of the model
207  /// \returns the response of the model
208  BatchOutputType operator()(BatchInputType const & patterns)const{
209  BatchOutputType output;
210  eval(patterns,output);
211  return output;
212  }
213 
214  /// \brief calculates the weighted sum of derivatives w.r.t the parameters.
215  ///
216  /// \param pattern the patterns to evaluate
217  /// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
218  /// \param state intermediate results stored by eval to speed up calculations of the derivatives
219  /// \param derivative the calculated derivative as sum over all derivates of all patterns
221  BatchInputType const & pattern,
222  BatchOutputType const& outputs,
223  BatchOutputType const & coefficients,
224  State const& state,
225  RealVector& derivative
226  )const{
228  }
229 
230  ///\brief calculates the weighted sum of derivatives w.r.t the inputs
231  ///
232  /// \param pattern the patterns to evaluate
233  /// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
234  /// \param state intermediate results stored by eval to sped up calculations of the derivatives
235  /// \param derivative the calculated derivative for every pattern
237  BatchInputType const & pattern,
238  BatchOutputType const& outputs,
239  BatchOutputType const & coefficients,
240  State const& state,
241  BatchInputType& derivative
242  )const{
244  }
245 
246  ///\brief calculates weighted input and parameter derivative at the same time
247  ///
248  /// Sometimes, both derivatives are needed at the same time. But sometimes, when calculating the
249  /// weighted parameter derivative, the input derivative can be calculated for free. This is for example true for
250  /// the feed-forward neural networks. However, there exists the obvious default implementation to just calculate
251  /// the derivatives one after another.
252  /// \param patterns the patterns to evaluate
253  /// \param coefficients the coefficients which are used to calculate the weighted sum
254  /// \param state intermediate results stored by eval to sped up calculations of the derivatives
255  /// \param parameterDerivative the calculated parameter derivative as sum over all derivates of all patterns
256  /// \param inputDerivative the calculated derivative for every pattern
257  virtual void weightedDerivatives(
258  BatchInputType const & patterns,
259  BatchOutputType const& outputs,
260  BatchOutputType const & coefficients,
261  State const& state,
262  RealVector& parameterDerivative,
263  BatchInputType& inputDerivative
264  )const{
265  weightedParameterDerivative(patterns, outputs, coefficients,state,parameterDerivative);
266  weightedInputDerivative(patterns, outputs, coefficients,state,inputDerivative);
267  }
268 };
269 
270 
/**
 * \ingroup shark_globals
 *
 * @{
 */

277 /// \brief Initialize model parameters normally distributed.
278 ///
279 /// \param model: model to be initialized
280 /// \param s: variance of mean-free normal distribution
281 template <class InputType, class OutputType>
283  RealVector weights(model.numberOfParameters());
284  std::generate(weights.begin(), weights.end(), [&](){return random::gauss(random::globalRng,0,s);});
285  model.setParameterVector(weights);
286 }
287 
288 
289 /// \brief Initialize model parameters uniformly at random.
290 ///
291 /// \param model model to be initialized
292 /// \param lower lower bound of initialization interval
293 /// \param upper upper bound of initialization interval
294 template <class InputType, class OutputType>
295 void initRandomUniform(AbstractModel<InputType, OutputType>& model, double lower, double upper){
296  RealVector weights(model.numberOfParameters());
297  std::generate(weights.begin(), weights.end(), [&](){return random::uni(random::globalRng,lower,upper);});
298  model.setParameterVector(weights);
299 }
300 
/** @}*/

303 namespace detail{
304 //Required for correct shape infering of transform
305 template<class I, class O, class V>
306 struct InferShape<AbstractModel<I,O,V> >{
307  static Shape infer(AbstractModel<I,O,V> const& f){return f.outputShape();}
308 };
309 
310 }
311 
312 }
313 
314 
315 #endif