RNNet.h
//===========================================================================
/*!
 *
 *
 * \brief Offers the functions to create and to work with a
 * recurrent neural network.
 *
 *
 *
 * \author O. Krause
 * \date 2010
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_RNNET_H
#define SHARK_MODELS_RNNET_H

#include <shark/Core/DLLSupport.h>
#include <shark/Models/AbstractModel.h>
#include <shark/Models/RecurrentStructure.h>

namespace shark{

//! \brief A recurrent neural network regression model that learns
//! with Backpropagation Through Time
//!
//! This class defines a recurrent neural network regression
//! model. Its input and output types are Sequences, which represent
//! time series of inputs and outputs. The gradient is calculated via
//! Backpropagation Through Time (BPTT).
//!
//! The inputs of this network are not sigmoidal, but the hidden and output
//! neurons are.
//!
//! This class is optimized for batch learning. See OnlineRNNet for an online
//! version.
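//!
//! A minimal usage sketch is shown below. It assumes that RecurrentStructure
//! offers a setStructure(inputs, hidden, outputs) overload; the layer sizes and
//! the way the input batch is filled are purely illustrative.
//! \code
//! RecurrentStructure structure;
//! structure.setStructure(2, 5, 1);        // 2 inputs, 5 hidden neurons, 1 output (illustrative)
//! RNNet network(&structure);              // the structure may be shared, e.g. with an OnlineRNNet
//!
//! RNNet::BatchInputType inputs;           // batch of input sequences, filled elsewhere
//! RNNet::BatchOutputType outputs;
//! boost::shared_ptr<State> state = network.createState();
//! network.eval(inputs, outputs, *state);  // one output sequence per input sequence
//! \endcode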
class RNNet:public AbstractModel<Sequence,Sequence >
{
private:
    struct InternalState: public State{
        //! Activation of the neurons after processing the time series.
        //! timeActivation(b,t,i) is indexed like a 3-dimensional array: the first
        //! dimension selects the b-th element of the batch, the second dimension
        //! the timestep t, and the third dimension the activation of the i-th
        //! neuron at that timestep of that batch element.
        std::vector<Sequence> timeActivation;
    };
public:

    //! creates a neural network with a potentially shared structure
    //! \param structure the structure of this neural network. It can be shared between multiple instances or with the
    //! online version of this net.
    RNNet(RecurrentStructure* structure):mpe_structure(structure){
        SHARK_RUNTIME_CHECK(mpe_structure,"[RNNet] structure is not allowed to be empty");
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "RNNet"; }

    //! \brief Sets the warm-up sequence
    //!
    //! Usually, when processing a new data series, all the
    //! `states' of the network are reset to zero. By `states' I mean the
    //! buffered activations to which the time-delayed synapses refer.
    //! Effectively, this means one assumes a zero activation history.
    //!
    //! The advantage of this is that it makes the model behavior well
    //! defined. The disadvantage is that you can't predict a time series
    //! well with a zero history. Thus, one should use a data series to
    //! initialize the network, i.e., to let it converge into a `normal'
    //! dynamic state from which prediction of new data is possible.
    //! This phase is called the warm-up phase.
    //!
    //! With this method, the warm-up sequence can be set, which is then used
    //! during the warm-up phase.
    //!
    //! \param warmUpSequence the warm-up sequence used before each batch of data. The
    //! default is an empty sequence
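    //!
    //! A short sketch, given an RNNet instance `network` as in the class-level
    //! example (the ten time steps and the all-zero inputs are purely
    //! illustrative, and Sequence is assumed to behave like a standard
    //! sequence container of RealVector):
    //! \code
    //! Sequence warmUp(10, RealVector(network.inputSize(), 0.0)); // ten zero time steps
    //! network.setWarmUpSequence(warmUp);  // applied before each following evaluation
    //! \endcode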
    void setWarmUpSequence(Sequence const& warmUpSequence = Sequence()){
        m_warmUpSequence = warmUpSequence;
    }

    boost::shared_ptr<State> createState()const{
        return boost::shared_ptr<State>(new InternalState());
    }

    //! \brief Feed a data series to the model. The output (i.e., the time
    //! series of activations of the output neurons) is copied into the
    //! output buffer.
    //!
    //! \param pattern batch of time series for the network.
    //! \param output Used to store the outputs of the network.
    //! \param state stores additional information which can be reused for the computation of the derivative
    SHARK_EXPORT_SYMBOL void eval(BatchInputType const& pattern, BatchOutputType& output, State& state)const;

    /// obtain the input dimension
    std::size_t inputSize() const{
        return mpe_structure->inputs();
    }

    /// obtain the output dimension
    std::size_t outputSize() const{
        return mpe_structure->outputs();
    }

    //! \brief Calculates the weighted sum of gradients w.r.t. the parameters.
    //!
    //! The RNNet internally uses BPTT to calculate the gradient and
    //! stores the BPTT error values needed for the calculation of the gradient.
    //!
    //! Given the gradient of the loss function \f$ \frac{\delta L(t)}{\delta y_i(t)}\f$,
    //! the BPTT error is calculated as
    //! \f[ \frac{\delta E}{\delta y_i(t)}= \mu_i \frac{\delta L(t)}{\delta y_i(t)}
    //! +\sum_{j=1}^N \frac{\delta E}{\delta y_j(t+1)} y_j'(t+1) w^R_{ij} \f]
    //! where \f$ L \f$ is the loss, \f$ y_i \f$ the output of the i-th neuron and
    //! \f$ w^R_{ij} \f$ is the recurrent weight of the connection from neuron i to j.
    //! The factor \f$ \mu_i \f$ is one if the neuron is an output neuron, and zero otherwise.
    //!
    //! \todo expand documentation
    //!
    //! \param patterns the batch of patterns to evaluate
    //! \param coefficients the coefficients which are used to calculate the weighted sum
    //! \param state the last state stored during eval
    //! \param gradient the calculated gradient
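    //!
    //! A typical gradient computation is sketched below. In practice this call is
    //! usually issued by an error function object rather than by hand; filling
    //! the coefficients with the loss derivatives is only indicated by a comment:
    //! \code
    //! boost::shared_ptr<State> state = network.createState();
    //! RNNet::BatchOutputType predictions;
    //! network.eval(inputs, predictions, *state);   // forward pass, activations are stored in state
    //!
    //! RNNet::BatchInputType coefficients;          // to be filled with dL/dy for every time step
    //! RealVector gradient;
    //! network.weightedParameterDerivative(inputs, coefficients, *state, gradient);
    //! \endcode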
    SHARK_EXPORT_SYMBOL void weightedParameterDerivative(
        BatchInputType const& patterns, BatchInputType const& coefficients, State const& state,
        RealVector& gradient
    )const;

    //! get internal parameters of the model
    RealVector parameterVector() const{
        return mpe_structure->parameterVector();
    }

    //! set internal parameters of the model
    //! \param newParameters the new parameters of the model. This changes the internally referenced RecurrentStructure
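    //!
    //! Because the parameters live in the (possibly shared) RecurrentStructure,
    //! setting them also affects every other network using that structure. A small
    //! sketch, with an all-zero start chosen purely for illustration:
    //! \code
    //! RealVector params(network.numberOfParameters(), 0.0);  // all-zero parameter vector
    //! network.setParameterVector(params);                    // writes through to the RecurrentStructure
    //! \endcode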
    void setParameterVector(RealVector const& newParameters){
        mpe_structure->setParameterVector(newParameters);
    }

    //! number of parameters of the network
    std::size_t numberOfParameters() const{
        return mpe_structure->parameters();
    }
protected:
    //! the warm-up sequence of the network
    Sequence m_warmUpSequence;

    //! the topology of the network.
    RecurrentStructure* mpe_structure;

    RealMatrix m_errorDerivative;
};
}

#endif //SHARK_MODELS_RNNET_H