ConcatenatedModel.h
//===========================================================================
/*!
 *
 *
 * \brief       concatenation of two models, with type erasure
 *
 *
 *
 * \author      O. Krause
 * \date        2010-2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODEL_CONCATENATEDMODEL_H
#define SHARK_MODEL_CONCATENATEDMODEL_H

#include <shark/Models/AbstractModel.h>
#include <boost/scoped_ptr.hpp>
#include <boost/serialization/scoped_ptr.hpp>

namespace shark {

///\brief ConcatenatedModel concatenates two models such that the output of the first model is the input of the second.
///
///Sometimes a series of models is needed to produce the desired output, for example when input data must be
///normalized before it can be fed into the trained model. In this case, the ConcatenatedModel can be used to
///represent this series as one model.
///The easiest way to build one is via operator>> of AbstractModel:
///ConcatenatedModel<VectorType> model = model1 >> model2;
///The output type of model1 must match the input type of model2; the combined model then maps the input of
///model1 to the output of model2. Further models can be appended to the chain with add(), e.g. model.add(&model3, true).
///Warning: the chained models must outlive model. If they are destroyed first, the behavior is undefined.
template<class VectorType>
class ConcatenatedModel: public AbstractModel<VectorType, VectorType, VectorType> {
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ConcatenatedModel"; }


	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_layers.front().model->inputShape();
	}
	///\brief Returns the shape of the output
	Shape outputShape() const{
		return m_layers.back().model->outputShape();
	}


	///\brief Adds a model to the end of the sequence; if optimize is true, its parameters are exposed for optimization.
	void add(AbstractModel<VectorType, VectorType>* layer, bool optimize){
		m_layers.push_back({layer,optimize});
		enableModelOptimization(m_layers.size()-1, optimize);//recompute capabilities
	}

	///\brief Sets whether the parameters of the index-th model should be optimized.
	///
	/// If the model has non-differentiable submodels, disabling those can make
	/// the whole model differentiable.
	/// Note that the models are ordered as model0 >> model1 >> model2 >> ...
	void enableModelOptimization(std::size_t index, bool opt){
		SIZE_CHECK(index < m_layers.size());
		m_layers[index].optimize = opt;
		this->m_features.reset();
		bool inputDerivative = true;
		bool parameterDerivative = true;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			auto const& layer = m_layers[m_layers.size() - k - 1];//we iterate backwards through the layers
			//an optimized layer needs its own parameter derivative and input derivatives of all layers after it
			if( layer.optimize && (!layer.model->hasFirstParameterDerivative() || !inputDerivative)){
				parameterDerivative = false;
			}
			if( !layer.model->hasFirstInputDerivative()){
				inputDerivative = false;
			}
		}

		if (parameterDerivative){
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		}

		if (inputDerivative){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}

	}
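
	// Illustrative note (assumed usage, not part of the original header): for a ConcatenatedModel m whose
	// first layer is a fixed, non-differentiable preprocessing step, calling m.enableModelOptimization(0, false)
	// keeps the remaining chain differentiable and removes that layer's parameters from parameterVector().
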
	ParameterVectorType parameterVector() const {
		ParameterVectorType params(numberOfParameters());
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = layer.model->parameterVector();
			noalias(subrange(params,pos,pos+layerParams.size())) = layerParams;
			pos += layerParams.size();
		}
		return params;
	}

	void setParameterVector(ParameterVectorType const& newParameters) {
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = subrange(newParameters,pos,pos+layer.model->numberOfParameters());
			layer.model->setParameterVector(layerParams);
			pos += layerParams.size();
		}
	}

	std::size_t numberOfParameters() const{
		std::size_t numParams = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			numParams += layer.model->numberOfParameters();
		}
		return numParams;
	}

	boost::shared_ptr<State> createState()const{
		InternalState* state = new InternalState;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			state->state.push_back(m_layers[i].model->createState());
			state->intermediates.push_back(BatchOutputType());
		}
		return boost::shared_ptr<State>(state);
	}

	///\brief Returns the response of the index-th layer for the batch evaluated with this state.
	BatchOutputType const& hiddenResponses(State const& state, std::size_t index)const{
		InternalState const& s = state.toState<InternalState>();
		return s.intermediates[index];
	}

	///\brief Returns the internal state of the index-th layer.
	State const& hiddenState(State const& state, std::size_t index)const{
		InternalState const& s = state.toState<InternalState>();
		return *s.state[index];
	}

	using base_type::eval;
	void eval(BatchInputType const& patterns, BatchOutputType& outputs)const {
		BatchOutputType intermediates;
		outputs = patterns;
		for(auto layer: m_layers){
			swap(intermediates,outputs);
			layer.model->eval(intermediates,outputs);
		}
	}
	void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
		InternalState& s = state.toState<InternalState>();
		outputs = patterns;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			if(i == 0)
				m_layers[i].model->eval(patterns,s.intermediates[i], *s.state[i]);
			else
				m_layers[i].model->eval(s.intermediates[i-1],s.intermediates[i], *s.state[i]);
		}
		outputs = s.intermediates.back();
	}
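	// The state-based eval() above stores every layer's response in InternalState::intermediates;
	// the weighted*Derivative() members below reuse these cached responses when propagating
	// coefficients backwards through the chain.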

	void weightedParameterDerivative(
		BatchInputType const& patterns,
		BatchOutputType const & outputs,
		BatchOutputType const& coefficients,
		State const& state,
		RealVector& gradient
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		BatchOutputType inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k -1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(inputDerivativeLast,inputDerivative);
			//if the current layer is not optimized, only the chain rule for the inputs needs to be continued
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				if(i != 0) //the lowest layer does not need to propagate an input derivative
					m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				RealVector paramDerivative;
				if(i != 0){//in an intermediate layer, compute the chain rule for parameters and inputs
					m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
				}
				else{//the lowest layer only needs the parameter derivative
					m_layers[i].model->weightedParameterDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative);
				}
				noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}
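
	// Note on the gradient layout: parameterVector() packs the parameters of the optimized layers in
	// front-to-back order, while the loop above visits the layers back-to-front; paramEnd therefore
	// moves from the end of the gradient towards its start so that each layer writes into its own block.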

	void weightedInputDerivative(
		BatchInputType const& patterns,
		BatchOutputType const & outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchOutputType& derivatives
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType derivativeLast;
		derivatives = coefficients;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k -1;//we iterate backwards through the layers

			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(derivativeLast,derivatives);
			m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], derivativeLast, *s.state[i], derivatives);
		}
	}

	virtual void weightedDerivatives(
		BatchInputType const & patterns,
		BatchOutputType const & outputs,
		BatchOutputType const & coefficients,
		State const& state,
		RealVector& gradient,
		BatchInputType& inputDerivative
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k -1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(inputDerivativeLast,inputDerivative);
			//if the current layer does not need to be optimized, we just compute the chain rule for the inputs
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				RealVector paramDerivative;
				m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
				noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}

	/// From ISerializable
	void read( InArchive & archive ){
		for(auto& layer: m_layers){//use a reference so that the restored optimize flag is kept
			archive >> *layer.model;
			archive >> layer.optimize;
		}
	}

	/// From ISerializable
	void write( OutArchive & archive ) const{
		for(auto layer: m_layers){
			archive << *layer.model;
			archive << layer.optimize;
		}
	}
private:
	struct Layer{
		AbstractModel<VectorType, VectorType>* model;
		bool optimize;
	};
	std::vector<Layer> m_layers;

	struct InternalState: State{
		std::vector<boost::shared_ptr<State> > state;
		std::vector<BatchOutputType> intermediates;
	};
};


///\brief Connects two AbstractModels so that the output of the first model is the input of the second.
template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	AbstractModel<VectorType, VectorType>& firstModel,
	AbstractModel<VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence;
	sequence.add(&firstModel, true);
	sequence.add(&secondModel, true);
	return sequence;
}

template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	ConcatenatedModel<VectorType> const& firstModel,
	AbstractModel<VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence = firstModel;
	sequence.add(&secondModel, true);
	return sequence;
}
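
// Usage sketch (illustration only, assuming a normalizer/network pair; Normalizer and LinearModel are
// other Shark models, and the code that trains them is omitted here):
//
//   Normalizer<RealVector> normalizer;                            // e.g. trained to rescale the inputs
//   LinearModel<RealVector> network;                              // the model that is actually optimized
//   ConcatenatedModel<RealVector> model = normalizer >> network;
//   model.enableModelOptimization(0, false);                      // freeze the normalizer during training
//   RealVector params = model.parameterVector();                  // contains only the network's parameters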

}
#endif