#ifndef SHARK_MODEL_CONCATENATEDMODEL_H
#define SHARK_MODEL_CONCATENATEDMODEL_H

#include <shark/Models/AbstractModel.h>
#include <boost/scoped_ptr.hpp>
#include <boost/serialization/scoped_ptr.hpp>

namespace shark {

///\brief ConcatenatedModel concatenates a series of models so that the output of each layer is the input of the next.
template<class VectorType>
class ConcatenatedModel: public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;
	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ConcatenatedModel"; }

	///\brief Returns the expected shape of the input, taken from the first layer.
	Shape inputShape() const{
		return m_layers.front().model->inputShape();
	}

	///\brief Returns the shape of the output, taken from the last layer.
	Shape outputShape() const{
		return m_layers.back().model->outputShape();
	}

	///\brief Appends a model to the end of the sequence; optimize controls whether its parameters are exposed to the optimizer.
	void add(AbstractModel<VectorType, VectorType, VectorType>* layer, bool optimize){
		m_layers.push_back({layer, optimize});
		enableModelOptimization(m_layers.size() - 1, optimize);//recompute derivative capabilities
	}
	///\brief Sets whether the parameters of the index-th model should be optimized.
	///
	/// If disabled, the layer's parameters are excluded from parameterVector() and the gradient.
	void enableModelOptimization(std::size_t index, bool opt){
		m_layers[index].optimize = opt;
		this->m_features.reset();
		//the sequence has a parameter derivative only if every optimized layer has one
		//and every layer above it can propagate the error derivative back through its input
		bool inputDerivative = true;
		bool parameterDerivative = true;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			auto const& layer = m_layers[m_layers.size() - k - 1];
			if(layer.optimize && (!layer.model->hasFirstParameterDerivative() || !inputDerivative)){
				parameterDerivative = false;
			}
			if(!layer.model->hasFirstInputDerivative()){
				inputDerivative = false;
			}
		}
		if (parameterDerivative){
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		}
		if (inputDerivative){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}
	}
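	//A minimal usage sketch (the model names normalizer and net are illustrative):
	//freeze a pretrained first stage and train only the second one.
	//  ConcatenatedModel<RealVector> sequence;
	//  sequence.add(&normalizer, false);//parameters of normalizer stay fixed
	//  sequence.add(&net, true);//only net is exposed to the optimizer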
	ParameterVectorType parameterVector() const {
		ParameterVectorType params(numberOfParameters());
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;//skip layers excluded from optimization
			ParameterVectorType layerParams = layer.model->parameterVector();
			noalias(subrange(params, pos, pos + layerParams.size())) = layerParams;
			pos += layerParams.size();
		}
		return params;
	}
	void setParameterVector(ParameterVectorType const& newParameters) {
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = subrange(newParameters, pos, pos + layer.model->numberOfParameters());
			layer.model->setParameterVector(layerParams);
			pos += layerParams.size();
		}
	}
	std::size_t numberOfParameters() const{
		std::size_t numParams = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			numParams += layer.model->numberOfParameters();
		}
		return numParams;
	}
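	//Note that numberOfParameters(), parameterVector() and setParameterVector() all
	//skip layers added with optimize == false, so an optimizer working on this model
	//only ever sees the trainable subset of parameters.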
	///\brief Creates the combined state: one sub-state and one intermediate output batch per layer.
	boost::shared_ptr<State> createState() const{
		InternalState* state = new InternalState;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			state->state.push_back(m_layers[i].model->createState());
			state->intermediates.push_back(BatchOutputType());//filled during eval
		}
		return boost::shared_ptr<State>(state);
	}
	///\brief Returns the cached output of the index-th layer for the last call to eval.
	BatchOutputType const& hiddenResponses(State const& state, std::size_t index) const{
		InternalState const& s = state.toState<InternalState>();
		return s.intermediates[index];
	}
	///\brief Returns the internal state of the index-th layer for the last call to eval.
	State const& hiddenState(State const& state, std::size_t index) const{
		InternalState const& s = state.toState<InternalState>();
		return *s.state[index];
	}
	using base_type::eval;

	void eval(BatchInputType const& patterns, BatchOutputType& outputs) const {
		BatchOutputType intermediates;
		outputs = patterns;//the input of the first layer
		for(auto layer: m_layers){
			swap(intermediates, outputs);//the previous output becomes the next input
			layer.model->eval(intermediates, outputs);
		}
	}
	void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state) const{
		InternalState& s = state.toState<InternalState>();
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			if(i == 0)//the first layer reads the patterns directly
				m_layers[i].model->eval(patterns, s.intermediates[i], *s.state[i]);
			else//every other layer reads the output of the previous layer
				m_layers[i].model->eval(s.intermediates[i-1], s.intermediates[i], *s.state[i]);
		}
		outputs = s.intermediates.back();
	}
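	//The stateful eval above caches the output of every layer in s.intermediates;
	//the derivative functions below reuse these cached values as the per-layer
	//inputs during backpropagation instead of re-evaluating the sequence.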
	void weightedParameterDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		RealVector& gradient
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		BatchOutputType inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		//backpropagation: walk the layers from last to first
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];
			swap(inputDerivativeLast, inputDerivative);
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				//no parameters to compute; only propagate the derivative to the layer below
				if(i != 0)//for i == 0 there is no layer below, so nothing to compute
					m_layers[i].model->weightedInputDerivative(*pInput, s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				RealVector paramDerivative;
				if(i != 0){
					m_layers[i].model->weightedDerivatives(*pInput, s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative, inputDerivative);
				}else{//the first layer does not need an input derivative
					m_layers[i].model->weightedParameterDerivative(*pInput, s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative);
				}
				noalias(subrange(gradient, paramEnd - paramDerivative.size(), paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}
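	//Note on gradient layout: the loop above visits layers back to front, so the
	//gradient is filled from the end towards the start via paramEnd. The resulting
	//layout matches the front-to-back concatenation used by parameterVector().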
	void weightedInputDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchOutputType& derivatives
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType derivativeLast;
		derivatives = coefficients;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];
			swap(derivativeLast, derivatives);
			m_layers[i].model->weightedInputDerivative(*pInput, s.intermediates[i], derivativeLast, *s.state[i], derivatives);
		}
	}
	///\brief Computes the parameter and the input derivative in a single backward sweep.
	virtual void weightedDerivatives(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		RealVector& gradient,
		BatchInputType& inputDerivative
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];
			swap(inputDerivativeLast, inputDerivative);
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				//unlike weightedParameterDerivative, the input derivative is always required here
				m_layers[i].model->weightedInputDerivative(*pInput, s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				RealVector paramDerivative;
				m_layers[i].model->weightedDerivatives(*pInput, s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative, inputDerivative);
				noalias(subrange(gradient, paramEnd - paramDerivative.size(), paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}
	/// From ISerializable
	void read(InArchive& archive){
		for(auto& layer: m_layers){//take a reference: reading into a copy would lose the flag
			archive >> *layer.model;
			archive >> layer.optimize;
		}
	}
	/// From ISerializable
	void write(OutArchive& archive) const{
		for(auto const& layer: m_layers){
			archive << *layer.model;
			archive << layer.optimize;
		}
	}
private:
	struct Layer{
		AbstractModel<VectorType, VectorType, VectorType>* model;
		bool optimize;
	};
	std::vector<Layer> m_layers;

	struct InternalState: State{
		std::vector<boost::shared_ptr<State> > state;//one state object per layer
		std::vector<BatchOutputType> intermediates;//every layer's output, needed for backpropagation
	};
};
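//A hedged end-to-end sketch, assuming two compatible models layer1 and layer2
//(e.g. linear layers) whose output and input shapes match up:
//  ConcatenatedModel<RealVector> network;
//  network.add(&layer1, true);
//  network.add(&layer2, true);
//  RealMatrix outputs;
//  network.eval(inputs, outputs);//forward pass through both layers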
///\brief Connects two models so that the output of the first is the input of the second.
template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	AbstractModel<VectorType, VectorType, VectorType>& firstModel,
	AbstractModel<VectorType, VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence;
	sequence.add(&firstModel, true);
	sequence.add(&secondModel, true);
	return sequence;
}
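//Together with the overload below, this allows chains like
//  auto network = layer1 >> layer2 >> layer3;
//The result stores plain pointers, so the chained models must outlive the network.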
///\brief Appends a further model to an already concatenated sequence.
template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	ConcatenatedModel<VectorType> const& firstModel,
	AbstractModel<VectorType, VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence = firstModel;
	sequence.add(&secondModel, true);
	return sequence;
}

}
#endif