#ifndef SHARK_MODELS_FFNET_H
#define SHARK_MODELS_FFNET_H

#include <boost/serialization/vector.hpp>

template<class HiddenNeuron, class OutputNeuron>
	struct InternalState: public State{

		void resize(std::size_t neurons, std::size_t patterns){
			responses.resize(neurons, patterns);
	: m_numberOfNeurons(0), m_inputNeurons(0), m_outputNeurons(0){
		m_features |= HAS_FIRST_PARAMETER_DERIVATIVE;
		m_features |= HAS_FIRST_INPUT_DERIVATIVE;

		return m_inputNeurons;

		return m_outputNeurons;

		return m_numberOfNeurons;

		return numberOfNeurons() - inputSize() - outputSize();
		return m_layerMatrix;

		return m_layerMatrix[layer];

	void setLayer(std::size_t layerNumber, RealMatrix const& m, RealVector const& bias){
		SIZE_CHECK(m.size1() == m_layerMatrix[layerNumber].size1());
		SIZE_CHECK(m.size2() == m_layerMatrix[layerNumber].size2());
		m_layerMatrix[layerNumber] = m;
		std::size_t start = 0;
		for(std::size_t i = 0; i != layerNumber; ++i){
			start += m_layerMatrix[i].size1();

		setParameterVector(parameterVector());
		return m_backpropMatrix;

		return m_inputOutputShortcut;

		return m_hiddenNeuron;

		return m_outputNeuron;

		return m_hiddenNeuron;

		return m_outputNeuron;
	RealVector bias(std::size_t layer) const{
		std::size_t start = 0;
		for(std::size_t i = 0; i != layer; ++i){
			start += layerMatrices()[i].size1();

		return subrange(m_bias, start, start + layerMatrices()[layer].size1());

		std::size_t numParams = m_inputOutputShortcut.size1() * m_inputOutputShortcut.size2();
		numParams += bias().size();
		for(std::size_t i = 0; i != layerMatrices().size(); ++i){
			numParams += layerMatrices()[i].size1() * layerMatrices()[i].size2();
		RealVector parameters(numberOfParameters());

		for(auto const& mat: m_layerMatrix){

		for(auto& mat: m_layerMatrix){
		// rebuild the backpropagation matrices as (partially) transposed views of the layer weights
		std::size_t layeriStart = 0;
		for(std::size_t layeri = 0; layeri != m_layerMatrix.size(); ++layeri){
			std::size_t columni = 0;
			std::size_t neuronsi = inputSize();

				neuronsi = m_layerMatrix[layeri-1].size1();

			std::size_t layerjStart = layeriStart + neuronsi;
			for(std::size_t layerj = layeri; layerj != m_layerMatrix.size(); ++layerj){
				std::size_t neuronsj = m_layerMatrix[layerj].size1();

				// copy weights only if layer j actually receives input from layer i
				if(layerjStart - m_layerMatrix[layerj].size2() <= layeriStart){
					std::size_t weightStartj = layeriStart - (layerjStart - m_layerMatrix[layerj].size2());
					noalias(columns(m_backpropMatrix[layeri], columni, columni + neuronsj))
						= trans(columns(m_layerMatrix[layerj], weightStartj, weightStartj + neuronsi));

				layerjStart += neuronsj;

			layeriStart += neuronsi;
		InternalState const& s = state.toState<InternalState>();

		return boost::shared_ptr<State>(new InternalState());
	void evalLayer(std::size_t layer, RealMatrix const& patterns, RealMatrix& outputs) const{
		std::size_t numPatterns = patterns.size1();
		std::size_t numOutputs = m_layerMatrix[layer].size1();
		outputs.resize(numPatterns, numOutputs);

		// hidden layers use the hidden activation, the last layer the output activation
		if(layer < m_layerMatrix.size()-1) {
			noalias(outputs) = m_hiddenNeuron(outputs);

			noalias(outputs) = m_outputNeuron(outputs);

			evalLayer(layer, patterns.batch(i), result.batch(i));
	void eval(RealMatrix const& patterns, RealMatrix& output, State& state) const{
		InternalState& s = state.toState<InternalState>();
		std::size_t numPatterns = patterns.size1();

		s.resize(numberOfNeurons(), numPatterns);

		std::size_t beginNeuron = m_inputNeurons;

		for(std::size_t layer = 0; layer != m_layerMatrix.size(); ++layer){
			RealMatrix const& weights = m_layerMatrix[layer];

			std::size_t endNeuron = beginNeuron + weights.size1();

			auto const input = rows(s.responses, beginNeuron - weights.size2(), beginNeuron);

			auto responses = rows(s.responses, beginNeuron, endNeuron);

			auto bias = subrange(m_bias, beginNeuron - inputSize(), endNeuron - inputSize());

			if(layer < m_layerMatrix.size()-1) {
				noalias(responses) = m_hiddenNeuron(responses);

				if(m_inputOutputShortcut.size1() != 0){

				noalias(responses) = m_outputNeuron(responses);

			beginNeuron = endNeuron;

		output.resize(numPatterns, m_outputNeurons);
		noalias(output) = trans(rows(s.responses, m_numberOfNeurons - outputSize(), m_numberOfNeurons));
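	// s.responses stores one row per neuron and one column per pattern: input
	// neurons first, then the hidden layers in order, the output neurons last.
	// That is why the batch of outputs is read off as the transpose of the
	// last outputSize() rows above.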
		BatchInputType const& patterns, RealMatrix const& coefficients,
		State const& state, RealVector& gradient

		SIZE_CHECK(coefficients.size2() == m_outputNeurons);
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		std::size_t numPatterns = patterns.size1();

		RealMatrix delta(numberOfNeurons(), numPatterns, 0.0);
		auto outputDelta = rows(delta, delta.size1() - outputSize(), delta.size1());

		computeDelta(delta, state, false);
		computeParameterDerivative(delta, state, gradient);
		SIZE_CHECK(coefficients.size2() == m_outputNeurons);
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		std::size_t numPatterns = patterns.size1();

		RealMatrix delta(numberOfNeurons(), numPatterns, 0.0);
		auto outputDelta = rows(delta, delta.size1() - outputSize(), delta.size1());

		computeDelta(delta, state, true);
		inputDerivative.resize(numPatterns, inputSize());
		RealVector& parameterDerivative,

		SIZE_CHECK(coefficients.size2() == m_outputNeurons);
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		std::size_t numPatterns = patterns.size1();

		RealMatrix delta(numberOfNeurons(), numPatterns, 0.0);
		auto outputDelta = rows(delta, delta.size1() - outputSize(), delta.size1());

		computeDelta(delta, state, true);
		inputDerivative.resize(numPatterns, inputSize());

		computeParameterDerivative(delta, state, parameterDerivative);
		RealMatrix const& patterns, RealMatrix& delta,
		State const& state, RealVector& gradient

		InternalState const& s = state.toState<InternalState>();
		SIZE_CHECK(delta.size1() == m_numberOfNeurons);
		SIZE_CHECK(delta.size2() == patterns.size1());
		SIZE_CHECK(s.responses.size2() == patterns.size1());

		computeDelta(delta, state, false);

		computeParameterDerivative(delta, state, gradient);
		std::vector<size_t> const& layers,

		bool biasNeuron = true

		m_layerMatrix.resize(layers.size()-1);
		m_backpropMatrix.resize(layers.size()-1);

		m_inputNeurons = layers.front();
		m_outputNeurons = layers.back();
		m_numberOfNeurons = 0;
		for(std::size_t i = 0; i != layers.size(); ++i){
			m_numberOfNeurons += layers[i];

		m_bias.resize(m_numberOfNeurons - m_inputNeurons);

		// fully connected case: every layer sees the responses of all previous layers
		std::size_t numNeurons = layers[0];
		for(std::size_t i = 0; i != m_layerMatrix.size(); ++i){
			m_layerMatrix[i].resize(layers[i+1], numNeurons);
			m_backpropMatrix[i].resize(layers[i], m_numberOfNeurons - numNeurons);
			numNeurons += layers[i+1];

		m_inputOutputShortcut.resize(0, 0);

		// layer-wise connectivity: every layer is connected only to its predecessor
		for(std::size_t i = 0; i != m_layerMatrix.size(); ++i){
			m_layerMatrix[i].resize(layers[i+1], layers[i]);
			m_backpropMatrix[i].resize(layers[i], layers[i+1]);

		m_inputOutputShortcut.resize(m_outputNeurons, m_inputNeurons);
		std::vector<size_t> layer(3);

		setStructure(layer, connectivity, bias);

		std::vector<size_t> layer(4);

		setStructure(layer, connectivity, bias);
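	// Usage sketch (illustration only): building a small network with the
	// layer-size overload of setStructure declared above. The neuron types and
	// the omitted connectivity/bias arguments are assumptions, not taken from
	// this header.
	//
	//   FFNet<LogisticNeuron, LinearNeuron> net;
	//   std::vector<size_t> layers = {2, 10, 1};      // input, hidden, output
	//   net.setStructure(layers /*, connectivity, biasNeuron */);
	//   RealVector params(net.numberOfParameters());  // e.g. randomly initialized
	//   net.setParameterVector(params);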
		archive >> m_inputNeurons;
		archive >> m_outputNeurons;
		archive >> m_numberOfNeurons;
		archive >> m_layerMatrix;
		archive >> m_backpropMatrix;
		archive >> m_inputOutputShortcut;

		archive << m_inputNeurons;
		archive << m_outputNeurons;
		archive << m_numberOfNeurons;
		archive << m_layerMatrix;
		archive << m_backpropMatrix;
		archive << m_inputOutputShortcut;
		RealMatrix& delta,
		State const& state,
		bool computeInputDelta

		SIZE_CHECK(delta.size1() == numberOfNeurons());
		InternalState const& s = state.toState<InternalState>();

		// multiply the output deltas with the derivative of the output activation
		auto outputDelta = rows(delta, delta.size1() - outputSize(), delta.size1());
		auto outputResponse = rows(s.responses, delta.size1() - outputSize(), delta.size1());
		noalias(outputDelta) *= m_outputNeuron.derivative(outputResponse);

		// propagate the deltas backwards, layer by layer
		std::size_t endNeuron = delta.size1() - outputSize();
		std::size_t layer = m_backpropMatrix.size() - 1;
		std::size_t endIndex = computeInputDelta ? 0 : inputSize();
		while(endNeuron > endIndex){

			RealMatrix const& weights = m_backpropMatrix[layer];
			std::size_t beginNeuron = endNeuron - weights.size1();

			auto layerDelta = rows(delta, beginNeuron, endNeuron);
			auto layerDeltaInput = rows(delta, endNeuron, endNeuron + weights.size2());
			auto layerResponse = rows(s.responses, beginNeuron, endNeuron);

			noalias(layerDelta) += prod(weights, layerDeltaInput);

			noalias(layerDelta) *= m_hiddenNeuron.derivative(layerResponse);

			endNeuron = beginNeuron;

		if(inputOutputShortcut().size1() != 0)
	void computeParameterDerivative(RealMatrix const& delta, State const& state, RealVector& gradient) const{
		SIZE_CHECK(delta.size1() == numberOfNeurons());
		InternalState const& s = state.toState<InternalState>();

		gradient.resize(numberOfParameters());

		// weight gradients, one layer matrix at a time
		std::size_t layerStart = inputSize();
		for(std::size_t layer = 0; layer != layerMatrices().size(); ++layer){

			std::size_t layerRows = layerMatrices()[layer].size1();
			std::size_t layerColumns = layerMatrices()[layer].size2();
			std::size_t params = layerRows * layerColumns;
			auto gradMatrix = to_matrix(subrange(gradient, pos, pos + params), layerRows, layerColumns);
			auto deltaLayer = rows(delta, layerStart, layerStart + layerRows);
			auto inputLayer = rows(s.responses, layerStart - layerColumns, layerStart);

			layerStart += layerRows;

		// bias gradient: one entry per non-input neuron, summed over the batch
		for (std::size_t neuron = m_inputNeurons; neuron < m_numberOfNeurons; neuron++){
			gradient(pos) = sum(row(delta, neuron));
		// gradient of the input-output shortcut weights, if the shortcut exists
		if(inputOutputShortcut().size1() != 0){
			std::size_t params = inputSize() * outputSize();
			auto gradMatrix = to_matrix(subrange(gradient, pos, pos + params), outputSize(), inputSize());
			auto deltaLayer = rows(delta, delta.size1() - outputSize(), delta.size1());
			auto inputLayer = rows(s.responses, 0, inputSize());
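	// The gradient vector filled by this function is laid out as: the weights of
	// every layer matrix in order, then one bias value per non-input neuron, then
	// (if present) the input-output shortcut weights. This order has to match the
	// parameter layout used by parameterVector()/setParameterVector().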
	std::size_t m_numberOfNeurons;
	std::size_t m_inputNeurons;
	std::size_t m_outputNeurons;

	// weight matrices between the layers
	std::vector<RealMatrix> m_layerMatrix;

	// optional direct connections from the input neurons to the output neurons
	RealMatrix m_inputOutputShortcut;

	// transposed weight views used during backpropagation
	std::vector<RealMatrix> m_backpropMatrix;

	// activation function of the hidden neurons
	HiddenNeuron m_hiddenNeuron;

	// activation function of the output neurons
	OutputNeuron m_outputNeuron;