//===========================================================================
/*!
 *
 * \brief Offers the functions to create and to work with a
 * recurrent neural network.
 *
 * \author O. Krause
 * \date 2010
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_RNNET_H
#define SHARK_MODELS_RNNET_H

#include <shark/Core/DLLSupport.h>
#include <shark/Models/AbstractModel.h>
#include <shark/Models/RecurrentStructure.h>

namespace shark{

//! \brief A recurrent neural network regression model that learns
//! with Back Propagation Through Time
//!
//! This class defines a recurrent neural network regression
//! model. Its input and output types are matrices which represent
//! sequences of inputs. The gradient is calculated via
//! Back Propagation Through Time (BPTT).
//!
//! The inputs of this network are not sigmoidal, but the hidden and output
//! neurons are.
//!
//! This class is optimized for batch learning. See OnlineRNNet for an online
//! version.
class RNNet: public AbstractModel<Sequence,Sequence>
{
private:
    struct InternalState: public State{
        //! Activation of the neurons after processing the time series.
        //! timeActivation(b,t,i) is a 3-dimensional array: the first index
        //! selects the b-th element of the batch, the second the timestep t,
        //! and the third the activation of neuron i at that timestep.
        std::vector<Sequence> timeActivation;
    };
public:

    //! creates a neural network with a potentially shared structure
    //! \param structure the structure of this neural network. It can be shared between multiple instances or with the
    //! online version of this net.
    RNNet(RecurrentStructure* structure): mpe_structure(structure){
        SHARK_RUNTIME_CHECK(mpe_structure, "[RNNet] structure is not allowed to be empty");
        m_features |= HAS_FIRST_PARAMETER_DERIVATIVE;
    }
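    // A minimal construction sketch (illustrative, not part of this header):
    // the exact calls used to configure a RecurrentStructure live in
    // RecurrentStructure.h and are assumed here.
    //
    //     RecurrentStructure structure;   // topology, possibly shared with OnlineRNNet
    //     // ... configure inputs/hidden/outputs via RecurrentStructure's API ...
    //     RNNet net(&structure);          // the net only stores the pointer, so
    //                                     // `structure` must outlive `net`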
    /// \brief From INameable: return the class name.
    std::string name() const
    { return "RNNet"; }

    //! \brief Sets the warm-up sequence
    //!
    //! Usually, when processing a new data series, all the `states' of the
    //! network, i.e., the buffered activations to which time-delayed synapses
    //! refer, are reset to zero. Effectively, this assumes a zero activation
    //! history.
    //!
    //! The advantage of this is that it makes the model behavior well
    //! defined. The disadvantage is that a time series cannot be predicted
    //! well from a zero history. Thus, one should use a data series to
    //! initialize the network, i.e., to let it converge into a `normal'
    //! dynamic state from which prediction of new data is possible.
    //! This phase is called the warm-up phase.
    //!
    //! With this method, the warm-up sequence can be set, which is then used
    //! during the warm-up phase.
    //!
    //! \param warmUpSequence the warm-up sequence used before each batch of data. The
    //! default is an empty sequence.
    void setWarmUpSequence(Sequence const& warmUpSequence = Sequence()){
        m_warmUpSequence = warmUpSequence;
    }
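    // Warm-up usage sketch (hypothetical: `net` is a constructed RNNet and
    // `history` a Sequence of the inputs that immediately precede the data
    // to be predicted). Installing the prefix lets the net converge into a
    // realistic dynamic state instead of starting from a zero history:
    //
    //     Sequence history = /* recent input vectors */;
    //     net.setWarmUpSequence(history);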
    boost::shared_ptr<State> createState() const{
        return boost::shared_ptr<State>(new InternalState());
    }

    //! \brief Feeds a data series to the model. The output (i.e., the time
    //! series of activations of the output neurons) is copied into the
    //! output buffer.
    //!
    //! \param pattern batch of time series for the network.
    //! \param output used to store the outputs of the network.
    //! \param state stores additional information which can be reused for the computation of the derivative
    SHARK_EXPORT_SYMBOL void eval(BatchInputType const& pattern, BatchOutputType& output, State& state) const;
    using AbstractModel<Sequence,Sequence>::eval;
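    // Evaluation sketch (hypothetical names, continuing the construction
    // sketch above; `inputs` is a batch of sequences):
    //
    //     boost::shared_ptr<State> state = net.createState();
    //     BatchOutputType outputs;
    //     net.eval(inputs, outputs, *state);  // also fills `state` for the derivative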
    /// obtain the input dimension
    std::size_t inputSize() const{
        return mpe_structure->inputs();
    }

    /// obtain the output dimension
    std::size_t outputSize() const{
        return mpe_structure->outputs();
    }

    //! \brief Calculates the weighted sum of gradients w.r.t. the parameters.
    //!
    //! The RNNet internally uses BPTT to calculate the gradient and
    //! stores the BPTT error values for the calculation of the gradient.
    //!
    //! Given the gradient of the loss function \f$ \frac{\delta L(t)}{\delta y_i(t)} \f$,
    //! the BPTT error is calculated as
    //! \f[ \frac{\delta E}{\delta y_i(t)} = \mu_i \frac{\delta L(t)}{\delta y_i(t)}
    //! + \sum_{j=1}^N \frac{\delta E}{\delta y_j(t+1)} y_j'(t+1) w^R_{ji} \f]
    //! where \f$ L \f$ is the loss, \f$ y_i \f$ the i-th neuron, and
    //! \f$ w^R_{ij} \f$ is the recurrent weight of the connection from neuron i to neuron j.
    //! The factor \f$ \mu_i \f$ is one if the neuron is an output neuron, else zero.
    //!
    //! \todo expand documentation
    //!
    //! \param patterns the batch of patterns to evaluate
    //! \param coefficients the coefficients which are used to calculate the weighted sum
    //! \param state the last state stored during eval
    //! \param gradient the calculated gradient
    SHARK_EXPORT_SYMBOL void weightedParameterDerivative(
        BatchInputType const& patterns, BatchInputType const& coefficients, State const& state,
        RealVector& gradient
    ) const;
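    // Gradient sketch (hypothetical, continuing the evaluation sketch above;
    // `coefficients` would hold the per-output loss derivatives supplied by
    // an error function):
    //
    //     RealVector gradient;
    //     net.weightedParameterDerivative(inputs, coefficients, *state, gradient);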
    //! get internal parameters of the model
    RealVector parameterVector() const{
        return mpe_structure->parameterVector();
    }

    //! set internal parameters of the model
    //! \param newParameters the new parameters of the model. This changes the internally referenced RecurrentStructure.
    void setParameterVector(RealVector const& newParameters){
        mpe_structure->setParameterVector(newParameters);
    }

    //! number of parameters of the network
    std::size_t numberOfParameters() const{
        return mpe_structure->parameters();
    }
protected:
    //! the warm-up sequence of the network
    Sequence m_warmUpSequence;

    //! the topology of the network.
    RecurrentStructure* mpe_structure;

    //! stores the BPTT error values used to compute the gradient
    RealMatrix m_errorDerivative;
};
}

#endif // SHARK_MODELS_RNNET_H