// NeuronLayers.h — activation (neuron) function definitions for Shark neural network layers.
/*!
 *
 *
 * \brief Implements activation (neuron) functions and an elementwise neuron layer model.
 *
 * \author O.Krause
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef MODELS_NEURONS_H
#define MODELS_NEURONS_H

#include <shark/LinAlg/Base.h>

namespace shark{
38 ///\brief Neuron which computes the hyperbolic tangenst with range [-1,1].
39 ///
40 ///The Tanh function is
41 ///\f[ f(x)=\tanh(x) = \frac 2 {1+exp^(-2x)}-1 \f]
42 ///it's derivative can be computed as
43 ///\f[ f'(x)= 1-f(x)^2 \f]
44 struct TanhNeuron{
45  typedef EmptyState State;
46  template<class Arg>
47  void evalInPlace(Arg& arg)const{
48  noalias(arg) = tanh(arg);
49  }
50 
51  template<class Arg>
52  void evalInPlace(Arg& arg, State&)const{
53  evalInPlace(arg);
54  }
55 
56  template<class Output, class Derivative>
57  void multiplyDerivative(Output const& output, Derivative& der, State const& )const{
58  noalias(der) *= typename Output::value_type(1) - sqr(output);
59  }
60 };
61 
62 ///\brief Neuron which computes the Logistic (logistic) function with range [0,1].
63 ///
64 ///The Logistic function is
65 ///\f[ f(x)=\frac 1 {1+exp^(-x)}\f]
66 ///it's derivative can be computed as
67 ///\f[ f'(x)= f(x)(1-f(x)) \f]
69  typedef EmptyState State;
70  template<class Arg>
71  void evalInPlace(Arg& arg)const{
72  noalias(arg) = sigmoid(arg);
73  }
74 
75  template<class Arg>
76  void evalInPlace(Arg& arg, State&)const{
77  evalInPlace(arg);
78  }
79 
80  template<class Output, class Derivative>
81  void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
82  noalias(der) *= output * (typename Output::value_type(1) - output);
83  }
84 };
85 
86 ///\brief Fast sigmoidal function, which does not need to compute an exponential function.
87 ///
88 ///It is defined as
89 ///\f[ f(x)=\frac x {1+|x|}\f]
90 ///it's derivative can be computed as
91 ///\f[ f'(x)= (1 - |f(x)|)^2 \f]
93  typedef EmptyState State;
94  template<class Arg>
95  void evalInPlace(Arg& arg)const{
96  noalias(arg) /= typename Arg::value_type(1)+abs(arg);
97  }
98 
99  template<class Arg>
100  void evalInPlace(Arg& arg, State&)const{
101  evalInPlace(arg);
102  }
103 
104  template<class Output, class Derivative>
105  void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
106  noalias(der) *= sqr(typename Output::value_type(1) - abs(output));
107  }
108 };
109 
110 ///\brief Linear activation Neuron.
112  typedef EmptyState State;
113  template<class Arg>
114  void evalInPlace(Arg&)const{}
115 
116  template<class Arg>
117  void evalInPlace(Arg& arg, State const&)const{}
118 
119  template<class Output, class Derivative>
120  void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{}
121 };
122 
123 ///\brief Rectifier Neuron f(x) = max(0,x)
125  typedef EmptyState State;
126  template<class Arg>
127  void evalInPlace(Arg& arg)const{
128  noalias(arg) = max(arg,typename Arg::value_type(0));
129  }
130 
131  template<class Arg>
132  void evalInPlace(Arg& arg, State&)const{
133  evalInPlace(arg);
134  }
135 
136  template<class Output, class Derivative>
137  void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
138  //~ noalias(der) *= heaviside(output);
139  for(std::size_t i = 0; i != output.size1(); ++i){
140  for(std::size_t j = 0; j != output.size2(); ++j){
141  der(i,j) *= output(i,j) > 0? 1.0:0.0;
142  }
143  }
144  }
145 };
146 
147 template<class VectorType = RealVector>
149  struct State: public shark::State{
151 
152  void resize(std::size_t patterns){
153  norm.resize(patterns);
154  }
155  };
156 
157  template<class Arg, class Device>
158  void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
159  noalias(arg) /= sum(arg);
160  }
161 
162  template<class Arg, class Device>
163  void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
164  noalias(trans(arg)) /= blas::repeat(sum_columns(arg),arg().size2());
165  }
166 
167  template<class Arg, class Device>
168  void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State& state)const{
169  state.norm.resize(arg().size1());
170  noalias(state.norm) = sum_columns(arg);
171  noalias(arg) /= trans(blas::repeat(state.norm,arg().size2()));
172  }
173 
174  template<class Output, class Derivative>
175  void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
176  for(std::size_t i = 0; i != output.size1(); ++i){
177  double constant=inner_prod(row(der,i),row(output,i));
178  noalias(row(der,i))= (row(der,i)-constant)/s.norm(i);
179  }
180  }
181 };
182 
183 
184 template<class VectorType = RealVector>
186  typedef EmptyState State;
187 
188  template<class Arg, class Device>
189  void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
190  noalias(arg) = exp(arg);
191  noalias(arg) /= sum(arg);
192  }
193 
194  template<class Arg, class Device>
195  void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
196  noalias(arg) = exp(arg);
197  noalias(arg) /= trans(blas::repeat(sum_columns(arg),arg().size2()));
198  }
199 
200  template<class Arg, class Device>
201  void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State&)const{
202  evalInPlace(arg);
203  }
204 
205  template<class Output, class Derivative>
206  void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
207  for(size_t i = 0; i != output.size1(); ++i){
208  double mass=inner_prod(row(der,i),row(output,i));
209  noalias(row(der,i)) = (row(der,i) - mass) *row(output,i);
210  }
211  }
212 };
213 
214 template <class NeuronType, class VectorType = RealVector>
215 class NeuronLayer : public AbstractModel<VectorType, VectorType, VectorType>{
216 private:
218 
219  NeuronType m_neuron;
220  Shape m_shape;
221 public:
225 
226  NeuronLayer(Shape const& shape = Shape()): m_shape(shape){
227  base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
228  base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
229  }
230 
231  /// \brief From INameable: return the class name.
232  std::string name() const
233  { return "NeuronLayer"; }
234 
235  NeuronType const& neuron()const{ return m_neuron;}
236  NeuronType& neuron(){ return m_neuron;}
237 
238  Shape inputShape() const{
239  return m_shape;
240  }
241 
243  return m_shape;
244  }
245 
246  /// obtain the parameter vector
247  ParameterVectorType parameterVector() const{
248  return ParameterVectorType();
249  }
250 
251  /// overwrite the parameter vector
252  void setParameterVector(ParameterVectorType const& newParameters){
253  SIZE_CHECK(newParameters.size() == 0);
254  }
255 
256  /// return the number of parameter
257  size_t numberOfParameters() const{
258  return 0;
259  }
260 
261  boost::shared_ptr<State> createState()const{
262  return boost::shared_ptr<State>(new typename NeuronType::State());
263  }
264 
265  using base_type::eval;
266 
267  void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
268  SIZE_CHECK(inputs.size2() == m_shape.numElements());
269  outputs.resize(inputs.size1(),inputs.size2());
270  noalias(outputs) = inputs;
271  m_neuron.evalInPlace(outputs);
272  }
273 
274  void eval(VectorType const& input, VectorType& output)const{
275  SIZE_CHECK(input.size() == m_shape.numElements());
276  output.resize(input.size());
277  noalias(output) = input;
278  m_neuron.evalInPlace(output);
279  }
280  void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
281  SIZE_CHECK(inputs.size2() == m_shape.numElements());
282  outputs.resize(inputs.size1(),inputs.size2());
283  noalias(outputs) = inputs;
284  m_neuron.evalInPlace(outputs, state.toState<typename NeuronType::State>());
285  }
286 
287  ///\brief Calculates the first derivative w.r.t the parameters and summing them up over all inputs of the last computed batch
289  BatchInputType const& inputs,
290  BatchOutputType const& outputs,
291  BatchOutputType const& coefficients,
292  State const& state,
293  ParameterVectorType& gradient
294  )const{
295  SIZE_CHECK(coefficients.size1()==inputs.size1());
296  SIZE_CHECK(coefficients.size2()==inputs.size2());
297  }
298  ///\brief Calculates the first derivative w.r.t the inputs and summs them up over all inputs of the last computed batch
300  BatchInputType const & inputs,
301  BatchOutputType const & outputs,
302  BatchOutputType const & coefficients,
303  State const& state,
304  BatchInputType& derivative
305  )const{
306  SIZE_CHECK(coefficients.size1() == inputs.size1());
307  SIZE_CHECK(coefficients.size2() == inputs.size2());
308 
309  derivative.resize(inputs.size1(),inputs.size2());
310  noalias(derivative) = coefficients;
311  m_neuron.multiplyDerivative(outputs, derivative, state.toState<typename NeuronType::State>());
312 
313  }
314 
315  /// From ISerializable
316  void read(InArchive& archive){ archive >> m_shape;}
317  /// From ISerializable
318  void write(OutArchive& archive) const{ archive << m_shape;}
319 };

}

#endif