DropoutLayer.h
Go to the documentation of this file.
/*!
 *
 *
 * \brief Dropout layer: a parameter-free model that randomly sets entries of its input to zero.
 *
 * \author O.Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
30 #ifndef MODELS_DROPOUTLAYER_H
31 #define MODELS_DROPOUTLAYER_H
32 
#include <shark/Core/Random.h>
#include <shark/LinAlg/Base.h>
#include <shark/Models/AbstractModel.h>
36 namespace shark{
37 
38 template <class VectorType = RealVector>
39 class DropoutLayer : public AbstractModel<VectorType, VectorType, VectorType>{
40 private:
42  typedef blas::matrix<int, blas::row_major, typename VectorType::device_type> MatrixType;
43  struct InternalState: public State{
44  MatrixType mask;
45  };
46  Shape m_shape;
47  random::rng_type* mep_rng;
48  double m_dropoutProbability;
49 
50 public:
54 
55  DropoutLayer(Shape const& inputShape, double probability = 0.5, random::rng_type& rng = random::globalRng)
56  : m_shape(inputShape), mep_rng(&rng), m_dropoutProbability(probability){
59  }
60 
61  /// \brief From INameable: return the class name.
62  std::string name() const
63  { return "DropoutLayer"; }
64 
65  /// obtain the parameter vector
66  ParameterVectorType parameterVector() const{
67  return ParameterVectorType();
68  }
69 
70  /// overwrite the parameter vector
71  void setParameterVector(ParameterVectorType const& newParameters){
72  SIZE_CHECK(newParameters.size() == 0);
73  }
74 
75  /// return the number of parameter
76  size_t numberOfParameters() const{
77  return 0;
78  }
79 
80  ///\brief Returns the expected shape of the input
81  Shape inputShape() const{
82  return m_shape;
83  }
84  ///\brief Returns the shape of the output
85  Shape outputShape() const{
86  return m_shape;
87  }
88 
89  boost::shared_ptr<State> createState()const{
90  return boost::shared_ptr<State>(new InternalState());
91  }
92 
93  using base_type::eval;
94 
95  void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
96  outputs.resize(inputs.size1(),inputs.size2());
97  noalias(outputs) = inputs;
98  for(std::size_t i = 0; i != outputs.size1(); ++i){
99  for(std::size_t j = 0; j != outputs.size2(); ++j){
100  if(!random::coinToss(*mep_rng,m_dropoutProbability)){
101  outputs(i,j) = 0;
102  }
103  }
104  }
105  }
106 
107  void eval(VectorType const& input, VectorType& output)const {
108  output.resize(input.size());
109  noalias(output) = input;
110  for(std::size_t j = 0; j != output.size(); ++j){
111  if(!random::coinToss(*mep_rng,m_dropoutProbability)){
112  output(j) = 0;
113  }
114  }
115  }
116  void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
117  MatrixType& mask = state.toState<InternalState>().mask;
118  outputs.resize(inputs.size1(),inputs.size2());
119  mask.resize(inputs.size1(),inputs.size2());
120  for(std::size_t i = 0; i != outputs.size1(); ++i){
121  for(std::size_t j = 0; j != outputs.size2(); ++j){
122  mask(i,j) = random::coinToss(*mep_rng,m_dropoutProbability);
123  }
124  }
125  noalias(outputs) = inputs * mask;
126  }
127 
128  ///\brief Calculates the first derivative w.r.t the parameters and summing them up over all patterns of the last computed batch
130  BatchInputType const& patterns,
131  BatchOutputType const& outputs,
132  BatchOutputType const& coefficients,
133  State const& state,
134  ParameterVectorType& gradient
135  )const{
136  SIZE_CHECK(coefficients.size1()==patterns.size1());
137  SIZE_CHECK(coefficients.size2()==patterns.size2());
138  }
139  ///\brief Calculates the first derivative w.r.t the inputs and summs them up over all patterns of the last computed batch
141  BatchInputType const & patterns,
142  BatchOutputType const & outputs,
143  BatchOutputType const & coefficients,
144  State const& state,
145  BatchInputType& derivative
146  )const{
147  SIZE_CHECK(coefficients.size1() == patterns.size1());
148  SIZE_CHECK(coefficients.size2() == patterns.size2());
149 
150  MatrixType const& mask = state.toState<InternalState>().mask;
151  derivative.resize(coefficients.size1(),coefficients.size2());
152  noalias(derivative) = coefficients * mask;
153  }
154 
155  /// From ISerializable
156  void read(InArchive& archive){archive >> m_dropoutProbability;}
157  /// From ISerializable
158  void write(OutArchive& archive) const{ archive << m_dropoutProbability;}
159 };
160 
161 
162 }
163 
164 #endif