ConvolutionalModel.h (source listing extracted from this file's Doxygen documentation page)
/*!
 *
 *
 * \brief Implements a model applying a convolution to an image
 *
 *
 *
 * \author
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
32 #ifndef SHARK_MODELS_CONV2DModel_H
33 #define SHARK_MODELS_CONV2DModel_H
34 
38 namespace shark {
39 
40 
41 
/// \brief Selects how a convolution treats the image border.
enum class Convolution{
	Valid,  ///< Apply filters only at positions fully inside the image; the output shrinks.
	ZeroPad ///< Implicitly pad the image with zeros; the output keeps the input's spatial size.
};
46 ///
47 /// \brief Convolutional Model for 2D image data.
48 ///
49 /// \par
50 /// This model computes the result of
51 /// \f$ y = f(x) = g(\text{convolution}(w, x) + b) \f$, where g is an arbitrary activation function and
52 /// convolution is the convolution of the input image x with the filters w. b is a vector with one entry for each filter which is then applied to each response above
53 ///
54 /// The image is allowed to have several channels andare linearized to a single vector of size width * height * numChannels.
55 /// the linearization is performed as linearizing each channel as if it were a row-major matrix and then concatenating the different channels.
56 ///
57 /// For handling edge condition, the Conv2D model handles two different convolution modes:
58 ///
59 /// Convolution::Valid:
60 /// The output is only computed on patches which are fully inside the unpadded image as a linearized vector in the same format
61 /// of size (width - filter_width+1) * (height - filter_height+1) * numFilters.
62 ///
63 /// Convolution::ZeroPad
64 /// The output input is padded with zeros and the output has the same size as the input
65 /// of size width * height * numFilters.
66 template <class VectorType = RealVector, class ActivationFunction = LinearNeuron>
67 class Conv2DModel : public AbstractModel<VectorType,VectorType,VectorType>{
68 private:
71 
72  static_assert(!std::is_same<typename VectorType::storage_type::storage_tag, blas::dense_tag>::value, "Conv2D not implemented for sparse inputs");
73 public:
77 
78  /// Default Constructor; use setStructure later.
80  base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
81  base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
82  }
83 
84  ///\brief Sets the structure by setting the dimensionalities of image and filters.
85  ///
86  /// \arg imageShape Shape of the image imHeight x imWidth x channel
87  /// \arg filterShape Shape of the filter matrix numFilters x fiHeight x fiWidth x channel
88  /// \arg type Type of convolution padding to perform
90  Shape const& imageShape, Shape const& filterShape, Convolution type = Convolution::ZeroPad
91  ){
92  base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
93  base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
94  setStructure(imageShape, filterShape, type);
95  }
96 
97  std::string name() const
98  { return "Conv2DModel"; }
99 
100  ///\brief Returns the expected shape of the input
101  Shape inputShape() const{
102  return {m_imageHeight, m_imageWidth, m_numChannels};
103  }
104  ///\brief Returns the shape of the output
106  if(m_type != Convolution::Valid){
107  return {m_imageHeight, m_imageWidth, m_numFilters};
108  }else{
109  return {m_imageHeight - m_filterHeight + 1, m_imageWidth - m_filterWidth + 1, m_numFilters};
110  }
111  }
112 
113  /// \brief Returns the activation function.
114  ActivationFunction const& activationFunction()const{
115  return m_activation;
116  }
117 
118  /// \brief Returns the activation function.
119  ActivationFunction& activationFunction(){
120  return m_activation;
121  }
122 
123  /// \brief Obtain the parameter vector.
124  ParameterVectorType parameterVector() const{
125  return m_filters | m_offset;
126  }
127 
128  /// \brief Set the new parameters of the model.
129  void setParameterVector(ParameterVectorType const& newParameters){
130  SIZE_CHECK(newParameters.size() == numberOfParameters());
131  noalias(m_filters) = subrange(newParameters,0,m_filters.size());
132  noalias(m_offset) = subrange(newParameters,m_filters.size(),newParameters.size());
133  updateBackpropFilters();
134  }
135 
136  /// \brief Return the number of parameters.
137  size_t numberOfParameters() const{
138  return m_filters.size() + m_offset.size();
139  }
140 
141  ///\brief Sets the structure by setting the shape of image and filters.
142  ///
143  /// \arg imageShape Shape of the image imHeight x imWidth x channel
144  /// \arg filterShape Shape of the filter matrix numFilters x fiHeight x fiWidth
145  /// \arg type Type of convolution padding to perform
147  Shape const& imageShape, Shape const& filterShape, Convolution type = Convolution::ZeroPad
148  ){
149  m_type = type;
150  m_imageHeight = imageShape[0];
151  m_imageWidth = imageShape[1];
152  m_numChannels = imageShape[2];
153  m_numFilters = filterShape[0];
154  m_filterHeight = filterShape[1];
155  m_filterWidth = filterShape[2];
156  m_filters.resize(m_filterHeight * m_filterWidth * m_numFilters * m_numChannels);
157  m_offset.resize(m_numFilters);
158  updateBackpropFilters();
159  }
160 
161  boost::shared_ptr<State> createState()const{
162  return boost::shared_ptr<State>(new typename ActivationFunction::State());
163  }
164 
165  using base_type::eval;
166 
167  /// Evaluate the model
168  void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
169  SIZE_CHECK(inputs.size2() == inputShape().numElements());
170  outputs.resize(inputs.size1(),outputShape().numElements());
171  //geometry for "zero pad"
172  std::size_t outputsForFilter = outputShape().numElements()/m_numFilters;
173  std::size_t paddingHeight = (m_type != Convolution::Valid) ? m_filterHeight - 1: 0;
174  std::size_t paddingWidth = (m_type != Convolution::Valid) ? m_filterWidth - 1: 0;
175  for(std::size_t i = 0; i != inputs.size1(); ++i){
176  auto output = row(outputs,i);
177  blas::kernels::conv2d(row(inputs,i), m_filters, output,
178  m_numChannels, m_numFilters,
179  m_imageHeight, m_imageWidth,
180  m_filterHeight, m_filterWidth,
181  paddingHeight, paddingWidth
182  );
183  //apply offset
184  noalias(to_matrix(output, m_numFilters, outputsForFilter) ) += trans(blas::repeat(m_offset,outputsForFilter));
185  }
186  m_activation.evalInPlace(outputs, state.toState<typename ActivationFunction::State>());
187  }
188 
189  ///\brief Calculates the first derivative w.r.t the parameters and summing them up over all inputs of the last computed batch
191  BatchInputType const& inputs,
192  BatchOutputType const& outputs,
193  BatchOutputType const& coefficients,
194  State const& state,
195  ParameterVectorType& gradient
196  )const{
197  SIZE_CHECK(coefficients.size2()==outputShape().numElements());
198  SIZE_CHECK(coefficients.size1()==inputs.size1());
199 
200  BatchOutputType delta = coefficients;
201  m_activation.multiplyDerivative(outputs,delta, state.toState<typename ActivationFunction::State>());
202 
203  gradient.resize(numberOfParameters());
204  gradient.clear();
205  auto weightGradient = to_matrix(subrange(gradient,0,m_filters.size()), m_numFilters, m_filters.size()/m_numFilters);
206  auto offsetGradient = subrange(gradient, m_filters.size(),gradient.size());
207 
208  BatchInputType patches(outputShape().numElements()/m_numFilters, m_filters.size()/m_numFilters);
209  for(std::size_t i = 0; i != inputs.size1(); ++i){
210  if(m_type == Convolution::Valid){
211  blas::bindings::im2mat(//todo must get public interface in remora
212  row(inputs,i),patches,
213  m_numChannels,
214  m_imageHeight, m_imageWidth,
215  m_filterHeight, m_filterWidth
216  );
217  }else{
218  blas::bindings::im2mat_pad(//todo must get public interface in remora
219  row(inputs,i),patches,
220  m_numChannels,
221  m_imageHeight, m_imageWidth,
222  m_filterHeight, m_filterWidth,
223  m_filterHeight - 1, m_filterWidth - 1
224  );
225  }
226  auto delta_mat = to_matrix(row(delta,i), m_numFilters, patches.size1());
227  noalias(weightGradient) += delta_mat % patches;
228  noalias(offsetGradient) += sum_columns(delta_mat);
229  }
230  //derivative of offset
231 
232  }
233  ///\brief Calculates the first derivative w.r.t the inputs and summs them up over all inputs of the last computed batch
235  BatchInputType const & inputs,
236  BatchOutputType const& outputs,
237  BatchOutputType const & coefficients,
238  State const& state,
239  BatchInputType& derivatives
240  )const{
241  SIZE_CHECK(coefficients.size2() == outputShape().numElements());
242  SIZE_CHECK(coefficients.size1() == inputs.size1());
243 
244  BatchOutputType delta = coefficients;
245  m_activation.multiplyDerivative(outputs,delta, state.toState<typename ActivationFunction::State>());
246  Shape shape = outputShape();
247  std::size_t paddingHeight = m_filterHeight - 1;
248  std::size_t paddingWidth = m_filterWidth - 1;
249  if(m_type == Convolution::Valid){
250  paddingHeight *=2;
251  paddingWidth *=2;
252  }
253  derivatives.resize(inputs.size1(),inputShape().numElements());
254  derivatives.clear();
255  for(std::size_t i = 0; i != inputs.size1(); ++i){
256  auto derivative = row(derivatives,i);
257  blas::kernels::conv2d(row(delta,i), m_backpropFilters, derivative,
258  m_numFilters, m_numChannels,
259  shape[0], shape[1],
260  m_filterHeight, m_filterWidth,
261  paddingHeight, paddingWidth
262  );
263  }
264  }
265 
266  /// From ISerializable
267  void read(InArchive& archive){
268  archive >> m_filters;
269  archive >> m_offset;
270  archive >> m_imageHeight;
271  archive >> m_imageWidth;
272  archive >> m_filterHeight;
273  archive >> m_filterWidth;
274  archive >> m_numChannels;
275  archive >> m_numFilters;
276  archive >> m_type;
277  updateBackpropFilters();
278  }
279  /// From ISerializable
280  void write(OutArchive& archive) const{
281  archive << m_filters;
282  archive << m_offset;
283  archive << m_imageHeight;
284  archive << m_imageWidth;
285  archive << m_filterHeight;
286  archive << m_filterWidth;
287  archive << m_numChannels;
288  archive << m_numFilters;
289  archive << m_type;
290  }
291 
292 private:
293 
294  ///\brief Converts the filters into the backprop filters
295  ///
296  /// for computing the derivatie wrt the inputs in the chain rule, we
297  /// have to convove the outer derivative with the "transposed" filters
298  /// the transposition is done by switching the order of channels and filters in the storage
299  void updateBackpropFilters(){
300  m_backpropFilters.resize(m_filters.size());
301 
302  std::size_t filterImSize = m_filterWidth * m_filterHeight;
303  std::size_t filterSize = m_numChannels * m_filterWidth * m_filterHeight;
304  std::size_t bpFilterSize = m_numFilters * m_filterWidth * m_filterHeight;
305 
306  //Note: this looks a bit funny, but this way on a gpu only m_numChannel kernels need to be run
307  for(std::size_t c = 0; c != m_numChannels; ++c){
308  auto channel_mat = subrange(
309  to_matrix(m_filters, m_numFilters, filterSize), //create matrix where each row is one filter
310  0, m_numFilters, c * filterImSize, (c+1) * filterImSize //cut out all columns belonging to the current channel
311  );
312  //Todo: commented out, because we also need to flip, which is not implemented in remora
313  //~ auto channel_vec = to_vector(flip(channel_mat));//flip and linearize to vector (flip not implemented)
314  //~ //cut out target are and set values
315  //~ noalias(subrange(m_backpropFilters, c * bpFilterSize, (c+1) * bpFilterSize)) = channel_vec;
316  //instead use this cpu-only version
317  auto target_vec = subrange(m_backpropFilters, c * bpFilterSize, (c+1) * bpFilterSize);
318  auto target_mat = to_matrix(target_vec,m_numFilters, m_filterWidth * m_filterHeight);
319  for(std::size_t f = 0; f != m_numFilters; ++f){
320  for(std::size_t i = 0; i != m_filterWidth * m_filterHeight; ++i){
321  target_mat(f,i) = channel_mat(f, m_filterWidth * m_filterHeight-i-1);
322  }
323  }
324  }
325  }
326  VectorType m_filters; ///< Filters used for performing the convolution
327  VectorType m_backpropFilters;///< Same as filter just with the storage order of filters and channels reversed.
328  VectorType m_offset;///< offset applied to each filters response
329  ActivationFunction m_activation;///< The activation function to use
330 
331  std::size_t m_imageHeight;
332  std::size_t m_imageWidth;
333  std::size_t m_filterHeight;
334  std::size_t m_filterWidth;
335  std::size_t m_numChannels;
336  std::size_t m_numFilters;
337 
338  Convolution m_type;
339 };
340 
341 
342 }
343 #endif