LinearModel.h
/*!
 *
 * \brief Implements a Model using a linear function.
 *
 * \author T. Glasmachers, O. Krause
 * \date 2010-2017
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_LINEARMODEL_H
#define SHARK_MODELS_LINEARMODEL_H

//needed for the AbstractModel base class and the default LinearNeuron activation
#include <shark/Models/AbstractModel.h>
#include <shark/Models/NeuronLayers.h>

namespace shark {

///
/// \brief Linear Prediction with optional activation function
///
/// \par
/// This model computes the result of
/// \f$ y = f(x) = g(A x + b) \f$, where g is an arbitrary activation function.
/// By default g is the identity, so the model is a simple linear model;
/// otherwise it is known as a generalized linear model. There are two important
/// special cases: the output may be a single number, and the offset term b may
/// be dropped.
///
/// The class allows for dense and sparse input vector types. However, it assumes
/// that the weight matrix and the outputs are dense. There are cases where this
/// is not the desired behavior; see for example Normalizer for a class which is
/// designed for sparse inputs and outputs.
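///
/// \par Example
/// A minimal usage sketch (illustrative only; assumes dense RealVector inputs
/// and the default identity activation):
/// \code
/// LinearModel<RealVector> model(5, 3, true); // A is 3x5, offset b is present
/// RealVector x(5, 1.0);                      // a single input pattern
/// RealVector y = model(x);                   // y = g(A x + b)
/// \endcode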
template <class InputType = RealVector, class ActivationFunction = LinearNeuron>
class LinearModel : public AbstractModel<
    InputType,
    blas::vector<typename InputType::value_type, typename InputType::device_type>,//type of output uses same device and precision as input
    blas::vector<typename InputType::value_type, typename InputType::device_type>//type of parameters uses same device and precision as input
>{
private:
    typedef blas::vector<typename InputType::value_type, typename InputType::device_type> VectorType;
    typedef blas::matrix<typename InputType::value_type, blas::row_major, typename InputType::device_type> MatrixType;
    //convenience typedef for the base class; referenced throughout the implementation
    typedef AbstractModel<InputType, VectorType, VectorType> base_type;

    Shape m_inputShape;
    Shape m_outputShape;
    MatrixType m_matrix;
    VectorType m_offset;
    ActivationFunction m_activation;
public:
    typedef typename base_type::BatchInputType BatchInputType;//batch of input vectors
    typedef typename base_type::BatchOutputType BatchOutputType;//same as MatrixType
    typedef typename base_type::ParameterVectorType ParameterVectorType;//same as VectorType

    /// Default Constructor; use setStructure later
    LinearModel(){
        base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        //input derivatives are only supported for dense inputs
        if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
            base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        }
    }
    /// Constructor creating a model with given dimensionalities and optional offset term.
    LinearModel(Shape const& inputs, Shape const& outputs = 1, bool offset = false)
    : m_inputShape(inputs)
    , m_outputShape(outputs)
    , m_matrix(outputs.numElements(), inputs.numElements(), 0.0)
    , m_offset(offset? outputs.numElements() : 0, 0.0){
        base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        //input derivatives are only supported for dense inputs
        if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
            base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        }
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "LinearModel"; }

    /// Construction from matrix (and optional offset vector)
    LinearModel(MatrixType const& matrix, VectorType const& offset = VectorType())
    : m_inputShape(matrix.size2())
    , m_outputShape(matrix.size1())
    , m_matrix(matrix)
    , m_offset(offset){
        base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        //input derivatives are only supported for dense inputs
        if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
            base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        }
    }

    /// check for the presence of an offset term
    bool hasOffset() const{
        return m_offset.size() != 0;
    }

    ///\brief Returns the expected shape of the input
    Shape inputShape() const{
        return m_inputShape;
    }
    ///\brief Returns the shape of the output
    Shape outputShape() const{
        return m_outputShape;
    }

    /// obtain the parameter vector
    ParameterVectorType parameterVector() const{
        return to_vector(m_matrix) | m_offset;
    }

    /// overwrite the parameter vector
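    ///
    /// The layout is the row-major flattened matrix followed by the offset.
    /// A minimal sketch (values are illustrative):
    /// \code
    /// LinearModel<RealVector> model(2, 2, true);      // 2x2 matrix plus 2 offsets
    /// RealVector p(model.numberOfParameters(), 0.1);  // 4 + 2 = 6 parameters
    /// model.setParameterVector(p);                    // all entries of A and b become 0.1
    /// \endcode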
    void setParameterVector(ParameterVectorType const& newParameters){
        std::size_t numInputs = inputShape().numElements();
        std::size_t numOutputs = outputShape().numElements();
        noalias(to_vector(m_matrix)) = subrange(newParameters, 0, numInputs * numOutputs);
        noalias(m_offset) = subrange(newParameters, numInputs * numOutputs, newParameters.size());
    }

    /// return the number of parameters
    size_t numberOfParameters() const{
        return m_matrix.size1()*m_matrix.size2() + m_offset.size();
    }

    /// overwrite structure and parameters
    void setStructure(Shape const& inputs, Shape const& outputs = 1, bool offset = false){
        LinearModel<InputType, ActivationFunction> model(inputs, outputs, offset);
        *this = model;
    }

    /// overwrite structure and parameters
    void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
        LinearModel<InputType, ActivationFunction> model(matrix, offset);
        *this = model;
    }

    /// return the weight matrix
    MatrixType const& matrix() const{
        return m_matrix;
    }

    /// return the writable weight matrix
    MatrixType& matrix(){
        return m_matrix;
    }

    /// return the offset
    VectorType const& offset() const{
        return m_offset;
    }
    VectorType& offset(){
        return m_offset;
    }

    /// \brief Returns the activation function.
    ActivationFunction const& activationFunction()const{
        return m_activation;
    }

    /// \brief Returns the activation function.
    ActivationFunction& activationFunction(){
        return m_activation;
    }

    boost::shared_ptr<State> createState()const{
        return boost::shared_ptr<State>(new typename ActivationFunction::State());
    }

    using base_type::eval;

    /// Evaluate the model: output = g(matrix * input + offset)
    void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
        outputs.resize(inputs.size1(), m_matrix.size1());
        //we multiply with a set of row vectors from the left
        noalias(outputs) = inputs % trans(m_matrix);
        if (hasOffset()){
            noalias(outputs) += repeat(m_offset, inputs.size1());
        }
        m_activation.evalInPlace(outputs);
    }

    /// Evaluate the model for a single input
    void eval(InputType const& input, VectorType& output)const{
        output.resize(m_matrix.size1());
        //the weight matrix is multiplied with the input vector
        noalias(output) = m_matrix % input;
        if (hasOffset()){
            noalias(output) += m_offset;
        }
        m_activation.evalInPlace(output);
    }
    /// Evaluate the model, storing intermediate results in the state object for later derivative computation
    void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
        outputs.resize(inputs.size1(), m_matrix.size1());
        //we multiply with a set of row vectors from the left
        noalias(outputs) = inputs % trans(m_matrix);
        if (hasOffset()){
            noalias(outputs) += repeat(m_offset, inputs.size1());
        }
        m_activation.evalInPlace(outputs, state.toState<typename ActivationFunction::State>());
    }

    ///\brief Calculates the first derivative w.r.t. the parameters, summed over all patterns of the last computed batch
    void weightedParameterDerivative(
        BatchInputType const& patterns,
        BatchOutputType const& outputs,
        BatchOutputType const& coefficients,
        State const& state,
        ParameterVectorType& gradient
    )const{
        SIZE_CHECK(coefficients.size2() == m_matrix.size1());
        SIZE_CHECK(coefficients.size1() == patterns.size1());

        gradient.resize(numberOfParameters());
        std::size_t numInputs = inputShape().numElements();
        std::size_t numOutputs = outputShape().numElements();
        gradient.clear();
        std::size_t matrixParams = numInputs*numOutputs;

        auto weightGradient = blas::to_matrix(subrange(gradient, 0, matrixParams), numOutputs, numInputs);

        BatchOutputType delta = coefficients;
        m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());
        //gradient of the weight matrix: sum_i delta_i * pattern_i^T
        noalias(weightGradient) = trans(delta) % patterns;

        if (hasOffset()){
            noalias(subrange(gradient, matrixParams, matrixParams + numOutputs)) = sum_rows(delta);
        }
    }
    ///\brief Calculates the first derivative w.r.t. the inputs, weighted by the coefficients, for each pattern of the last computed batch
    void weightedInputDerivative(
        BatchInputType const& patterns,
        BatchOutputType const& outputs,
        BatchOutputType const& coefficients,
        State const& state,
        MatrixType& derivative
    )const{
        SIZE_CHECK(coefficients.size2() == m_matrix.size1());
        SIZE_CHECK(coefficients.size1() == patterns.size1());

        //compute chain rule
        BatchOutputType delta = coefficients;
        m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

        derivative.resize(patterns.size1(), patterns.size2());
        noalias(derivative) = delta % m_matrix;
    }

    ///\brief Calculates both the parameter and the input derivative in a single pass
    void weightedDerivatives(
        BatchInputType const& patterns,
        BatchOutputType const& outputs,
        BatchOutputType const& coefficients,
        State const& state,
        ParameterVectorType& parameterDerivative,
        MatrixType& inputDerivative
    )const{
        SIZE_CHECK(coefficients.size2() == m_matrix.size1());
        SIZE_CHECK(coefficients.size1() == patterns.size1());

        std::size_t numInputs = inputShape().numElements();
        std::size_t numOutputs = outputShape().numElements();

        //compute chain rule
        BatchOutputType delta = coefficients;
        m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

        //compute input derivative
        inputDerivative.resize(patterns.size1(), numInputs);
        noalias(inputDerivative) = delta % m_matrix;

        //compute parameter derivative
        parameterDerivative.resize(numberOfParameters());
        parameterDerivative.clear();
        std::size_t matrixParams = numInputs*numOutputs;
        auto weightGradient = blas::to_matrix(subrange(parameterDerivative, 0, matrixParams), numOutputs, numInputs);
        auto offsetGradient = subrange(parameterDerivative, matrixParams, parameterDerivative.size());

        //gradient of the weight matrix: sum_i delta_i * pattern_i^T
        noalias(weightGradient) = trans(delta) % patterns;
        if (hasOffset()){
            noalias(offsetGradient) = sum_rows(delta);
        }
    }

    /// From ISerializable
    void read(InArchive& archive){
        archive >> m_matrix;
        archive >> m_offset;
        archive >> m_inputShape;
        archive >> m_outputShape;
    }
    /// From ISerializable
    void write(OutArchive& archive) const{
        archive << m_matrix;
        archive << m_offset;
        archive << m_inputShape;
        archive << m_outputShape;
    }
};

/*! \brief Basic linear classifier.
 *
 * The LinearClassifier class is a multi class classifier model
 * suited for linear discriminant analysis. For c classes
 * \f$ 0, \dots, c-1 \f$ the model computes
 *
 * \f$ \arg \max_i w_i^T x + b_i \f$
 *
 * Thus it is a linear model with an arg max computation on top.
 * The internal linear model can be queried using decisionFunction().
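 *
 * \par Example
 * A minimal usage sketch (the LDA trainer is just one possible choice and
 * serves only as an illustration; loadData() is a hypothetical helper):
 * \code
 * ClassificationDataset data = loadData(); // labeled training data
 * LinearClassifier<RealVector> classifier;
 * LDA trainer;
 * trainer.train(classifier, data);
 * unsigned int label = classifier(x);      // x: a RealVector test point
 * \endcode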
 */
template<class VectorType = RealVector>
class LinearClassifier : public Classifier<LinearModel<VectorType> >
{
public:
    LinearClassifier(){}

    std::string name() const
    { return "LinearClassifier"; }
};

}
#endif