Classifier.h
//===========================================================================
/*!
 *
 *
 * \brief Model for conversion of real-valued output to class labels
 *
 * \author T. Glasmachers, O. Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_CLASSIFIER_H
#define SHARK_MODELS_CLASSIFIER_H

#include <shark/Models/AbstractModel.h>

namespace shark {

///
/// \brief Conversion of real-valued or vector-valued outputs to class labels
///
/// \par
/// The Classifier is a model converting the
/// real-valued vector output of an underlying decision function to a
/// class label 0, ..., d-1 by means of an arg-max operation:
/// the model returns the index of the maximal
/// input component as its output. This conversion is suitable for
/// interpreting the output of a linear model, a neural network or a support vector
/// machine for multi-category classification.
///
/// In the special case that d is 1, it is assumed that the model output can be represented as
/// a 2-dimensional vector whose components have the same value but opposite sign.
/// Consequently, a positive output of the model is interpreted as class 1, a negative one as class 0.
///
/// The underlying decision function is an arbitrary model. It should
/// be default constructible and it can be accessed using decisionFunction().
/// The parameters of the Classifier are the ones of the decision function.
///
/// Optionally, the model allows bias values to be set, which are added to the predicted
/// values of the decision function. Adding a positive weight to a class thus makes it
/// more likely to be predicted. In the binary case with a single output, a positive weight
/// makes class 1 more likely and a negative weight makes class 0 more likely.
/// See the usage sketch below.
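///
/// \par Example
/// A minimal usage sketch (the concrete decision function type, the data objects
/// and the bias values below are illustrative assumptions, not part of this header):
/// \code
/// // wrap a decision function so that its real-valued outputs become class labels
/// LinearModel<RealVector> decisionFunction;        // assumed to be trained elsewhere
/// Classifier<LinearModel<RealVector> > classifier(decisionFunction);
///
/// // predict labels via arg-max over the decision values
/// Data<unsigned int> labels = classifier(inputs);  // inputs: a Data<RealVector>, assumed given
///
/// // optionally bias predictions, e.g. towards class 2 of a three-class problem
/// classifier.bias() = RealVector(3, 0.0);
/// classifier.bias()(2) = 0.5;
/// \endcode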
template<class Model>
class Classifier : public AbstractModel<typename Model::InputType, unsigned int>
{
private:
	typedef typename Model::BatchOutputType ModelBatchOutputType;
public:
	typedef typename Model::InputType InputType;
	typedef unsigned int OutputType;
	typedef typename Batch<InputType>::type BatchInputType;
	typedef typename Batch<OutputType>::type BatchOutputType;

	Classifier(){}
	Classifier(Model const& decisionFunction)
	: m_decisionFunction(decisionFunction){}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "Classifier<"+m_decisionFunction.name()+">"; }

	/// \brief Return the parameter vector of the underlying decision function.
	RealVector parameterVector() const{
		return m_decisionFunction.parameterVector();
	}

	/// \brief Set the parameter vector of the underlying decision function.
	void setParameterVector(RealVector const& newParameters){
		m_decisionFunction.setParameterVector(newParameters);
	}

	/// \brief Return the number of parameters of the underlying decision function.
	std::size_t numberOfParameters() const{
		return m_decisionFunction.numberOfParameters();
	}

	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_decisionFunction.inputShape();
	}
	///\brief Returns the shape of the output
	///
	/// For the Classifier the returned Shape is empty, as the output is a scalar class label.
	Shape outputShape() const{
		return Shape();
	}

	/// \brief Return the bias values added to the outputs of the decision function (empty if unused).
	RealVector const& bias()const{
		return m_bias;
	}
	/// \brief Return the bias values added to the outputs of the decision function (empty if unused).
	RealVector& bias(){
		return m_bias;
	}

	/// \brief Return the decision function.
	Model const& decisionFunction()const{
		return m_decisionFunction;
	}

	/// \brief Return the decision function.
	Model& decisionFunction(){
		return m_decisionFunction;
	}

	void eval(BatchInputType const& input, BatchOutputType& output)const{
		SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
		ModelBatchOutputType modelResult;
		m_decisionFunction.eval(input,modelResult);
		std::size_t batchSize = modelResult.size1();
		output.resize(batchSize);
		if(modelResult.size2() == 1){
			// binary case with a single output: threshold at zero (after adding the bias)
			double bias = m_bias.empty()? 0.0 : m_bias(0);
			for(std::size_t i = 0; i != batchSize; ++i){
				output(i) = modelResult(i,0) + bias > 0.0;
			}
		}
		else{
			// multi-class case: arg-max over the (optionally biased) outputs
			for(std::size_t i = 0; i != batchSize; ++i){
				if(m_bias.empty())
					output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i)));
				else
					output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i) + m_bias));
			}
		}
	}
	void eval(BatchInputType const& input, BatchOutputType& output, State& state)const{
		eval(input,output);
	}

	void eval(InputType const & pattern, OutputType& output)const{
		SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
		typename Model::OutputType modelResult;
		m_decisionFunction.eval(pattern,modelResult);
		if(modelResult.size() == 1){
			// binary case with a single output: threshold at zero (after adding the bias)
			double bias = m_bias.empty()? 0.0 : m_bias(0);
			output = modelResult(0) + bias > 0.0;
		}
		else{
			// multi-class case: arg-max over the (optionally biased) outputs
			if(m_bias.empty())
				output = static_cast<unsigned int>(arg_max(modelResult));
			else
				output = static_cast<unsigned int>(arg_max(modelResult + m_bias));
		}
	}

	/// From ISerializable
	void read(InArchive& archive){
		archive >> m_decisionFunction;
		archive >> m_bias;
	}
	/// From ISerializable
	void write(OutArchive& archive) const{
		archive << m_decisionFunction;
		archive << m_bias;
	}

private:
	Model m_decisionFunction;
	RealVector m_bias;
};

}
#endif