Classifier.h
//===========================================================================
/*!
 *
 *
 * \brief Model for conversion of real-valued output to class labels
 *
 * \author T. Glasmachers, O. Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_CLASSIFIER_H
#define SHARK_MODELS_CLASSIFIER_H

#include <shark/Models/AbstractModel.h>

namespace shark {

///
/// \brief Conversion of real-valued or vector-valued outputs to class labels
///
/// \par
/// The Classifier is a model converting the
/// real-valued vector output of an underlying decision function to a
/// class label 0, ..., d-1 by means of an arg-max operation:
/// the model returns the index of the maximal
/// input component as its output. This conversion is suitable for
/// interpreting the output of a linear model, a neural network or a support vector
/// machine for multi-category classification.
///
/// In the special case that d is 1, it is assumed that the model output can be represented as
/// a 2-dimensional vector whose components have the same value but opposite sign.
/// Consequently, a positive output of the model is interpreted as class 1, a negative one as class 0.
///
/// The underlying decision function is an arbitrary model. It should
/// be default constructible and it can be accessed using decisionFunction().
/// The parameters of the Classifier are the ones of the decision function.
///
/// Optionally, the model allows bias values to be set, which are added to the predicted
/// values of the decision function. Adding a positive weight to a class thus makes it
/// more likely to be predicted. In the binary case with a single output, a positive weight
/// makes class 1 more likely and a negative weight makes class 0 more likely.
/// See the usage sketch below.
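///
/// \par Example
/// A minimal usage sketch (the concrete decision function type, the data objects
/// and the bias values below are illustrative assumptions, not part of this header):
/// \code
/// // wrap a decision function so that its real-valued outputs become class labels
/// LinearModel<RealVector> decisionFunction;        // assumed to be trained elsewhere
/// Classifier<LinearModel<RealVector> > classifier(decisionFunction);
///
/// // predict labels via arg-max over the decision values
/// Data<unsigned int> labels = classifier(inputs);  // inputs: a Data<RealVector>, assumed given
///
/// // optionally bias predictions, e.g. towards class 2 of a three-class problem
/// classifier.bias() = RealVector(3, 0.0);
/// classifier.bias()(2) = 0.5;
/// \endcode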
template<class Model>
class Classifier : public AbstractModel<typename Model::InputType, unsigned int>
{
private:
	typedef typename Model::BatchOutputType ModelBatchOutputType;
public:
	typedef typename Model::InputType InputType;
	typedef unsigned int OutputType;
	typedef typename Batch<InputType>::type BatchInputType;
	typedef typename Batch<OutputType>::type BatchOutputType;

	Classifier(){}
	Classifier(Model const& decisionFunction)
	: m_decisionFunction(decisionFunction){}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "Classifier<"+m_decisionFunction.name()+">"; }

	/// \brief Return the parameter vector of the underlying decision function.
	RealVector parameterVector() const{
		return m_decisionFunction.parameterVector();
	}

	/// \brief Set the parameter vector of the underlying decision function.
	void setParameterVector(RealVector const& newParameters){
		m_decisionFunction.setParameterVector(newParameters);
	}

	/// \brief Return the number of parameters of the underlying decision function.
	std::size_t numberOfParameters() const{
		return m_decisionFunction.numberOfParameters();
	}

	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_decisionFunction.inputShape();
	}
	///\brief Returns the shape of the output
	///
	/// For the Classifier the returned Shape is empty, as the output is a scalar class label.
	Shape outputShape() const{
		return Shape();
	}

	/// \brief Return the bias values added to the outputs of the decision function (empty if unused).
	RealVector const& bias()const{
		return m_bias;
	}
	/// \brief Return the bias values added to the outputs of the decision function (empty if unused).
	RealVector& bias(){
		return m_bias;
	}

	/// \brief Return the decision function.
	Model const& decisionFunction()const{
		return m_decisionFunction;
	}

	/// \brief Return the decision function.
	Model& decisionFunction(){
		return m_decisionFunction;
	}

	void eval(BatchInputType const& input, BatchOutputType& output)const{
		SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
		ModelBatchOutputType modelResult;
		m_decisionFunction.eval(input,modelResult);
		std::size_t batchSize = modelResult.size1();
		output.resize(batchSize);
		if(modelResult.size2() == 1){
			// binary case with a single output: threshold at zero (after adding the bias)
			double bias = m_bias.empty()? 0.0 : m_bias(0);
			for(std::size_t i = 0; i != batchSize; ++i){
				output(i) = modelResult(i,0) + bias > 0.0;
			}
		}
		else{
			// multi-class case: arg-max over the (optionally biased) outputs
			for(std::size_t i = 0; i != batchSize; ++i){
				if(m_bias.empty())
					output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i)));
				else
					output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i) + m_bias));
			}
		}
	}
	void eval(BatchInputType const& input, BatchOutputType& output, State& state)const{
		eval(input,output);
	}

	void eval(InputType const & pattern, OutputType& output)const{
		SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
		typename Model::OutputType modelResult;
		m_decisionFunction.eval(pattern,modelResult);
		if(modelResult.size() == 1){
			// binary case with a single output: threshold at zero (after adding the bias)
			double bias = m_bias.empty()? 0.0 : m_bias(0);
			output = modelResult(0) + bias > 0.0;
		}
		else{
			// multi-class case: arg-max over the (optionally biased) outputs
			if(m_bias.empty())
				output = static_cast<unsigned int>(arg_max(modelResult));
			else
				output = static_cast<unsigned int>(arg_max(modelResult + m_bias));
		}
	}

	/// From ISerializable
	void read(InArchive& archive){
		archive >> m_decisionFunction;
		archive >> m_bias;
	}
	/// From ISerializable
	void write(OutArchive& archive) const{
		archive << m_decisionFunction;
		archive << m_bias;
	}

private:
	Model m_decisionFunction;
	RealVector m_bias;
};

}
#endif