/*!
 * \file KernelTargetAlignment.h
 *
 * \brief Negative Kernel Target Alignment (KTA) error function
 *
 * \author T. Voss, T. Glasmachers, O. Krause
 * \date 2010-2011
 *
 * \par Copyright (c) 1998-2007:
 * Institut für Neuroinformatik
* Ruhr-Universität Bochum
* D-44780 Bochum, Germany
* Phone: +49-234-32-25558
* Fax: +49-234-32-14209
* eMail: Shark-admin@neuroinformatik.ruhr-uni-bochum.de
* www: http://www.neuroinformatik.ruhr-uni-bochum.de
 *
 *
 * This file is part of Shark. This library is free software;
 * you can redistribute it and/or modify it under the terms of the
 * GNU General Public License as published by the Free Software
 * Foundation; either version 3, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_KERNELTARGETALIGNMENT_H
#define SHARK_OBJECTIVEFUNCTIONS_KERNELTARGETALIGNMENT_H

#include <shark/ObjectiveFunctions/DataObjectiveFunction.h>
#include <shark/Models/Kernels/AbstractKernelFunction.h>
#include <cmath>

namespace shark{

/*!
 * \brief Implementation of the negative Kernel Target Alignment (KTA)
 *        as proposed by Nello Cristianini. This measure is extended
 *        for multi-class problems.
 *
 * \par
 * Kernel Target Alignment measures how well a kernel fits a
 * classification training set. It is invariant under kernel rescaling.
 * To turn it into an error function (i.e., to make minimization
 * meaningful) the negative KTA is implemented.
 *
 * \par
 * The KTA has two important properties: It is differentiable and
 * independent of the actual classifier.
 *
 * \par
 * The KTA as originally proposed by Nello Cristianini is not well
 * suited to unbalanced datasets.
 *
 * \par
 * KTA measures the similarity, in terms of the inner product, of the
 * kernel Gram matrix K with a perfect Gram matrix D with entries 1 or
 * -1 for examples of coinciding or different label, respectively.
 * Then the kernel target alignment is given by
 * \f[
 *     \hat A = \frac{\langle D, K \rangle}{\sqrt{\langle D, D \rangle \langle K, K\rangle}}
 * \f]
 * We generalize the measure by using the value -1/(N-1) for entries
 * corresponding to different classes, which gives a canonical and
 * symmetric generalization. Here, N denotes the number of classes.
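 *
 * \par Example
 * The following sketch illustrates the quantity computed by eval().
 * It is illustration only and not part of the class: the free-standing
 * function \c alignment and the use of a precomputed Gram matrix are
 * assumptions made for this example. It computes the generalized
 * alignment \f$\hat A\f$ from a Gram matrix K and labels y; eval()
 * returns its negative.
 * \code
 * #include <cmath>
 * #include <vector>
 *
 * // illustration only: generalized kernel target alignment from a
 * // precomputed (square) Gram matrix K and class labels y
 * double alignment(std::vector<std::vector<double> > const& K,
 *                  std::vector<unsigned int> const& y,
 *                  unsigned int numberOfClasses)
 * {
 *     double offdiag = -1.0 / (numberOfClasses - 1.0);   // entry of D for differing labels
 *     double DD = 0.0, DK = 0.0, KK = 0.0;
 *     for (std::size_t i = 0; i < K.size(); i++) {
 *         for (std::size_t j = 0; j < K.size(); j++) {
 *             double d = (y[i] == y[j]) ? 1.0 : offdiag;  // entry of the target matrix D
 *             DD += d * d;
 *             DK += d * K[i][j];
 *             KK += K[i][j] * K[i][j];
 *         }
 *     }
 *     return DK / std::sqrt(DD * KK);                     // \hat A; eval() returns -\hat A
 * }
 * \endcode
 * The class below computes the same sums from kernel evaluations,
 * exploiting the symmetry of the Gram matrix to visit each pair of
 * examples only once.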
 */
template<class InputType = RealVector>
class KernelTargetAlignment : public SupervisedObjectiveFunction<InputType, unsigned int> {
public:
	typedef SupervisedObjectiveFunction<InputType, unsigned int> base_type;
	typedef typename base_type::SearchPointType SearchPointType;
	typedef typename base_type::ResultType ResultType;
	typedef typename base_type::FirstOrderDerivative FirstOrderDerivative;
	typedef typename base_type::SecondOrderDerivative SecondOrderDerivative;

	KernelTargetAlignment(AbstractKernelFunction<InputType>* kernel, unsigned int numberOfClasses){
		SHARK_CHECK(kernel != NULL, "[KernelTargetAlignment] kernel is not allowed to be NULL");
		m_kernel = kernel;
		m_numberOfClasses = numberOfClasses;
		// off-diagonal entry of the target matrix D and precomputed multiples
		m_offdiag = -1.0 / (numberOfClasses - 1.0);
		m_2offdiag = 2.0 * m_offdiag;
		m_offdiag2 = m_offdiag * m_offdiag;
		m_2offdiag2 = 2.0 * m_offdiag2;
		this->m_name = "KernelTargetAlignment";
		this->m_features |= base_type::HAS_VALUE;
		this->m_features |= base_type::CAN_PROPOSE_STARTING_POINT;
		if(m_kernel->hasFirstParameterDerivative())
			this->m_features |= base_type::HAS_FIRST_DERIVATIVE;
	}

	void configure( const PropertyTree & node ){
		PropertyTree::const_assoc_iterator it = node.find("kernel");
		if(it != node.not_found()){
			m_kernel->configure(it->second);
		}
	}

	void setDataset(LabeledData<InputType, unsigned int> const& dataset){
		m_data = dataset;
	}

	void proposeStartingPoint(SearchPointType& startingPoint) const {
		startingPoint = m_kernel->parameterVector();
	}

	double eval(RealVector const& input) const{
		m_kernel->setParameterVector(input);

		// accumulate <D,D>, <D,K> and <K,K> over the symmetric Gram matrix
		double DD = 0.0;
		double DK = 0.0;
		double KK = 0.0;
		for (std::size_t i=0; i < m_data.size(); i++) {
			for (std::size_t j=0; j < i; j++) {
				double k = m_kernel->eval(m_data.input(i), m_data.input(j));
				if (m_data.label(i) == m_data.label(j)) {
					DD += 2.0;
					DK += 2.0 * k;
				}
				else {
					DD += m_2offdiag2;
					DK += m_2offdiag * k;
				}
				KK += 2.0 * k * k;
			}
			// diagonal entry
			double k = m_kernel->eval(m_data.input(i), m_data.input(i));
			DD += 1.0;
			DK += k;
			KK += k * k;
		}
		return ( - DK / std::sqrt(DD * KK));
	}

	ResultType evalDerivative( const SearchPointType & input, FirstOrderDerivative & derivative ) const {
		m_kernel->setParameterVector(input);

		size_t parameters = m_kernel->numberOfParameters();
		derivative.m_gradient.resize(parameters);
		RealVector der1(parameters);             // derivative of <D,K> w.r.t. the kernel parameters
		RealVector der2(parameters);             // half the derivative of <K,K> w.r.t. the kernel parameters
		RealVector kernelDerivative(parameters);
		der1.clear();
		der2.clear();

		double DD = 0.0;
		double DK = 0.0;
		double KK = 0.0;
		for (std::size_t i=0; i < m_data.size(); i++) {
			for (std::size_t j=0; j < i; j++) {
				double k = (*m_kernel)(m_data.input(i), m_data.input(j), kernelDerivative);
				if (m_data.label(i) == m_data.label(j)) {
					DD += 2.0;
					DK += 2.0 * k;
					der1 += 2.0 * kernelDerivative;
				}
				else {
					DD += m_2offdiag2;
					DK += m_2offdiag * k;
					der1 += m_2offdiag * kernelDerivative;
				}
				KK += 2.0 * k * k;
				der2 += 2.0 * k * kernelDerivative;
			}
			// diagonal entry
			double k = (*m_kernel)(m_data.input(i), m_data.input(i), kernelDerivative);
			DD += 1.0;
			DK += k;
			KK += k * k;
			der1 += kernelDerivative;
			der2 += k * kernelDerivative;
		}

		// gradient of -<D,K> / sqrt(<D,D><K,K>) w.r.t. the kernel parameters
		double denom = std::sqrt(DD * KK);
		double f1 = 1.0 / denom;
		double f2 = DK / (KK * denom);
		noalias(derivative.m_gradient) = f2 * der2 - f1 * der1;
		return (-DK / denom);
	}

private:
	AbstractKernelFunction<InputType>* m_kernel;
	LabeledData<InputType, unsigned int> m_data;
	unsigned int m_numberOfClasses;
	double m_offdiag;
	double m_offdiag2;
	double m_2offdiag;
	double m_2offdiag2;
};

}
#endif