32 #ifndef SHARK_OBJECTIVEFUNCTIONS_KERNELTARGETALIGNMENT_H 33 #define SHARK_OBJECTIVEFUNCTIONS_KERNELTARGETALIGNMENT_H 102 template<
class InputType = RealVector,
class LabelType =
unsigned int>
112 bool centering =
true 121 if(mep_kernel -> hasFirstParameterDerivative())
126 m_centering = centering;
128 setupY(dataset.
labels(), centering);
133 {
return "KernelTargetAlignment"; }
137 return mep_kernel -> parameterVector();
148 double eval(RealVector
const& input)
const{
151 return -evaluateKernelMatrix().error;
179 KernelMatrixResults results = evaluateKernelMatrix();
182 derivative.resize(parameters);
185 std::size_t startX = 0;
186 for(
int j = 0; j != i; ++j){
189 RealVector threadDerivative(parameters,0.0);
190 RealVector blockDerivative;
191 boost::shared_ptr<State> state = mep_kernel->
createState();
194 std::size_t startY = 0;
195 for(
int j = 0; j <= i; ++j){
196 mep_kernel->
eval(m_data.
batch(i).input,m_data.
batch(j).input,blockK,*state);
199 generateDerivativeWeightBlock(i,j,startX,startY,blockK,results),
203 noalias(threadDerivative) += blockDerivative;
207 noalias(derivative) += threadDerivative;
211 derivative /= m_elements;
212 return -results.error;
218 RealVector m_columnMeanY;
220 std::size_t m_numberOfClasses;
221 std::size_t m_elements;
224 struct KernelMatrixResults{
235 std::vector<std::size_t> classCount =
classSizes(labels);
236 m_numberOfClasses = classCount.size();
237 RealVector classMean(m_numberOfClasses);
238 double dm1 = m_numberOfClasses-1.0;
240 for(std::size_t i = 0; i != m_numberOfClasses; ++i){
241 classMean(i) = classCount[i]-(m_elements-classCount[i])/dm1;
242 m_meanY += classCount[i] * classMean(i);
244 classMean /= m_elements;
245 m_meanY /=
sqr(
double(m_elements));
246 m_columnMeanY.resize(m_elements);
247 for(std::size_t i = 0; i != m_elements; ++i){
248 m_columnMeanY(i) = classMean(labels.
element(i));
252 m_columnMeanY.clear();
257 RealVector meanLabel =
mean(labels);
258 m_columnMeanY.resize(m_elements);
259 for(std::size_t i = 0; i != m_elements; ++i){
260 m_columnMeanY(i) = inner_prod(labels.
element(i),meanLabel);
262 m_meanY=inner_prod(meanLabel,meanLabel);
265 m_columnMeanY.clear();
268 void computeBlockY(UIntVector
const& labelsi,UIntVector
const& labelsj, RealMatrix& blockY)
const{
269 std::size_t blockSize1 = labelsi.size();
270 std::size_t blockSize2 = labelsj.size();
271 double dm1 = m_numberOfClasses-1.0;
272 for(std::size_t k = 0; k != blockSize1; ++k){
273 for(std::size_t l = 0; l != blockSize2; ++l){
274 if( labelsi(k) == labelsj(l))
277 blockY(k,l) = -1.0/dm1;
282 void computeBlockY(RealMatrix
const& labelsi,RealMatrix
const& labelsj, RealMatrix& blockY)
const{
283 noalias(blockY) = labelsi % trans(labelsj);
287 double updateYK(UIntVector
const& labelsi,UIntVector
const& labelsj, RealMatrix
const& block)
const{
288 std::size_t blockSize1 = labelsi.size();
289 std::size_t blockSize2 = labelsj.size();
292 double dm1 = m_numberOfClasses-1.0;
293 for(std::size_t k = 0; k != blockSize1; ++k){
294 for(std::size_t l = 0; l != blockSize2; ++l){
295 if(labelsi(k) == labelsj(l))
296 result += block(k,l);
298 result -= block(k,l)/dm1;
305 double updateYK(RealMatrix
const& labelsi,RealMatrix
const& labelsj, RealMatrix
const& block)
const{
306 RealMatrix Y(labelsi.size1(), labelsj.size1());
307 computeBlockY(labelsi,labelsj,Y);
308 return sum(Y * block);
313 RealMatrix generateDerivativeWeightBlock(
314 std::size_t i, std::size_t j,
315 std::size_t start1, std::size_t start2,
316 RealMatrix
const& blockK,
317 KernelMatrixResults
const& matrixStatistics
322 double KcKc = matrixStatistics.KcKc;
323 double YcKc = matrixStatistics.YcKc;
324 double meanK = matrixStatistics.meanK;
325 RealMatrix blockW(blockSize1,blockSize2);
328 computeBlockY(m_data.
batch(i).label,m_data.
batch(j).label,blockW);
335 blockW-=repeat(subrange(KcKc*m_columnMeanY - YcKc*matrixStatistics.k,start2,start2+blockSize2),blockSize1);
336 blockW-=trans(repeat(subrange(KcKc*m_columnMeanY - YcKc*matrixStatistics.k,start1,start1+blockSize1),blockSize2));
338 blockW+= KcKc*m_meanY-YcKc*meanK;
339 blockW /= KcKc*std::sqrt(KcKc);
355 KernelMatrixResults evaluateKernelMatrix()
const{
364 RealVector k(m_elements,0.0);
366 std::size_t startRow = 0;
367 for(
int j = 0; j != i; ++j){
373 RealVector threadk(m_elements,0.0);
374 std::size_t startColumn = 0;
375 for(
int j = 0; j <= i; ++j){
377 RealMatrix blockK = (*mep_kernel)(m_data.
batch(i).input,m_data.
batch(j).input);
379 threadKK += frobenius_prod(blockK,blockK);
380 subrange(threadk,startColumn,startColumn+columnSize)+=sum_rows(blockK);
381 threadYK += updateYK(m_data.
batch(i).label,m_data.
batch(j).label,blockK);
384 threadKK += 2.0 * frobenius_prod(blockK,blockK);
385 subrange(threadk,startColumn,startColumn+columnSize)+=sum_rows(blockK);
386 subrange(threadk,startRow,startRow+rowSize)+=sum_columns(blockK);
387 threadYK += 2.0 * updateYK(m_data.
batch(i).label,m_data.
batch(j).label,blockK);
389 startColumn+=columnSize;
394 noalias(k) +=threadk;
398 double n = (double)m_elements;
400 double meanK = sum(k)/n;
406 double YcKc = YK-2.0*n*inner_prod(k,m_columnMeanY)+n2*m_meanY*meanK;
407 double KcKc = KK - 2.0*n*inner_prod(k,k)+n2*
sqr(meanK);
409 KernelMatrixResults results;
413 results.meanK = meanK;
414 results.error = YcKc/std::sqrt(KcKc)/n;